Compare commits

..

23 Commits

Author SHA1 Message Date
Evan Lezar
88862eebbf Merge branch 'bump-1.12.2' into 'release-1.12'
Bump version to v1.12.2

See merge request nvidia/container-toolkit/container-toolkit!341
2023-03-14 17:14:57 +00:00
Evan Lezar
bb6aea3045 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-14 16:35:45 +02:00
Evan Lezar
745c8ee2e7 Bump version to v1.12.2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-14 16:35:35 +02:00
Evan Lezar
0bedf07e7b Merge branch 'backport-install-nvidia-ctk' into 'release-1.12'
Backport changes for 1.12.1 release

See merge request nvidia/container-toolkit/container-toolkit!332
2023-03-10 11:24:34 +00:00
Evan Lezar
5c86ca17ef Update CHANGELOG.md
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-10 12:47:14 +02:00
Evan Lezar
0741252a7c Merge branch 'bump-cuda-version' into 'main'
Bump CUDA base image version to 12.1.0

See merge request nvidia/container-toolkit/container-toolkit!335
2023-03-10 12:26:09 +02:00
Evan Lezar
2d9e4cb720 Merge branch 'set-nvidia-ctk-path' into 'main'
Set nvidia-ctk.path config option based on installed path

See merge request nvidia/container-toolkit/container-toolkit!334
2023-03-09 20:18:32 +02:00
Evan Lezar
12aca454ab Also install nvidia-ctk in toolkit-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 11:49:16 +02:00
Evan Lezar
57a4072fae Merge branch 'fix-internal' into 'release-1.12'
Merge branch 'fix-internal-scans' into 'main'

See merge request nvidia/container-toolkit/container-toolkit!322
2023-03-06 12:07:02 +00:00
Evan Lezar
bac1222871 Merge branch 'fix-internal-scans' into 'main'
Fix internal scans

See merge request nvidia/container-toolkit/container-toolkit!316
2023-03-06 14:06:02 +02:00
Evan Lezar
0023db2150 Merge branch 'fix-rule-for-release' into 'release-1.12'
Run full build on release- branches

See merge request nvidia/container-toolkit/container-toolkit!319
2023-03-06 10:55:26 +00:00
Evan Lezar
3ffc8cf86d Merge branch 'cherry-pick-1.12.1' into 'release-1.12'
Bump version to 1.12.1 and cherry-pick changes

See merge request nvidia/container-toolkit/container-toolkit!311
2023-03-06 10:06:48 +00:00
Evan Lezar
a6d02feba7 Merge branch 'fix-rpm-postun-scriptlet' into 'main'
nvidia-container-toolkit.spec: fix syntax error in postun scriptlet

See merge request nvidia/container-toolkit/container-toolkit!309
2023-03-06 11:12:51 +02:00
Evan Lezar
9fc25ac641 Merge branch 'remove-outdated-platforms' into 'main'
Remove outdated platforms from CI

See merge request nvidia/container-toolkit/container-toolkit!310
2023-03-06 11:12:51 +02:00
Evan Lezar
7df553ad09 Merge branch 'CNT-3965/dont-fail-chmod-hook' into 'main'
Skip paths with errors in chmod hook

See merge request nvidia/container-toolkit/container-toolkit!303
2023-03-06 11:12:51 +02:00
Evan Lezar
d5ab1c0ba0 Merge branch 'fix-nvidia-ctk-path' into 'main'
Ensure that generate uses a consistent nvidia-ctk path

See merge request nvidia/container-toolkit/container-toolkit!301
2023-03-06 11:12:51 +02:00
Evan Lezar
bc37b97c73 Merge branch 'fix-nvidia-ctk-path' into 'main'
Fix issue with blank nvidia-ctk path

See merge request nvidia/container-toolkit/container-toolkit!297
2023-03-06 11:12:51 +02:00
Kevin Klues
2081750bea Merge branch 'support-multimple-firmware-files' into 'main'
Add globbing for mounting multiple GSP firmware files

See merge request nvidia/container-toolkit/container-toolkit!295
2023-03-06 11:12:51 +02:00
Evan Lezar
a2d1f4cfb4 Merge branch 'update-ldflags' into 'main'
Update ldflags for cgo

See merge request nvidia/container-toolkit/container-toolkit!290
2023-03-06 11:12:51 +02:00
Evan Lezar
944922a541 Remove fedora35 pipeline targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 11:12:51 +02:00
Evan Lezar
5995305554 Skip component updates on release-* branches
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 11:12:51 +02:00
Evan Lezar
bc81d5e68b Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 11:12:42 +02:00
Evan Lezar
d50e7f1f4f Bump version to 1.12.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 14:51:32 +02:00
894 changed files with 30930 additions and 105913 deletions

View File

@@ -19,10 +19,10 @@ default:
variables:
GIT_SUBMODULE_STRATEGY: recursive
BUILDIMAGE: "${CI_REGISTRY_IMAGE}/build:${CI_COMMIT_SHORT_SHA}"
BUILD_MULTI_ARCH_IMAGES: "true"
stages:
- trigger
- image
- lint
- go-checks
@@ -33,70 +33,70 @@ stages:
- test
- scan
- release
- sign
.pipeline-trigger-rules:
rules:
# We trigger the pipeline if started manually
- if: $CI_PIPELINE_SOURCE == "web"
# We trigger the pipeline on the main branch
- if: $CI_COMMIT_BRANCH == "main"
# We trigger the pipeline on the release- branches
- if: $CI_COMMIT_BRANCH =~ /^release-.*$/
# We trigger the pipeline on tags
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
workflow:
rules:
# We trigger the pipeline on a merge request
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
# We then add all the regular triggers
- !reference [.pipeline-trigger-rules, rules]
# The main or manual job is used to filter out distributions or architectures that are not required on
# every build.
.main-or-manual:
rules:
- !reference [.pipeline-trigger-rules, rules]
- if: $CI_COMMIT_BRANCH == "main"
- if: $CI_COMMIT_BRANCH =~ /^release-.*$/
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
- if: $CI_PIPELINE_SOURCE == "schedule"
when: manual
# The trigger-pipeline job adds a manually triggered job to the pipeline on merge requests.
trigger-pipeline:
stage: trigger
script:
- echo "starting pipeline"
# Define the distribution targets
.dist-amazonlinux2:
rules:
- !reference [.main-or-manual, rules]
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
when: manual
allow_failure: false
- when: always
variables:
DIST: amazonlinux2
PACKAGE_REPO_TYPE: rpm
# Define the distribution targets
.dist-centos7:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: centos7
CVE_UPDATES: "cyrus-sasl-lib"
PACKAGE_REPO_TYPE: rpm
.dist-centos8:
variables:
DIST: centos8
CVE_UPDATES: "cyrus-sasl-lib"
PACKAGE_REPO_TYPE: rpm
.dist-debian10:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: debian10
PACKAGE_REPO_TYPE: debian
.dist-opensuse-leap15.1:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: opensuse-leap15.1
PACKAGE_REPO_TYPE: rpm
.dist-ubi8:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: ubi8
CVE_UPDATES: "cyrus-sasl-lib"
PACKAGE_REPO_TYPE: rpm
.dist-ubuntu18.04:
variables:
DIST: ubuntu18.04
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
PACKAGE_REPO_TYPE: debian
.dist-ubuntu20.04:
variables:
DIST: ubuntu20.04
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
PACKAGE_REPO_TYPE: debian
.dist-packaging:
variables:
@@ -145,7 +145,7 @@ trigger-pipeline:
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
script:
- make -f deployments/container/Makefile test-${DIST}
- make -f build/container/Makefile test-${DIST}
# Define the test targets
test-packaging:
@@ -195,7 +195,7 @@ test-packaging:
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
# Target
- make -f deployments/container/Makefile push-${DIST}
- make -f build/container/Makefile push-${DIST}
# Define a staging release step that pushes an image to an internal "staging" repository
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
@@ -214,8 +214,6 @@ test-packaging:
.release:external:
extends:
- .release
variables:
FORCE_PUBLISH_IMAGES: "yes"
rules:
- if: $CI_COMMIT_TAG
variables:
@@ -225,6 +223,13 @@ test-packaging:
OUT_IMAGE_VERSION: "${DEVEL_RELEASE_IMAGE_VERSION}"
# Define the release jobs
release:staging-centos7:
extends:
- .release:staging
- .dist-centos7
needs:
- image-centos7
release:staging-ubi8:
extends:
- .release:staging

View File

@@ -1,67 +0,0 @@
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "gomod"
target-branch: main
directory: "/"
schedule:
interval: "weekly"
day: "sunday"
ignore:
- dependency-name: k8s.io/*
labels:
- dependencies
- package-ecosystem: "docker"
target-branch: main
directory: "/deployments/container"
schedule:
interval: "daily"
- package-ecosystem: "gomod"
# This defines a specific dependabot rule for the latest release-* branch.
target-branch: release-1.15
directory: "/"
schedule:
interval: "weekly"
day: "sunday"
ignore:
- dependency-name: k8s.io/*
labels:
- dependencies
- maintenance
- package-ecosystem: "docker"
target-branch: release-1.15
directory: "/deployments/container"
schedule:
interval: "daily"
labels:
- dependencies
- maintenance
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
- package-ecosystem: "github-actions"
target-branch: gh-pages
directory: "/"
schedule:
interval: "weekly"
day: "monday"
# Allow dependabot to update the libnvidia-container submodule.
- package-ecosystem: "gitsubmodule"
target-branch: main
directory: "/"
allow:
- dependency-name: "third_party/libnvidia-container"
schedule:
interval: "daily"
labels:
- dependencies
- libnvidia-container

View File

@@ -1,76 +0,0 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Golang
on:
pull_request:
types:
- opened
- synchronize
branches:
- main
- release-*
push:
branches:
- main
- release-*
jobs:
check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
name: Checkout code
- name: Get Golang version
id: vars
run: |
GOLANG_VERSION=$( grep "GOLANG_VERSION :=" versions.mk )
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GOLANG_VERSION }}
- name: Lint
uses: golangci/golangci-lint-action@v6
with:
version: latest
args: -v --timeout 5m
skip-cache: true
- name: Check golang modules
run: make check-vendor
test:
name: Unit test
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Get Golang version
id: vars
run: |
GOLANG_VERSION=$( grep "GOLANG_VERSION :=" versions.mk )
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GOLANG_VERSION }}
- run: make test
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
name: Checkout code
- name: Build
run: make docker-build

View File

@@ -1,138 +0,0 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run this workflow on pull requests
name: image
on:
pull_request:
types:
- opened
- synchronize
branches:
- main
- release-*
push:
branches:
- main
- release-*
jobs:
packages:
runs-on: ubuntu-latest
strategy:
matrix:
target:
- ubuntu18.04-arm64
- ubuntu18.04-amd64
- ubuntu18.04-ppc64le
- centos7-aarch64
- centos7-x86_64
- centos8-ppc64le
ispr:
- ${{github.event_name == 'pull_request'}}
exclude:
- ispr: true
target: ubuntu18.04-arm64
- ispr: true
target: ubuntu18.04-ppc64le
- ispr: true
target: centos7-aarch64
- ispr: true
target: centos8-ppc64le
fail-fast: false
steps:
- uses: actions/checkout@v4
name: Check out code
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: build ${{ matrix.target }} packages
run: |
sudo apt-get install -y coreutils build-essential sed git bash make
echo "Building packages"
./scripts/build-packages.sh ${{ matrix.target }}
- name: 'Upload Artifacts'
uses: actions/upload-artifact@v4
with:
compression-level: 0
name: toolkit-container-${{ matrix.target }}-${{ github.run_id }}
path: ${{ github.workspace }}/dist/*
image:
runs-on: ubuntu-latest
strategy:
matrix:
dist:
- ubuntu20.04
- ubi8
- packaging
ispr:
- ${{github.event_name == 'pull_request'}}
exclude:
- ispr: true
dist: ubi8
needs: packages
steps:
- uses: actions/checkout@v4
name: Check out code
- name: Calculate build vars
id: vars
run: |
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
REPO_FULL_NAME="${{ github.event.pull_request.head.repo.full_name }}"
echo "${REPO_FULL_NAME}"
echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> $GITHUB_ENV
PUSH_ON_BUILD="false"
BUILD_MULTI_ARCH_IMAGES="false"
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
if [[ "${{ github.actor }}" != "dependabot[bot]" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
# For non-fork PRs that are not created by dependabot we do push images
PUSH_ON_BUILD="true"
fi
elif [[ "${{ github.event_name }}" == "push" ]]; then
# On push events we do generate images and enable multi-arch builds
PUSH_ON_BUILD="true"
BUILD_MULTI_ARCH_IMAGES="true"
fi
echo "PUSH_ON_BUILD=${PUSH_ON_BUILD}" >> $GITHUB_ENV
echo "BUILD_MULTI_ARCH_IMAGES=${BUILD_MULTI_ARCH_IMAGES}" >> $GITHUB_ENV
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Get built packages
uses: actions/download-artifact@v4
with:
path: ${{ github.workspace }}/dist/
pattern: toolkit-container-*-${{ github.run_id }}
merge-multiple: true
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build image
env:
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/container-toolkit
VERSION: ${COMMIT_SHORT_SHA}
run: |
echo "${VERSION}"
make -f deployments/container/Makefile build-${{ matrix.dist }}

View File

@@ -1,52 +0,0 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run this workflow on new tags
name: Release
on:
push:
tags:
- v*
jobs:
release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
name: Check out code
- name: Create Draft Release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OWNER: ${{ github.repository_owner }}
REPO: ${{ github.event.repository.name }}
run: |
GH_EXTRA_ARGS=""
if [[ ${{ github.ref }} == *-rc.* ]]; then
GH_EXTRA_ARGS="--prerelease"
fi
gh release create ${{ github.ref }} \
--draft \
-t "${{ github.ref }}" \
-R $OWNER/$REPO \
--verify-tag \
$GH_EXTRA_ARGS
- name: Upload Release Artifacts
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OWNER: ${{ github.repository_owner }}
REPO: ${{ github.event.repository.name }}
run: |
gh release upload ${{ github.ref }} CHANGELOG.md -R $OWNER/$REPO

2
.gitignore vendored
View File

@@ -1,11 +1,9 @@
dist
artifacts
*.swp
*.swo
/coverage.out*
/test/output/
/nvidia-container-runtime
/nvidia-container-runtime.*
/nvidia-container-runtime-hook
/nvidia-container-toolkit
/nvidia-ctk

View File

@@ -15,6 +15,68 @@
include:
- .common-ci.yml
build-dev-image:
stage: image
script:
- apk --no-cache add make bash
- make .build-image
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- make .push-build-image
.requires-build-image:
image: "${BUILDIMAGE}"
.go-check:
extends:
- .requires-build-image
stage: go-checks
fmt:
extends:
- .go-check
script:
- make assert-fmt
vet:
extends:
- .go-check
script:
- make vet
lint:
extends:
- .go-check
script:
- make lint
allow_failure: true
ineffassign:
extends:
- .go-check
script:
- make ineffassign
allow_failure: true
misspell:
extends:
- .go-check
script:
- make misspell
go-build:
extends:
- .requires-build-image
stage: go-build
script:
- make build
unit-tests:
extends:
- .requires-build-image
stage: unit-tests
script:
- make coverage
# Define the package build helpers
.multi-arch-build:
before_script:
@@ -40,48 +102,56 @@ include:
name: ${ARTIFACTS_NAME}
paths:
- ${ARTIFACTS_ROOT}
needs:
- job: package-meta-packages
artifacts: true
# Define the package build targets
package-meta-packages:
extends:
- .package-artifacts
stage: package-build
variables:
SKIP_LIBNVIDIA_CONTAINER: "yes"
SKIP_NVIDIA_CONTAINER_TOOLKIT: "yes"
parallel:
matrix:
- PACKAGING: [deb, rpm]
before_script:
- apk add --no-cache coreutils build-base sed git bash make
script:
- ./scripts/build-packages.sh ${PACKAGING}
artifacts:
name: ${ARTIFACTS_NAME}
paths:
- ${ARTIFACTS_ROOT}
package-centos7-aarch64:
package-amazonlinux2-aarch64:
extends:
- .package-build
- .dist-centos7
- .dist-amazonlinux2
- .arch-aarch64
package-amazonlinux2-x86_64:
extends:
- .package-build
- .dist-amazonlinux2
- .arch-x86_64
package-centos7-x86_64:
extends:
- .package-build
- .dist-centos7
- .arch-x86_64
package-centos8-aarch64:
extends:
- .package-build
- .dist-centos8
- .arch-aarch64
package-centos8-ppc64le:
extends:
- .package-build
- .dist-centos8
- .arch-ppc64le
package-centos8-x86_64:
extends:
- .package-build
- .dist-centos8
- .arch-x86_64
package-debian10-amd64:
extends:
- .package-build
- .dist-debian10
- .arch-amd64
package-opensuse-leap15.1-x86_64:
extends:
- .package-build
- .dist-opensuse-leap15.1
- .arch-x86_64
package-ubuntu18.04-amd64:
extends:
- .package-build
@@ -126,7 +196,15 @@ package-ubuntu18.04-ppc64le:
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
script:
- make -f deployments/container/Makefile build-${DIST}
- make -f build/container/Makefile build-${DIST}
image-centos7:
extends:
- .image-build
- .package-artifacts
- .dist-centos7
needs:
- package-centos7-x86_64
image-ubi8:
extends:
@@ -134,9 +212,10 @@ image-ubi8:
- .package-artifacts
- .dist-ubi8
needs:
# Note: The ubi8 image uses the centos7 packages
- package-centos7-aarch64
- package-centos7-x86_64
# Note: The ubi8 image uses the centos8 packages
- package-centos8-aarch64
- package-centos8-x86_64
- package-centos8-ppc64le
image-ubuntu20.04:
extends:
@@ -156,14 +235,14 @@ image-packaging:
- .package-artifacts
- .dist-packaging
needs:
- job: package-centos8-aarch64
- job: package-centos8-x86_64
- job: package-ubuntu18.04-amd64
- job: package-ubuntu18.04-arm64
- job: package-amazonlinux2-aarch64
optional: true
- job: package-amazonlinux2-x86_64
optional: true
- job: package-centos7-aarch64
optional: true
- job: package-centos7-x86_64
optional: true
- job: package-centos8-ppc64le
@@ -232,3 +311,4 @@ test-docker-ubuntu20.04:
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04

10
.gitmodules vendored
View File

@@ -1,4 +1,12 @@
[submodule "third_party/libnvidia-container"]
path = third_party/libnvidia-container
url = https://github.com/NVIDIA/libnvidia-container.git
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
branch = main
[submodule "third_party/nvidia-container-runtime"]
path = third_party/nvidia-container-runtime
url = https://gitlab.com/nvidia/container-toolkit/container-runtime.git
branch = main
[submodule "third_party/nvidia-docker"]
path = third_party/nvidia-docker
url = https://gitlab.com/nvidia/container-toolkit/nvidia-docker.git
branch = main

View File

@@ -1,36 +0,0 @@
run:
deadline: 10m
linters:
enable:
- contextcheck
- gocritic
- gofmt
- goimports
- gosec
- gosimple
- govet
- ineffassign
- misspell
- staticcheck
- unconvert
linters-settings:
goimports:
local-prefixes: github.com/NVIDIA/nvidia-container-toolkit
issues:
exclude:
# The legacy hook relies on spec.Hooks.Prestart, which is deprecated as of the v1.2.0 OCI runtime spec.
- "SA1019:(.+).Prestart is deprecated(.+)"
exclude-rules:
# Exclude the gocritic dupSubExpr issue for cgo files.
- path: internal/dxcore/dxcore.go
linters:
- gocritic
text: dupSubExpr
# Exclude the checks for usage of returns to config.Delete(Path) in the crio and containerd config packages.
- path: pkg/config/engine/
linters:
- errcheck
text: config.Delete

View File

@@ -33,11 +33,11 @@ variables:
# On the multi-arch builder we don't need the qemu setup.
SKIP_QEMU_SETUP: "1"
# Define the public staging registry
STAGING_REGISTRY: ghcr.io/nvidia
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
ARTIFACTORY_REPO_BASE: "https://urm.nvidia.com/artifactory/sw-gpu-cloudnative"
KITMAKER_RELEASE_FOLDER: "kitmaker"
PACKAGE_ARCHIVE_RELEASE_FOLDER: "releases"
# TODO: We should set the kitmaker release folder here once we have the end-to-end workflow set up
KITMAKER_RELEASE_FOLDER: "testing"
.image-pull:
stage: image-build
@@ -67,7 +67,12 @@ variables:
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
script:
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
- make -f deployments/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
- make -f build/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
image-centos7:
extends:
- .dist-centos7
- .image-pull
image-ubi8:
extends:
@@ -115,7 +120,6 @@ image-packaging:
- if [ -z "$SSA_TOKEN" ]; then exit 1; else echo "SSA_TOKEN set!"; fi
script:
- pulse-cli -n $NSPECT_ID --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
- rm -f "${IMAGE_ARCHIVE}"
artifacts:
when: always
expire_in: 1 week
@@ -127,6 +131,23 @@ image-packaging:
- policy_evaluation.json
# Define the scan targets
scan-centos7-amd64:
extends:
- .dist-centos7
- .platform-amd64
- .scan
needs:
- image-centos7
scan-centos7-arm64:
extends:
- .dist-centos7
- .platform-arm64
- .scan
needs:
- image-centos7
- scan-centos7-amd64
scan-ubuntu20.04-amd64:
extends:
- .dist-ubuntu20.04
@@ -161,13 +182,6 @@ scan-ubi8-arm64:
- image-ubi8
- scan-ubi8-amd64
scan-packaging:
extends:
- .dist-packaging
- .scan
needs:
- image-packaging
# Define external release helpers
.release:ngc:
extends:
@@ -190,37 +204,19 @@ scan-packaging:
PACKAGE_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
PACKAGE_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}-packaging"
KITMAKER_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-generic-local/${KITMAKER_RELEASE_FOLDER}"
ARTIFACTS_DIR: "${CI_PROJECT_DIR}/artifacts"
script:
- !reference [.regctl-setup, before_script]
- apk add --no-cache bash git
- regctl registry login "${PACKAGE_REGISTRY}" -u "${PACKAGE_REGISTRY_USER}" -p "${PACKAGE_REGISTRY_TOKEN}"
- ./scripts/extract-packages.sh "${PACKAGE_IMAGE_NAME}:${PACKAGE_IMAGE_TAG}"
# TODO: ./scripts/release-packages-artifactory.sh "${DIST}-${ARCH}" "${PACKAGE_ARTIFACTORY_REPO}"
- ./scripts/release-kitmaker-artifactory.sh "${KITMAKER_ARTIFACTORY_REPO}"
- rm -rf ${ARTIFACTS_DIR}
# Define the package release targets
release:packages:kitmaker:
extends:
- .release:packages
release:archive:
extends:
- .release:external
needs:
- image-packaging
variables:
VERSION: "${CI_COMMIT_SHORT_SHA}"
PACKAGE_REGISTRY: "${CI_REGISTRY}"
PACKAGE_REGISTRY_USER: "${CI_REGISTRY_USER}"
PACKAGE_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
PACKAGE_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
PACKAGE_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}-packaging"
PACKAGE_ARCHIVE_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-generic-local/${PACKAGE_ARCHIVE_RELEASE_FOLDER}"
script:
- apk add --no-cache bash git
- ./scripts/archive-packages.sh "${PACKAGE_ARCHIVE_ARTIFACTORY_REPO}"
release:staging-ubuntu20.04:
extends:
- .release:staging
@@ -230,6 +226,11 @@ release:staging-ubuntu20.04:
# Define the external release targets
# Release to NGC
release:ngc-centos7:
extends:
- .dist-centos7
- .release:ngc
release:ngc-ubuntu20.04:
extends:
- .dist-ubuntu20.04
@@ -239,67 +240,3 @@ release:ngc-ubi8:
extends:
- .dist-ubi8
- .release:ngc
release:ngc-packaging:
extends:
- .dist-packaging
- .release:ngc
# Define the external image signing steps for NGC
# Download the ngc cli binary for use in the sign steps
.ngccli-setup:
before_script:
- apt-get update && apt-get install -y curl unzip jq
- |
if [ -z "${NGCCLI_VERSION}" ]; then
NGC_VERSION_URL="https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions"
# Extract the latest version from the JSON data using jq
export NGCCLI_VERSION=$(curl -s $NGC_VERSION_URL | jq -r '.recipe.latestVersionIdStr')
fi
echo "NGCCLI_VERSION ${NGCCLI_VERSION}"
- curl -sSLo ngccli_linux.zip https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/${NGCCLI_VERSION}/files/ngccli_linux.zip
- unzip ngccli_linux.zip
- chmod u+x ngc-cli/ngc
# .sign forms the base of the deployment jobs which signs images in the CI registry.
# This is extended with the image name and version to be deployed.
.sign:ngc:
image: ubuntu:latest
stage: sign
rules:
- if: $CI_COMMIT_TAG
variables:
NGC_CLI_API_KEY: "${NGC_REGISTRY_TOKEN}"
IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
IMAGE_TAG: "${CI_COMMIT_TAG}-${DIST}"
retry:
max: 2
before_script:
- !reference [.ngccli-setup, before_script]
# We ensure that the IMAGE_NAME and IMAGE_TAG are set
- 'echo Image Name: ${IMAGE_NAME} && [[ -n "${IMAGE_NAME}" ]] || exit 1'
- 'echo Image Tag: ${IMAGE_TAG} && [[ -n "${IMAGE_TAG}" ]] || exit 1'
script:
- 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"'
- ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia
sign:ngc-ubuntu20.04:
extends:
- .dist-ubuntu20.04
- .sign:ngc
needs:
- release:ngc-ubuntu20.04
sign:ngc-ubi8:
extends:
- .dist-ubi8
- .sign:ngc
needs:
- release:ngc-ubi8
sign:ngc-packaging:
extends:
- .dist-packaging
- .sign:ngc
needs:
- release:ngc-packaging

View File

@@ -1,209 +1,21 @@
# NVIDIA Container Toolkit Changelog
## v1.16.0-rc.1
## v1.12.2
- Support vulkan ICD files directly in a driver root. This allows for the discovery of vulkan files in GKE driver installations.
- Increase priority of ld.so.conf.d config file injected into container. This ensures that injected libraries are preferred over libraries present in the container.
- Set default CDI spec permissions to 644. This fixes permission issues when using the `nvidia-ctk cdi transform` functions.
- Add `dev-root` option to `nvidia-ctk system create-device-nodes` command.
- Fix location of `libnvidia-ml.so.1` when a non-standard driver root is used. This enabled CDI spec generation when using the driver container on a host.
- Recalculate minimum required CDI spec version on save.
- Move `nvidia-ctk hook` commands to a separate `nvidia-cdi-hook` binary. The same subcommands are supported.
- Use `:` as an `nvidia-ctk config --set` list separator. This fixes a bug when trying to set config options that are lists.
* [libnvidia-container] Fix segfault on WSL2 systems
- [toolkit-container] Bump CUDA base image version to 12.5.0
- [toolkit-container] Allow the path to `toolkit.pid` to be specified directly.
- [toolkit-container] Remove provenance information from image manifests.
- [toolkit-container] Add `dev-root` option when configuring the toolkit. This adds support for GKE driver installations.
## v1.15.0
* Remove `nvidia-container-runtime` and `nvidia-docker2` packages.
* Use `XDG_DATA_DIRS` environment variable when locating config files such as graphics config files.
* Add support for v0.7.0 Container Device Interface (CDI) specification.
* Add `--config-search-path` option to `nvidia-ctk cdi generate` command. These paths are used when locating driver files such as graphics config files.
* Use D3DKMTEnumAdapters3 to enumerate adapters on WSL2 if available.
* Add support for v1.2.0 OCI Runtime specification.
* Explicitly set `NVIDIA_VISIBLE_DEVICES=void` in generated CDI specifications. This prevents the NVIDIA Container Runtime from making additional modifications.
* [libnvidia-container] Use D3DKMTEnumAdapters3 to enumerate adapters on WSL2 if available.
* [toolkit-container] Bump CUDA base image version to 12.4.1
## v1.15.0-rc.4
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
* Allow multiple device naming strategies for `nvidia-ctk cdi generate` command. This allows a single
CDI spec to be generated that includes GPUs by index and UUID.
* Set the default `--device-name-strategy` for the `nvidia-ctk cdi generate` command to `[index, uuid]`.
* Remove `libnvidia-container0` jetpack dependency included for legacy Tegra-based systems.
* Add `NVIDIA_VISIBLE_DEVICES=void` to generated CDI specifications.
* [toolkit-container] Remove centos7 image. The ubi8 image can be used on all RPM-based platforms.
* [toolkit-container] Bump CUDA base image version to 12.3.2
## v1.15.0-rc.3
* Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied.
## v1.15.0-rc.2
* Extend the `runtime.nvidia.com/gpu` CDI kind to support full-GPUs and MIG devices specified by index or UUID.
* Fix bug when specifying `--dev-root` for Tegra-based systems.
* Log explicitly requested runtime mode.
* Remove package dependency on libseccomp.
* Added detection of libnvdxgdmal.so.1 on WSL2
* Use devRoot to resolve MIG device nodes.
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
* Add `crun` to the list of configured low-level runtimes.
* Added support for `--ldconfig-path` to `nvidia-ctk cdi generate` command.
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker.
* Add discovery of the GDRCopy device (`gdrdrv`) if the `NVIDIA_GDRCOPY` environment variable of the container is set to `enabled`
* [toolkit-container] Bump CUDA base image version to 12.3.1.
## v1.15.0-rc.1
* Skip update of ldcache in containers without ldconfig. The .so.SONAME symlinks are still created.
* Normalize ldconfig path on use. This automatically adjusts the ldconfig setting applied to ldconfig.real on systems where this exists.
* Include `nvidia/nvoptix.bin` in list of graphics mounts.
* Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts.
* Add support for `--library-search-paths` to `nvidia-ctk cdi generate` command.
* Add support for injecting /dev/nvidia-nvswitch* devices if the NVIDIA_NVSWITCH=enabled envvar is specified.
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
* Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly.
* Add `--relative-to` option to `nvidia-ctk transform root` command. This controls whether the root transformation is applied to host or container paths.
* Added automatic CDI spec generation when the `runtime.nvidia.com/gpu=all` device is requested by a container.
* [libnvidia-container] Fix device permission check when using cgroupv2 (fixes #227)
## v1.14.3
* [toolkit-container] Bump CUDA base image version to 12.2.2.
## v1.14.2
* Fix bug on Tegra-based systems where symlinks were not created in containers.
* Add --csv.ignore-pattern command line option to nvidia-ctk cdi generate command.
## v1.14.1
* Fixed bug where contents of `/etc/nvidia-container-runtime/config.toml` is ignored by the NVIDIA Container Runtime Hook.
* [libnvidia-container] Use libelf.so on RPM-based systems due to removed mageia repositories hosting pmake and bmake.
## v1.14.0
* Promote v1.14.0-rc.3 to v1.14.0
## v1.14.0-rc.3
* Added support for generating OCI hook JSON file to `nvidia-ctk runtime configure` command.
* Remove installation of OCI hook JSON from RPM package.
* Refactored config for `nvidia-container-runtime-hook`.
* Added a `nvidia-ctk config` command which supports setting config options using a `--set` flag.
* Added `--library-search-path` option to `nvidia-ctk cdi generate` command in `csv` mode. This allows folders where
libraries are located to be specified explicitly.
* Updated go-nvlib to support devices which are not present in the PCI device database. This allows the creation of dev/char symlinks on systems with such devices installed.
* Added `UsesNVGPUModule` info function for more robust platform detection. This is required on Tegra-based systems where libnvidia-ml.so is also supported.
* [toolkit-container] Set `NVIDIA_VISIBLE_DEVICES=void` to prevent injection of NVIDIA devices and drivers into the NVIDIA Container Toolkit container.
## v1.14.0-rc.2
* Fix bug causing incorrect nvidia-smi symlink to be created on WSL2 systems with multiple driver roots.
* Remove dependency on coreutils when installing package on RPM-based systems.
* Create output folders if required when running `nvidia-ctk runtime configure`
* Generate default config as post-install step.
* Added support for detecting GSP firmware at custom paths when generating CDI specifications.
* Added logic to skip the extraction of image requirements if `NVIDIA_DISABLE_REQUIRES` is set to `true`.
* [libnvidia-container] Include Shared Compiler Library (libnvidia-gpucomp.so) in the list of compute libraries.
* [toolkit-container] Ensure that common envvars have higher priority when configuring the container engines.
* [toolkit-container] Bump CUDA base image version to 12.2.0.
* [toolkit-container] Remove installation of nvidia-experimental runtime. This is superseded by the NVIDIA Container Runtime in CDI mode.
## v1.14.0-rc.1
* Add support for updating containerd configs to the `nvidia-ctk runtime configure` command.
* Create file in `etc/ld.so.conf.d` with permissions `644` to support non-root containers.
* Generate CDI specification files with `644` permissions to allow rootless applications (e.g. podman)
* Add `nvidia-ctk cdi list` command to show the known CDI devices.
* Add support for generating merged devices (e.g. `all` device) to the nvcdi API.
* Use *.* pattern to locate libcuda.so when generating a CDI specification to support platforms where a patch version is not specified.
* Update go-nvlib to skip devices that are not MIG capable when generating CDI specifications.
* Add `nvidia-container-runtime-hook.path` config option to specify NVIDIA Container Runtime Hook path explicitly.
* Fix bug in creation of `/dev/char` symlinks by failing operation if kernel modules are not loaded.
* Add option to load kernel modules when creating device nodes
* Add option to create device nodes when creating `/dev/char` symlinks
* [libnvidia-container] Support OpenSSL 3 with the Encrypt/Decrypt library
* [toolkit-container] Allow same envvars for all runtime configs
## v1.13.1
* Update `update-ldcache` hook to only update ldcache if it exists.
* Update `update-ldcache` hook to create `/etc/ld.so.conf.d` folder if it doesn't exist.
* Fix failure when libcuda cannot be located during XOrg library discovery.
* Fix CDI spec generation on systems that use `/etc/alternatives` (e.g. Debian)
## v1.13.0
* Promote 1.13.0-rc.3 to 1.13.0
## v1.13.0-rc.3
* Only initialize NVML for modes that require it when running `nvidia-ctk cdi generate`.
* Prefer /run over /var/run when locating nvidia-persistenced and nvidia-fabricmanager sockets.
* Fix the generation of CDI specifications for management containers when the driver libraries are not in the LDCache.
* Add transformers to deduplicate and simplify CDI specifications.
* Generate a simplified CDI specification by default. This means that entities in the common edits in a spec are not included in device definitions.
* Also return an error from the nvcdi.New constructor instead of panicking.
* Detect XOrg libraries for injection and CDI spec generation.
* Add `nvidia-ctk system create-device-nodes` command to create control devices.
* Add `nvidia-ctk cdi transform` command to apply transforms to CDI specifications.
* Add `--vendor` and `--class` options to `nvidia-ctk cdi generate`
* [libnvidia-container] Fix segmentation fault when RPC initialization fails.
* [libnvidia-container] Build centos variants of the NVIDIA Container Library with static libtirpc v1.3.2.
* [libnvidia-container] Remove make targets for fedora35 as the centos8 packages are compatible.
* [toolkit-container] Add `nvidia-container-runtime.modes.cdi.annotation-prefixes` config option that allows the CDI annotation prefixes that are read to be overridden.
* [toolkit-container] Create device nodes when generating CDI specification for management containers.
* [toolkit-container] Add `nvidia-container-runtime.runtimes` config option to set the low-level runtime for the NVIDIA Container Runtime
## v1.13.0-rc.2
## v1.12.1
* Don't fail chmod hook if paths are not injected
* Only create `by-path` symlinks if CDI devices are actually requested.
* Fix possible blank `nvidia-ctk` path in generated CDI specifications
* Fix error in postun scriptlet on RPM-based systems
* Only check `NVIDIA_VISIBLE_DEVICES` for environment variables if no annotations are specified.
* Add `cdi.default-kind` config option for constructing fully-qualified CDI device names in CDI mode
* Add support for `accept-nvidia-visible-devices-envvar-unprivileged` config setting in CDI mode
* Add `nvidia-container-runtime-hook.skip-mode-detection` config option to bypass mode detection. This allows `legacy` and `cdi` mode, for example, to be used at the same time.
* Add support for generating CDI specifications for GDS and MOFED devices
* Ensure CDI specification is validated on save when generating a spec
* Rename `--discovery-mode` argument to `--mode` for `nvidia-ctk cdi generate`
* [libnvidia-container] Fix segfault on WSL2 systems
* [toolkit-container] Add `--cdi-enabled` flag to toolkit config
* Fix missing NVML symbols when running `nvidia-ctk` on some platforms [#49]
* Discover all `gsp*.bin` GSP firmware files when generating CDI specification.
* Remove `fedora35` packaging targets
* [libnvidia-container] Include all `gsp*.bin` firmware files if present
* [toolkit-container] Install `nvidia-ctk` from toolkit container
* [toolkit-container] Use installed `nvidia-ctk` path in NVIDIA Container Toolkit config
* [toolkit-container] Bump CUDA base images to 12.1.0
* [toolkit-container] Set `nvidia-ctk` path in the
* [toolkit-container] Add `cdi.k8s.io/*` to set of allowed annotations in containerd config
* [toolkit-container] Generate CDI specification for use in management containers
* [toolkit-container] Install experimental runtime as `nvidia-container-runtime.experimental` instead of `nvidia-container-runtime-experimental`
* [toolkit-container] Install and configure mode-specific runtimes for `cdi` and `legacy` modes
## v1.13.0-rc.1
* Include MIG-enabled devices as GPUs when generating CDI specification
* Fix missing NVML symbols when running `nvidia-ctk` on some platforms [#49]
* Add CDI spec generation for WSL2-based systems to `nvidia-ctk cdi generate` command
* Add `auto` mode to `nvidia-ctk cdi generate` command to automatically detect a WSL2-based system over a standard NVML-based system.
* Add mode-specific (`.cdi` and `.legacy`) NVIDIA Container Runtime binaries for use in the GPU Operator
* Discover all `gsp*.bin` GSP firmware files when generating CDI specification.
* Align `.deb` and `.rpm` release candidate package versions
* Remove `fedora35` packaging targets
* [libnvidia-container] Include all `gsp*.bin` firmware files if present
* [libnvidia-container] Align `.deb` and `.rpm` release candidate package versions
* [libnvidia-container] Remove `fedora35` packaging targets
* [toolkit-container] Install `nvidia-container-toolkit-operator-extensions` package for mode-specific executables.
* [toolkit-container] Allow `nvidia-container-runtime.mode` to be set when configuring the NVIDIA Container Toolkit
## v1.12.0

View File

@@ -19,7 +19,7 @@ where `TARGET` is a make target that is valid for each of the sub-components.
These include:
* `ubuntu18.04-amd64`
* `centos7-x86_64`
* `centos8-x86_64`
If no `TARGET` is specified, all valid release targets are built.

142
Jenkinsfile vendored Normal file
View File

@@ -0,0 +1,142 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
// Pipeline entry point. The build runs in a pod with two containers:
//   - 'docker' (docker:dind, privileged): used for the make-based build and release steps
//   - 'golang' (golang:1.16.3): used for the Go check and test targets
podTemplate (cloud:'sw-gpu-cloudnative',
containers: [
containerTemplate(name: 'docker', image: 'docker:dind', ttyEnabled: true, privileged: true),
containerTemplate(name: 'golang', image: 'golang:1.16.3', ttyEnabled: true)
]) {
node(POD_LABEL) {
// scmInfo holds the result of the checkout step; it is passed to getVersionInfo below.
def scmInfo
stage('checkout') {
scmInfo = checkout(scm)
}
// Install the tooling needed by the later stages in each container.
stage('dependencies') {
container('golang') {
// Go-based check tools, fetched with module mode disabled.
sh 'GO111MODULE=off go get -u github.com/client9/misspell/cmd/misspell'
sh 'GO111MODULE=off go get -u github.com/gordonklaus/ineffassign'
sh 'GO111MODULE=off go get -u golang.org/x/lint/golint'
}
container('docker') {
// The docker container needs make, bash and git for the build stages.
sh 'apk add --no-cache make bash git'
}
}
// Run each check make target as its own parallel branch.
stage('check') {
parallel (
getGolangStages(["assert-fmt", "lint", "vet", "ineffassign", "misspell"])
)
}
stage('test') {
parallel (
getGolangStages(["test"])
)
}
// versionInfo is populated in the 'version' stage and read in the 'release' stage.
def versionInfo
stage('version') {
container('docker') {
versionInfo = getVersionInfo(scmInfo)
println "versionInfo=${versionInfo}"
}
}
// Only a single distribution/architecture combination is built and released here.
def dist = 'ubuntu20.04'
def arch = 'amd64'
def stageLabel = "${dist}-${arch}"
stage('build-one') {
container('docker') {
stage (stageLabel) {
sh "make ${dist}-${arch}"
}
}
}
stage('release') {
container('docker') {
stage (stageLabel) {
def component = 'main'
def repository = 'sw-gpu-cloudnative-debian-local/pool/main/'
// Upload spec: selects the built .deb files under ./dist/<dist>/<arch>/ and
// sets the deb.distribution, deb.component and deb.architecture properties.
def uploadSpec = """{
"files":
[ {
"pattern": "./dist/${dist}/${arch}/*.deb",
"target": "${repository}",
"props": "deb.distribution=${dist};deb.component=${component};deb.architecture=${arch}"
}
]
}"""
sh "echo starting release with versionInfo=${versionInfo}"
// Packages are only uploaded when versionInfo.isTag is set; otherwise the release is skipped.
if (versionInfo.isTag) {
// upload to artifactory repository
def server = Artifactory.server 'sw-gpu-artifactory'
server.upload spec: uploadSpec
} else {
sh "echo skipping release for non-tagged build"
}
}
}
}
}
}
// getGolangStages builds a map from each make target name to the closure that
// runs that target (see getLintClosure). The pipeline passes the returned map
// to the `parallel` step so that every target runs as its own branch.
//
// targets: list of make target names.
// returns: map of target name -> closure.
def getGolangStages(def targets) {
    // Declared with `def` so the map is local to this call. Without it, the
    // assignment would create a script-binding (global) variable that is
    // shared between invocations.
    def stages = [:]
    for (t in targets) {
        stages[t] = getLintClosure(t)
    }
    return stages
}
// getLintClosure returns a closure that, when invoked, opens a stage named
// after the given target inside the 'golang' container and runs
// `make <target>` there.
def getLintClosure(def target) {
    def runTarget = {
        container('golang') {
            stage(target) {
                sh "make ${target}"
            }
        }
    }
    return runTarget
}
// getVersionInfo returns a map of version info for the build. The map holds
// an `isTag` entry computed from scmInfo.GIT_BRANCH, followed by every
// key/value pair of scmInfo (an scmInfo entry with the same key overrides it).
def getVersionInfo(def scmInfo) {
    def info = [:]
    info.isTag = isTag(scmInfo.GIT_BRANCH)
    scmInfo.each { key, value ->
        info[key] = value
    }
    return info
}
// isTag reports whether the given branch name refers to a tag. Only names
// starting with 'v' are considered; for those, the output of `git describe`
// for the current checkout must be exactly "tags/<branch>".
def isTag(def branch) {
    if (branch.startsWith('v')) {
        def described = shOutput('git describe --all --exact-match --always')
        return described == "tags/${branch}"
    }
    return false
}
// shOutput runs the given shell script and returns its standard output with
// leading and trailing whitespace removed.
//
// The helper was originally defined under the misspelled name `shOuptut`,
// while isTag calls `shOutput`; defining it under the correctly spelled name
// makes that call resolve.
def shOutput(def script) {
    return sh(script: script, returnStdout: true).trim()
}

// shOuptut is the original, misspelled name of shOutput. It is kept as a thin
// alias so that any caller using the old spelling keeps working.
def shOuptut(def script) {
    return shOutput(script)
}

View File

@@ -38,8 +38,8 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
CHECK_TARGETS := lint
MAKE_TARGETS := binaries build check fmt test examples cmds coverage generate licenses vendor check-vendor $(CHECK_TARGETS)
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
@@ -53,26 +53,22 @@ CLI_VERSION = $(VERSION)
endif
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
GOOS ?= linux
binaries: cmds
ifneq ($(PREFIX),)
cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
endif
cmds: $(CMD_TARGETS)
ifneq ($(shell uname),Darwin)
EXTLDFLAGS = -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
else
EXTLDFLAGS = -Wl,-undefined,dynamic_lookup
endif
$(CMD_TARGETS): cmd-%:
go build -ldflags "-s -w '-extldflags=$(EXTLDFLAGS)' -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
build:
go build ./...
GOOS=$(GOOS) go build ./...
examples: $(EXAMPLE_TARGETS)
$(EXAMPLE_TARGETS): example-%:
go build ./examples/$(*)
GOOS=$(GOOS) go build ./examples/$(*)
all: check test build binary
check: $(CHECK_TARGETS)
@@ -82,28 +78,37 @@ fmt:
go list -f '{{.Dir}}' $(MODULE)/... \
| xargs gofmt -s -l -w
# Apply goimports -local github.com/NVIDIA/container-toolkit to the codebase
goimports:
go list -f {{.Dir}} $(MODULE)/... \
| xargs goimports -local $(MODULE) -w
assert-fmt:
go list -f '{{.Dir}}' $(MODULE)/... \
| xargs gofmt -s -l > fmt.out
@if [ -s fmt.out ]; then \
echo "\nERROR: The following files are not formatted:\n"; \
cat fmt.out; \
rm fmt.out; \
exit 1; \
else \
rm fmt.out; \
fi
ineffassign:
ineffassign $(MODULE)/...
lint:
golangci-lint run ./...
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status
vendor:
go mod tidy
go mod vendor
go mod verify
misspell:
misspell $(MODULE)/...
check-vendor: vendor
git diff --quiet HEAD -- go.mod go.sum vendor
vet:
go vet $(MODULE)/...
licenses:
go-licenses csv $(MODULE)/...
COVERAGE_FILE := coverage.out
test: build cmds
go test -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
coverage: test
cat $(COVERAGE_FILE) | grep -v "_mock.go" > $(COVERAGE_FILE).no-mocks
@@ -112,15 +117,32 @@ coverage: test
generate:
go generate $(MODULE)/...
$(DOCKER_TARGETS): docker-%:
@echo "Running 'make $(*)' in container image $(BUILDIMAGE)"
# Generate an image for containerized builds
# Note: This image is local only
.PHONY: .build-image .pull-build-image .push-build-image
.build-image: docker/Dockerfile.devel
if [ x"$(SKIP_IMAGE_BUILD)" = x"" ]; then \
$(DOCKER) build \
--progress=plain \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--tag $(BUILDIMAGE) \
-f $(^) \
docker; \
fi
.pull-build-image:
$(DOCKER) pull $(BUILDIMAGE)
.push-build-image:
$(DOCKER) push $(BUILDIMAGE)
$(DOCKER_TARGETS): docker-%: .build-image
@echo "Running 'make $(*)' in docker container $(BUILDIMAGE)"
$(DOCKER) run \
--rm \
-e GOCACHE=/tmp/.cache/go \
-e GOMODCACHE=/tmp/.cache/gomod \
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
-v $(PWD):/work \
-w /work \
-e GOCACHE=/tmp/.cache \
-v $(PWD):$(PWD) \
-w $(PWD) \
--user $$(id -u):$$(id -g) \
$(BUILDIMAGE) \
make $(*)
@@ -131,10 +153,8 @@ PHONY: .shell
$(DOCKER) run \
--rm \
-ti \
-e GOCACHE=/tmp/.cache/go \
-e GOMODCACHE=/tmp/.cache/gomod \
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
-v $(PWD):/work \
-w /work \
-e GOCACHE=/tmp/.cache \
-v $(PWD):$(PWD) \
-w $(PWD) \
--user $$(id -u):$$(id -g) \
$(BUILDIMAGE)

View File

@@ -12,32 +12,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvidia/cuda:12.5.0-base-ubi8 as build
# NOTE: In cases where the libc version is a concern, we would have to use an
# image based on the target OS to build the golang executables here -- especially
# if cgo code is included.
FROM golang:${GOLANG_VERSION} as build
RUN yum install -y \
wget make git gcc \
&& \
rm -rf /var/cache/yum/*
# We override the GOPATH to ensure that the binaries are installed to
# /artifacts/bin
ARG GOPATH=/artifacts
ARG GOLANG_VERSION=x.x.x
RUN set -eux; \
\
arch="$(uname -m)"; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz
ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
# Install the experimental nvidia-container-runtime
# NOTE: This will be integrated into the nvidia-container-toolkit package / repo
ARG NVIDIA_CONTAINER_RUNTIME_EXPERIMENTAL_VERSION=experimental
RUN GOPATH=/artifacts go install github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental@${NVIDIA_CONTAINER_RUNTIME_EXPERIMENTAL_VERSION}
WORKDIR /build
COPY . .
@@ -48,10 +40,20 @@ COPY . .
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
FROM nvidia/cuda:12.5.0-base-ubi8
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG BASE_DIST
# See https://www.centos.org/centos-linux-eol/
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
RUN [[ "${BASE_DIST}" != "centos8" ]] || \
( \
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
)
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=void
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=utility
ARG ARTIFACTS_ROOT
@@ -85,4 +87,11 @@ LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
yum update -y ${CVE_UPDATES} && \
rm -rf /var/cache/yum/*; \
fi
ENTRYPOINT ["/work/nvidia-toolkit"]

View File

@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
FROM nvidia/cuda:12.5.0-base-ubuntu20.04
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG ARTIFACTS_ROOT
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
@@ -22,11 +24,11 @@ COPY ${ARTIFACTS_ROOT} /artifacts/packages/
WORKDIR /artifacts/packages
# build-args are added to the manifest.txt file below.
ARG BASE_DIST
ARG PACKAGE_DIST
ARG PACKAGE_VERSION
ARG GIT_BRANCH
ARG GIT_COMMIT
ARG GIT_COMMIT_SHORT
ARG SOURCE_DATE_EPOCH
ARG VERSION

View File

@@ -12,31 +12,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvidia/cuda:12.5.0-base-ubuntu20.04 as build
# NOTE: In cases where the libc version is a concern, we would have to use an
# image based on the target OS to build the golang executables here -- especially
# if cgo code is included.
FROM golang:${GOLANG_VERSION} as build
RUN apt-get update && \
apt-get install -y wget make git gcc \
&& \
rm -rf /var/lib/apt/lists/*
# We override the GOPATH to ensure that the binaries are installed to
# /artifacts/bin
ARG GOPATH=/artifacts
ARG GOLANG_VERSION=x.x.x
RUN set -eux; \
\
arch="$(uname -m)"; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz
ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
# Install the experimental nvidia-container-runtime
# NOTE: This will be integrated into the nvidia-container-toolkit package / repo
ARG NVIDIA_CONTAINER_RUNTIME_EXPERIMENTAL_VERSION=experimental
RUN GOPATH=/artifacts go install github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental@${NVIDIA_CONTAINER_RUNTIME_EXPERIMENTAL_VERSION}
WORKDIR /build
COPY . .
@@ -47,7 +40,7 @@ COPY . .
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
FROM nvcr.io/nvidia/cuda:12.5.0-base-ubuntu20.04
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
RUN rm -f /etc/apt/sources.list.d/cuda.list
@@ -60,7 +53,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
rm -rf /var/lib/apt/lists/*
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=void
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=utility
ARG ARTIFACTS_ROOT
@@ -73,6 +66,14 @@ ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH ${TARGETARCH}
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container"
ARG LIBNVIDIA_CONTAINER0_VERSION
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
--output ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb && \
dpkg -i ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb; \
fi
RUN dpkg -i \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \
@@ -94,4 +95,11 @@ LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
apt-get update && apt-get upgrade -y ${CVE_UPDATES} && \
rm -rf /var/lib/apt/lists/*; \
fi
ENTRYPOINT ["/work/nvidia-toolkit"]

View File

@@ -14,7 +14,6 @@
BUILD_MULTI_ARCH_IMAGES ?= false
DOCKER ?= docker
REGCTL ?= regctl
BUILDX =
ifeq ($(BUILD_MULTI_ARCH_IMAGES),true)
@@ -45,20 +44,20 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules #####
DEFAULT_PUSH_TARGET := ubuntu20.04
DISTRIBUTIONS := ubuntu20.04 ubi8
DISTRIBUTIONS := ubuntu20.04 ubi8 centos7
META_TARGETS := packaging
BUILD_TARGETS := $(patsubst %,build-%,$(DISTRIBUTIONS) $(META_TARGETS))
PUSH_TARGETS := $(patsubst %,push-%,$(DISTRIBUTIONS) $(META_TARGETS))
TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
TEST_TARGETS := $(patsubst %,test-%, $(DISTRIBUTIONS))
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
ifneq ($(BUILD_MULTI_ARCH_IMAGES),true)
include $(CURDIR)/deployments/container/native-only.mk
include $(CURDIR)/build/container/native-only.mk
else
include $(CURDIR)/deployments/container/multi-arch.mk
include $(CURDIR)/build/container/multi-arch.mk
endif
# For the default push target we also push a short tag equal to the version.
@@ -75,16 +74,8 @@ endif
push-%: DIST = $(*)
push-short: DIST = $(DEFAULT_PUSH_TARGET)
# Define the push targets
$(PUSH_TARGETS): push-%:
$(CURDIR)/scripts/publish-image.sh $(IMAGE) $(OUT_IMAGE)
push-short:
$(CURDIR)/scripts/publish-image.sh $(IMAGE) $(OUT_IMAGE)
build-%: DIST = $(*)
build-%: DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile.$(DOCKERFILE_SUFFIX)
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
@@ -92,32 +83,46 @@ ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
$(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
DOCKER_BUILDKIT=1 \
$(DOCKER) $(BUILDX) build --pull \
--provenance=false --sbom=false \
$(DOCKER_BUILD_OPTIONS) \
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
--tag $(IMAGE) \
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
--build-arg BASE_DIST="$(BASE_DIST)" \
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg LIBNVIDIA_CONTAINER0_VERSION="$(LIBNVIDIA_CONTAINER0_DEPENDENCY)" \
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
--build-arg VERSION="$(VERSION)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
--build-arg GIT_COMMIT_SHORT="$(GIT_COMMIT_SHORT)" \
--build-arg GIT_BRANCH="$(GIT_BRANCH)" \
--build-arg SOURCE_DATE_EPOCH="$(SOURCE_DATE_EPOCH)" \
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
-f $(DOCKERFILE) \
$(CURDIR)
build-ubuntu%: BASE_DIST = $(*)
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
build-ubuntu%: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
build-ubi8: DOCKERFILE_SUFFIX := ubi8
build-ubi8: PACKAGE_DIST = centos7
build-ubi8: BASE_DIST := ubi8
build-ubi8: DOCKERFILE_SUFFIX := centos
build-ubi8: PACKAGE_DIST = centos8
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-centos7: BASE_DIST = $(*)
build-centos7: DOCKERFILE_SUFFIX := centos
build-centos7: PACKAGE_DIST = $(BASE_DIST)
build-centos7: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-packaging: BASE_DIST := ubuntu20.04
build-packaging: DOCKERFILE_SUFFIX := packaging
build-packaging: PACKAGE_ARCH := amd64
build-packaging: PACKAGE_DIST = all
build-packaging: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
# Test targets
test-%: DIST = $(*)
@@ -133,9 +138,15 @@ $(TEST_TARGETS): test-%:
test-packaging: DIST = packaging
test-packaging:
@echo "Testing package image contents"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/aarch64" || echo "Missing centos7/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/amazonlinux2/aarch64" || echo "Missing amazonlinux2/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/amazonlinux2/x86_64" || echo "Missing amazonlinux2/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/ppc64le" || echo "Missing centos7/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/x86_64" || echo "Missing centos7/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/aarch64" || echo "Missing centos8/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/ppc64le" || echo "Missing centos8/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/x86_64" || echo "Missing centos8/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/debian10/amd64" || echo "Missing debian10/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/opensuse-leap15.1/x86_64" || echo "Missing opensuse-leap15.1/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/amd64" || echo "Missing ubuntu18.04/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/arm64" || echo "Missing ubuntu18.04/arm64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/ppc64le" || echo "Missing ubuntu18.04/ppc64le"

View File

@@ -16,6 +16,20 @@ PUSH_ON_BUILD ?= false
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
REGCTL ?= regctl
$(PUSH_TARGETS): push-%:
$(REGCTL) \
image copy \
$(IMAGE) $(OUT_IMAGE)
push-short:
$(REGCTL) \
image copy \
$(IMAGE) $(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)
# We only have x86_64 packages for centos7
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
# We only generate amd64 image for ubuntu18.04
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64

View File

@@ -13,3 +13,11 @@
# limitations under the License.
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
$(PUSH_TARGETS): push-%:
$(DOCKER) tag "$(IMAGE)" "$(OUT_IMAGE)"
$(DOCKER) push "$(OUT_IMAGE)"
push-short:
$(DOCKER) tag "$(IMAGE_NAME):$(VERSION)-$(DEFAULT_PUSH_TARGET)" "$(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)"
$(DOCKER) push "$(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)"

View File

@@ -1,31 +0,0 @@
# NVIDIA CDI Hook
The CLI `nvidia-cdi-hook` provides container device runtime hook capabilities when
called by a container runtime, as specified in a
[Container Device Interface](https://tags.cncf.io/container-device-interface/blob/main/SPEC.md)
file.
## Generating a CDI
The CDI itself is created for an NVIDIA-capable device using the
[`nvidia-ctk cdi generate`](../nvidia-ctk/) command.
When `nvidia-ctk cdi generate` is run, the CDI specification is generated as a yaml file.
The CDI specification provides instructions for a container runtime to set up devices, files and
other resources for the container prior to starting it. Those instructions
may include executing command-line tools to prepare the filesystem. The execution
of such command-line tools is called a hook.
`nvidia-cdi-hook` is the CLI tool that is expected to be called by the container runtime,
when specified by the CDI file.
See the [`nvidia-ctk` documentation](../nvidia-ctk/README.md) for more information
on generating a CDI file.
## Functionality
The `nvidia-cdi-hook` CLI provides the following functionality:
* `chmod` - Change the permissions of a file or directory inside the directory path to be mounted into a container.
* `create-symlinks` - Create symlinks inside the directory path to be mounted into a container.
* `update-ldcache` - Update the dynamic linker cache inside the directory path to be mounted into a container.

View File

@@ -1,36 +0,0 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package commands
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// New returns the set of CDI hook subcommands, built from the update-ldcache,
// create-symlinks and chmod command packages. Every command is constructed
// with the supplied logger.
// The same set is shared by the nvidia-cdi-hook and nvidia-ctk hook commands.
func New(logger logger.Interface) []*cli.Command {
	hooks := []*cli.Command{
		ldcache.NewCommand(logger),
		symlinks.NewCommand(logger),
		chmod.NewCommand(logger),
	}
	return hooks
}

View File

@@ -1,93 +0,0 @@
/**
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
cli "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/commands"
)
// options collects the global settings of the CLI. Values can come from
// config files, environment variables, or command line flags.
type options struct {
	// Debug is true when the CLI was started in "debug" mode.
	Debug bool
	// Quiet is true when the CLI was started in "quiet" mode.
	Quiet bool
}
// main wires up the nvidia-cdi-hook CLI: it declares the global --debug and
// --quiet flags, registers the hook subcommands, and runs the application
// against the process arguments. If the run fails, the error is logged and the
// process exits with status 1.
func main() {
	logger := logrus.New()

	// Create an options struct to hold the parsed environment variables or command line flags
	opts := options{}

	// Create the top-level CLI
	c := cli.NewApp()
	c.Name = "NVIDIA CDI Hook"
	c.Usage = "Command to structure files for usage inside a container, called as hooks from a container runtime, defined in a CDI yaml file"
	c.Version = info.GetVersionString()
	c.UseShortOptionHandling = true
	c.EnableBashCompletion = true

	// Global flags; each one writes its parsed value into opts.
	c.Flags = []cli.Flag{
		&cli.BoolFlag{
			Name:        "debug",
			Aliases:     []string{"d"},
			Usage:       "Enable debug-level logging",
			Destination: &opts.Debug,
			EnvVars:     []string{"NVIDIA_CDI_DEBUG"},
		},
		&cli.BoolFlag{
			Name:        "quiet",
			Usage:       "Suppress all output except for errors; overrides --debug",
			Destination: &opts.Quiet,
			EnvVars:     []string{"NVIDIA_CDI_QUIET"},
		},
	}

	// Pick the log level once, before any subcommand runs. Quiet is checked
	// first because it overrides debug.
	c.Before = func(_ *cli.Context) error {
		switch {
		case opts.Quiet:
			logger.SetLevel(logrus.ErrorLevel)
		case opts.Debug:
			logger.SetLevel(logrus.DebugLevel)
		default:
			logger.SetLevel(logrus.InfoLevel)
		}
		return nil
	}

	// Define the subcommands
	c.Commands = commands.New(logger)

	// Run the CLI
	if err := c.Run(os.Args); err != nil {
		logger.Errorf("%v", err)
		os.Exit(1)
	}
}

View File

@@ -1,197 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
// command carries the state shared by the update-ldcache command's methods.
type command struct {
	// logger receives the debug output emitted while the command runs.
	logger logger.Interface
}
// options holds the parsed flag values of the update-ldcache command.
type options struct {
	// folders are the values of the repeatable --folder flag.
	folders cli.StringSlice
	// ldconfigPath is the value of --ldconfig-path (defaults to /sbin/ldconfig).
	ldconfigPath string
	// containerSpec is the value of --container-spec; per the flag usage text,
	// empty or "-" means the spec is read from STDIN.
	containerSpec string
}
// NewCommand returns the update-ldcache CLI command; the command logs through
// the given logger.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the update-ldcache cli.Command. The flag values are parsed
// into a single options value that the Before (validation) and Action (run)
// callbacks both receive.
func (m command) build() *cli.Command {
	opts := options{}

	// Create the 'update-ldcache' command
	return &cli.Command{
		Name:  "update-ldcache",
		Usage: "Update ldcache in a container by running ldconfig",
		Before: func(ctx *cli.Context) error {
			return m.validateFlags(ctx, &opts)
		},
		Action: func(ctx *cli.Context) error {
			return m.run(ctx, &opts)
		},
		Flags: []cli.Flag{
			&cli.StringSliceFlag{
				Name:        "folder",
				Usage:       "Specify a folder to add to /etc/ld.so.conf before updating the ld cache",
				Destination: &opts.folders,
			},
			&cli.StringFlag{
				Name:        "ldconfig-path",
				Usage:       "Specify the path to the ldconfig program",
				Destination: &opts.ldconfigPath,
				Value:       "/sbin/ldconfig",
			},
			&cli.StringFlag{
				Name:        "container-spec",
				Usage:       "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
				Destination: &opts.containerSpec,
			},
		},
	}
}
// validateFlags rejects an empty ldconfig path; no other flag is checked.
// It returns nil when the options are usable.
func (m command) validateFlags(c *cli.Context, cfg *options) error {
	if cfg.ldconfigPath != "" {
		return nil
	}
	return errors.New("ldconfig-path must be specified")
}
// run updates the dynamic linker cache of a container by exec'ing ldconfig
// against the container root.
//
// The container root is taken from the OCI container state loaded from
// cfg.containerSpec. The requested folders are either written to a new file in
// the container's /etc/ld.so.conf.d (when that directory exists) or passed to
// ldconfig as extra arguments.
//
// On success this function does not return: syscall.Exec replaces the current
// process with ldconfig. An error is returned if the container state cannot be
// loaded, the container root cannot be determined, the config file cannot be
// written, or the exec itself fails.
func (m command) run(c *cli.Context, cfg *options) error {
	s, err := oci.LoadContainerState(cfg.containerSpec)
	if err != nil {
		return fmt.Errorf("failed to load container state: %w", err)
	}

	containerRoot, err := s.GetContainerRoot()
	if err != nil {
		return fmt.Errorf("failed to determine container root: %w", err)
	}

	ldconfigPath := m.resolveLDConfigPath(cfg.ldconfigPath)
	// argv[0] is the program name; the full path is handed to Exec separately.
	args := []string{filepath.Base(ldconfigPath)}
	if containerRoot != "" {
		args = append(args, "-r", containerRoot)
	}

	rootfs := root(containerRoot)
	if rootfs.hasPath("/etc/ld.so.cache") {
		args = append(args, "-C", "/etc/ld.so.cache")
	} else {
		m.logger.Debugf("No ld.so.cache found, skipping update")
		args = append(args, "-N")
	}

	folders := cfg.folders.Value()
	if rootfs.hasPath("/etc/ld.so.conf.d") {
		if err := m.createConfig(containerRoot, folders); err != nil {
			return fmt.Errorf("failed to update ld.so.conf.d: %w", err)
		}
	} else {
		// Without a conf.d directory the folders are given to ldconfig directly.
		args = append(args, folders...)
	}

	// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
	// be configured to use a different config file by default.
	args = append(args, "-f", "/etc/ld.so.conf")

	//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
	return syscall.Exec(ldconfigPath, args, nil)
}
// root is a directory path that other paths are resolved against.
type root string

// hasPath reports whether path, joined onto the root, may exist. It returns
// false only when os.Stat reports that the entry does not exist; any other
// Stat outcome (success or a different error) yields true.
func (r root) hasPath(path string) bool {
	_, statErr := os.Stat(filepath.Join(string(r), path))
	return !os.IsNotExist(statErr)
}
// resolveLDConfigPath determines the ldconfig path to use for the system by
// delegating to config.NormalizeLDConfigPath. The path is passed with a "@"
// prefix, which is stripped again from the result.
// On systems such as Ubuntu where `/sbin/ldconfig` is a wrapper around
// /sbin/ldconfig.real, the latter is returned.
func (m command) resolveLDConfigPath(path string) string {
	normalized := config.NormalizeLDConfigPath("@" + path)
	return strings.TrimPrefix(normalized, "@")
}
// createConfig creates /etc/ld.so.conf.d/00-nvcr-<RANDOM_STRING>.conf in the container
// to include the required paths, one folder per line. Duplicate folders are
// written only once, in first-seen order.
// Note that the 00-nvcr prefix is chosen to ensure that these libraries have
// a higher precedence than other libraries on the system but are applied AFTER
// 00-cuda-compat.conf.
//
// root is the container root the config path is resolved against. Nothing is
// created when folders is empty. Any filesystem error is returned wrapped, so
// callers can inspect the cause with errors.Is / errors.As.
func (m command) createConfig(root string, folders []string) error {
	if len(folders) == 0 {
		m.logger.Debugf("No folders to add to /etc/ld.so.conf")
		return nil
	}

	confDir := filepath.Join(root, "/etc/ld.so.conf.d")
	if err := os.MkdirAll(confDir, 0755); err != nil {
		return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
	}

	// CreateTemp replaces the "*" in the pattern with a random string.
	configFile, err := os.CreateTemp(confDir, "00-nvcr-*.conf")
	if err != nil {
		return fmt.Errorf("failed to create config file: %w", err)
	}
	defer configFile.Close()

	m.logger.Debugf("Adding folders %v to %v", folders, configFile.Name())

	configured := make(map[string]bool, len(folders))
	for _, folder := range folders {
		if configured[folder] {
			continue
		}
		if _, err := fmt.Fprintf(configFile, "%s\n", folder); err != nil {
			return fmt.Errorf("failed to update ld.so.conf.d: %w", err)
		}
		configured[folder] = true
	}

	// The created file needs to be world readable for the cases where the container is run as a non-root user.
	if err := os.Chmod(configFile.Name(), 0644); err != nil {
		return fmt.Errorf("failed to chmod config file: %w", err)
	}

	return nil
}

View File

@@ -2,6 +2,15 @@ package main
import (
"log"
"strings"
)
// Well-known DriverCapabilities values.
const (
// allDriverCapabilities is the explicit comma-separated list of every capability name.
allDriverCapabilities = DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx")
// defaultDriverCapabilities is the capability list used as the default.
defaultDriverCapabilities = DriverCapabilities("utility,compute")
// none is the empty capability set.
none = DriverCapabilities("")
// all is the meta value "all"; Intersection treats it as matching every capability.
all = DriverCapabilities("all")
)
func capabilityToCLI(cap string) string {
@@ -25,3 +34,50 @@ func capabilityToCLI(cap string) string {
}
return ""
}
// DriverCapabilities is a comma-separated list of driver capability names, as
// carried by the NVIDIA_DRIVER_CAPABILITIES environment variable. It supports
// default values, filtering, and the meta value "all".
type DriverCapabilities string

// Intersection returns the capabilities present in both d and capabilities.
//
// If either side is the meta value "all", the other side is returned
// unchanged. Otherwise the result lists the entries of capabilities that also
// occur in d, in the order they appear in capabilities, joined with commas.
func (d DriverCapabilities) Intersection(capabilities DriverCapabilities) DriverCapabilities {
	switch {
	case capabilities == all:
		return d
	case d == all:
		return capabilities
	}

	// Index the receiver's entries for constant-time membership checks.
	own := make(map[string]bool)
	for _, name := range d.list() {
		own[name] = true
	}

	var shared []string
	for _, name := range capabilities.list() {
		if own[name] {
			shared = append(shared, name)
		}
	}

	return DriverCapabilities(strings.Join(shared, ","))
}

// String returns the capabilities as a plain string.
func (d DriverCapabilities) String() string {
	return string(d)
}

// list splits the capabilities on commas, trims surrounding whitespace from
// each entry and drops empty entries. The result is nil when no entry remains.
func (d DriverCapabilities) list() []string {
	var names []string
	for _, part := range strings.Split(string(d), ",") {
		if name := strings.TrimSpace(part); name != "" {
			names = append(names, name)
		}
	}
	return names
}

View File

@@ -0,0 +1,134 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
// TestDriverCapabilitiesIntersection is a table-driven test of
// DriverCapabilities.Intersection. Each case calls
// supportedCapabilities.Intersection(capabilities) and compares the result
// with expectedIntersection.
func TestDriverCapabilitiesIntersection(t *testing.T) {
testCases := []struct {
capabilities DriverCapabilities
supportedCapabilities DriverCapabilities
expectedIntersection DriverCapabilities
}{
// Empty sets and the "all" meta value.
{
capabilities: none,
supportedCapabilities: none,
expectedIntersection: none,
},
{
capabilities: all,
supportedCapabilities: none,
expectedIntersection: none,
},
{
capabilities: all,
supportedCapabilities: allDriverCapabilities,
expectedIntersection: allDriverCapabilities,
},
{
capabilities: allDriverCapabilities,
supportedCapabilities: all,
expectedIntersection: allDriverCapabilities,
},
{
capabilities: none,
supportedCapabilities: all,
expectedIntersection: none,
},
{
capabilities: none,
supportedCapabilities: DriverCapabilities("cap1"),
expectedIntersection: none,
},
// The result keeps the order of the argument (capabilities), not the receiver.
{
capabilities: DriverCapabilities("cap0,cap1"),
supportedCapabilities: DriverCapabilities("cap1,cap0"),
expectedIntersection: DriverCapabilities("cap0,cap1"),
},
{
capabilities: defaultDriverCapabilities,
supportedCapabilities: allDriverCapabilities,
expectedIntersection: defaultDriverCapabilities,
},
{
capabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
supportedCapabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
expectedIntersection: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
},
{
capabilities: DriverCapabilities("cap1"),
supportedCapabilities: none,
expectedIntersection: none,
},
// A requested capability that is not supported ("ngx") is dropped.
{
capabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
supportedCapabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
expectedIntersection: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
},
}
// Cases are unnamed; subtests are identified by their index in the table.
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
intersection := tc.supportedCapabilities.Intersection(tc.capabilities)
require.EqualValues(t, tc.expectedIntersection, intersection)
})
}
}
// TestDriverCapabilitiesList is a table-driven test of DriverCapabilities.list.
// Cases without an expected value expect a nil slice.
func TestDriverCapabilitiesList(t *testing.T) {
testCases := []struct {
capabilities DriverCapabilities
expected []string
}{
// Inputs with no capability names at all.
{
capabilities: DriverCapabilities(""),
},
{
capabilities: DriverCapabilities(" "),
},
{
capabilities: DriverCapabilities(","),
},
// Leading, trailing and doubled separators produce no empty entries.
{
capabilities: DriverCapabilities(",cap"),
expected: []string{"cap"},
},
{
capabilities: DriverCapabilities("cap,"),
expected: []string{"cap"},
},
{
capabilities: DriverCapabilities("cap0,,cap1"),
expected: []string{"cap0", "cap1"},
},
// Input order is preserved.
{
capabilities: DriverCapabilities("cap1,cap0,cap3"),
expected: []string{"cap1", "cap0", "cap3"},
},
}
// Cases are unnamed; subtests are identified by their index in the table.
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
require.EqualValues(t, tc.expected, tc.capabilities.list())
})
}
}

View File

@@ -9,10 +9,8 @@ import (
"path/filepath"
"strings"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"golang.org/x/mod/semver"
)
const (
@@ -23,7 +21,6 @@ const (
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)
@@ -39,18 +36,15 @@ type nvidiaConfig struct {
Devices string
MigConfigDevices string
MigMonitorDevices string
ImexChannels string
DriverCapabilities string
// Requirements defines the requirements DSL for the container to run.
// This is empty if no specific requirements are needed, or if requirements are
// explicitly disabled.
Requirements []string
Requirements []string
DisableRequire bool
}
type containerConfig struct {
Pid int
Rootfs string
Image image.CUDA
Env map[string]string
Nvidia *nvidiaConfig
}
@@ -136,7 +130,7 @@ func isPrivileged(s *Spec) bool {
}
var caps []string
// If v1.0.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
// If v1.1.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0-rc1/specs-go/config.go#L30-L54
rc1cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc1")
rc5cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc5")
@@ -145,31 +139,28 @@ func isPrivileged(s *Spec) bool {
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
for _, c := range caps {
if c == capSysAdmin {
return true
}
// Otherwise, parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
} else {
var lc LinuxCapabilities
err := json.Unmarshal(*s.Process.Capabilities, &lc)
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
return false
// We only make sure that the bounding capability set has
// CAP_SYS_ADMIN. This allows us to make sure that the container was
// actually started as '--privileged', but also allow non-root users to
// access the privileged NVIDIA capabilities.
caps = lc.Bounding
}
// Otherwise, parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
process := specs.Process{
Env: s.Process.Env,
for _, c := range caps {
if c == capSysAdmin {
return true
}
}
err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
fullSpec := specs.Spec{
Version: *s.Version,
Process: &process,
}
return image.IsPrivileged(&fullSpec)
return false
}
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
@@ -177,7 +168,7 @@ func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *stri
// if specified.
var hasSwarmEnvvar bool
for _, envvar := range swarmResourceEnvvars {
if image.HasEnvvar(envvar) {
if _, exists := image[envvar]; exists {
hasSwarmEnvvar = true
break
}
@@ -260,39 +251,26 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
return nil
}
func getMigConfigDevices(image image.CUDA) *string {
return getMigDevices(image, envNVMigConfigDevices)
}
func getMigMonitorDevices(image image.CUDA) *string {
return getMigDevices(image, envNVMigMonitorDevices)
}
func getMigDevices(image image.CUDA, envvar string) *string {
if !image.HasEnvvar(envvar) {
return nil
func getMigConfigDevices(env map[string]string) *string {
if devices, ok := env[envNVMigConfigDevices]; ok {
return &devices
}
devices := image.Getenv(envvar)
return &devices
return nil
}
func getImexChannels(image image.CUDA) *string {
if !image.HasEnvvar(envNVImexChannels) {
return nil
func getMigMonitorDevices(env map[string]string) *string {
if devices, ok := env[envNVMigMonitorDevices]; ok {
return &devices
}
chans := image.Getenv(envNVImexChannels)
return &chans
return nil
}
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
func getDriverCapabilities(env map[string]string, supportedDriverCapabilities DriverCapabilities, legacyImage bool) DriverCapabilities {
// We use the default driver capabilities by default. This is filtered to only include the
// supported capabilities
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
capabilities := supportedDriverCapabilities.Intersection(defaultDriverCapabilities)
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities)
capsEnv := cudaImage.Getenv(envNVDriverCapabilities)
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
if !capsEnvSpecified && legacyImage {
// Environment variable unset with legacy image: set all capabilities.
@@ -301,9 +279,9 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo
if capsEnvSpecified && len(capsEnv) > 0 {
// If the environment variable is specified and is non-empty, use the capabilities value
envCapabilities := image.NewDriverCapabilities(capsEnv)
envCapabilities := DriverCapabilities(capsEnv)
capabilities = supportedDriverCapabilities.Intersection(envCapabilities)
if !envCapabilities.IsAll() && len(capabilities) != len(envCapabilities) {
if envCapabilities != all && capabilities != envCapabilities {
log.Panicln(fmt.Errorf("unsupported capabilities found in '%v' (allowed '%v')", envCapabilities, capabilities))
}
}
@@ -338,25 +316,22 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
var imexChannels string
if c := getImexChannels(image); c != nil {
imexChannels = *c
}
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
driverCapabilities := getDriverCapabilities(image, hookConfig.SupportedDriverCapabilities, legacyImage).String()
requirements, err := image.GetRequirements()
if err != nil {
log.Panicln("failed to get requirements", err)
}
disableRequire := image.HasDisableRequire()
return &nvidiaConfig{
Devices: devices,
MigConfigDevices: migConfigDevices,
MigMonitorDevices: migMonitorDevices,
ImexChannels: imexChannels,
DriverCapabilities: driverCapabilities,
Requirements: requirements,
DisableRequire: disableRequire,
}
}
@@ -374,10 +349,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
s := loadSpec(path.Join(b, "config.json"))
image, err := image.New(
image.WithEnv(s.Process.Env),
image.WithDisableRequire(hook.DisableRequire),
)
image, err := image.NewCUDAImageFromEnv(s.Process.Env)
if err != nil {
log.Panicln(err)
}
@@ -386,7 +358,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
return containerConfig{
Pid: h.Pid,
Rootfs: s.Root.Path,
Image: image,
Env: image,
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
}
}

View File

@@ -5,9 +5,8 @@ import (
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/stretchr/testify/require"
)
func TestGetNvidiaConfig(t *testing.T) {
@@ -39,8 +38,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: image.SupportedDriverCapabilities.String(),
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -52,8 +52,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: image.SupportedDriverCapabilities.String(),
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -83,8 +84,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "",
DriverCapabilities: image.SupportedDriverCapabilities.String(),
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -96,8 +98,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: image.SupportedDriverCapabilities.String(),
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -110,8 +113,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -124,8 +128,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: image.SupportedDriverCapabilities.String(),
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -138,8 +143,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "display,video",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -154,8 +160,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "display,video",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: false,
},
},
{
@@ -171,8 +178,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "display,video",
Requirements: []string{},
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: true,
},
},
{
@@ -201,8 +209,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -232,8 +241,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -245,8 +255,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -259,8 +270,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -273,8 +285,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: image.SupportedDriverCapabilities.String(),
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -287,8 +300,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "display,video",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -303,8 +317,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "display,video",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: false,
},
},
{
@@ -320,8 +335,9 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "display,video",
Requirements: []string{},
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: true,
},
},
{
@@ -333,8 +349,9 @@ func TestGetNvidiaConfig(t *testing.T) {
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{},
DisableRequire: false,
},
},
{
@@ -348,8 +365,9 @@ func TestGetNvidiaConfig(t *testing.T) {
expectedConfig: &nvidiaConfig{
Devices: "all",
MigConfigDevices: "mig0,mig1",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -373,8 +391,9 @@ func TestGetNvidiaConfig(t *testing.T) {
expectedConfig: &nvidiaConfig{
Devices: "all",
MigMonitorDevices: "mig0,mig1",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -399,7 +418,7 @@ func TestGetNvidiaConfig(t *testing.T) {
},
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: "display,video",
DriverCapabilities: "video,display",
},
},
{
@@ -414,7 +433,7 @@ func TestGetNvidiaConfig(t *testing.T) {
},
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: "display,video",
DriverCapabilities: "video,display",
},
},
{
@@ -428,7 +447,7 @@ func TestGetNvidiaConfig(t *testing.T) {
},
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
},
},
{
@@ -439,12 +458,15 @@ func TestGetNvidiaConfig(t *testing.T) {
},
privileged: true,
hookConfig: &HookConfig{
SwarmResource: "DOCKER_SWARM_RESOURCE",
SwarmResource: func() *string {
s := "DOCKER_SWARM_RESOURCE"
return &s
}(),
SupportedDriverCapabilities: "video,display,utility,compute",
},
expectedConfig: &nvidiaConfig{
Devices: "GPU1,GPU2",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
},
},
{
@@ -455,29 +477,29 @@ func TestGetNvidiaConfig(t *testing.T) {
},
privileged: true,
hookConfig: &HookConfig{
SwarmResource: "NOT_DOCKER_SWARM_RESOURCE,DOCKER_SWARM_RESOURCE",
SwarmResource: func() *string {
s := "NOT_DOCKER_SWARM_RESOURCE,DOCKER_SWARM_RESOURCE"
return &s
}(),
SupportedDriverCapabilities: "video,display,utility,compute",
},
expectedConfig: &nvidiaConfig{
Devices: "GPU1,GPU2",
DriverCapabilities: image.DefaultDriverCapabilities.String(),
DriverCapabilities: defaultDriverCapabilities.String(),
},
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
image, _ := image.New(
image.WithEnvMap(tc.env),
)
// Wrap the call to getNvidiaConfig() in a closure.
var config *nvidiaConfig
getConfig := func() {
hookConfig := tc.hookConfig
if hookConfig == nil {
defaultConfig, _ := getDefaultHookConfig()
defaultConfig := getDefaultHookConfig()
hookConfig = &defaultConfig
}
config = getNvidiaConfig(hookConfig, image, nil, tc.privileged)
config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged)
}
// For any tests that are expected to panic, make sure they do.
@@ -503,6 +525,7 @@ func TestGetNvidiaConfig(t *testing.T) {
require.Equal(t, tc.expectedConfig.DriverCapabilities, config.DriverCapabilities)
require.ElementsMatch(t, tc.expectedConfig.Requirements, config.Requirements)
require.Equal(t, tc.expectedConfig.DisableRequire, config.DisableRequire)
})
}
}
@@ -682,17 +705,13 @@ func TestDeviceListSourcePriority(t *testing.T) {
// Wrap the call to getDevices() in a closure.
var devices *string
getDevices := func() {
image, _ := image.New(
image.WithEnvMap(
map[string]string{
envNVVisibleDevices: tc.envvarDevices,
},
),
)
hookConfig, _ := getDefaultHookConfig()
env := map[string]string{
envNVVisibleDevices: tc.envvarDevices,
}
hookConfig := getDefaultHookConfig()
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
devices = getDevices(&hookConfig, image, tc.mountDevices, tc.privileged)
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
}
// For all other tests, just grab the devices and check the results
@@ -913,10 +932,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
for i, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
image, _ := image.New(
image.WithEnvMap(tc.env),
)
devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars)
if tc.expectedDevices == nil {
require.Nil(t, devices, "%d: %v", i, tc)
return
@@ -930,7 +946,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
func TestGetDriverCapabilities(t *testing.T) {
supportedCapabilities := "compute,display,utility,video"
supportedCapabilities := "compute,utility,display,video"
testCases := []struct {
description string
@@ -965,7 +981,7 @@ func TestGetDriverCapabilities(t *testing.T) {
},
legacyImage: true,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: image.DefaultDriverCapabilities.String(),
expectedCapabilities: defaultDriverCapabilities.String(),
},
{
description: "Env unset for legacy image is 'all'",
@@ -988,7 +1004,7 @@ func TestGetDriverCapabilities(t *testing.T) {
env: map[string]string{},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: image.DefaultDriverCapabilities.String(),
expectedCapabilities: defaultDriverCapabilities.String(),
},
{
description: "Env is all for modern image",
@@ -1006,7 +1022,7 @@ func TestGetDriverCapabilities(t *testing.T) {
},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: image.DefaultDriverCapabilities.String(),
expectedCapabilities: defaultDriverCapabilities.String(),
},
{
description: "Invalid capabilities panic",
@@ -1026,17 +1042,11 @@ func TestGetDriverCapabilities(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
var capabilities string
var capabilites DriverCapabilities
c := HookConfig{
SupportedDriverCapabilities: tc.supportedCapabilities,
}
image, _ := image.New(
image.WithEnvMap(tc.env),
)
getDriverCapabilities := func() {
capabilities = c.getDriverCapabilities(image, tc.legacyImage).String()
supportedCapabilities := DriverCapabilities(tc.supportedCapabilities)
capabilites = getDriverCapabilities(tc.env, supportedCapabilities, tc.legacyImage)
}
if tc.expectedPanic {
@@ -1045,7 +1055,7 @@ func TestGetDriverCapabilities(t *testing.T) {
}
getDriverCapabilities()
require.EqualValues(t, tc.expectedCapabilities, capabilities)
require.EqualValues(t, tc.expectedCapabilities, capabilites)
})
}
}

View File

@@ -1,15 +1,14 @@
package main
import (
"fmt"
"log"
"os"
"path"
"reflect"
"strings"
"github.com/BurntSushi/toml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
)
const (
@@ -17,64 +16,91 @@ const (
driverPath = "/run/nvidia/driver"
)
var defaultPaths = [...]string{
path.Join(driverPath, configPath),
configPath,
}
// CLIConfig : options for nvidia-container-cli.
//
// Each field is populated from the TOML key named in its tag. The defaults
// returned by getDefaultHookConfig leave every pointer field nil, and
// doPrestart only emits the matching command-line flag when the pointer is
// non-nil.
type CLIConfig struct {
// Root is passed as --root=<value>; getCLIPath also uses it to build the
// PATH that is set before the CLI is resolved.
Root *string `toml:"root"`
// Path, when set, is returned by getCLIPath as the CLI path without any lookup.
Path *string `toml:"path"`
// Environment entries are appended to os.Environ() for the exec'd CLI.
Environment []string `toml:"environment"`
// Debug is passed as --debug=<value> unless the debug flag forces /dev/stderr.
Debug *string `toml:"debug"`
// Ldcache is passed as --ldcache=<value>.
Ldcache *string `toml:"ldcache"`
// LoadKmods adds --load-kmods to the CLI arguments when true.
LoadKmods bool `toml:"load-kmods"`
// NoPivot: its use is not visible in this part of the file — presumably
// maps to a corresponding CLI flag; TODO confirm against doPrestart.
NoPivot bool `toml:"no-pivot"`
// NoCgroups adds --no-cgroups to the "configure" arguments when true.
NoCgroups bool `toml:"no-cgroups"`
// User is passed as --user=<value>.
User *string `toml:"user"`
// Ldconfig is passed as --ldconfig=<value> after the "configure" command.
Ldconfig *string `toml:"ldconfig"`
}
// HookConfig : options for the nvidia-container-runtime-hook.
type HookConfig config.Config
type HookConfig struct {
DisableRequire bool `toml:"disable-require"`
SwarmResource *string `toml:"swarm-resource"`
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
SupportedDriverCapabilities DriverCapabilities `toml:"supported-driver-capabilities"`
func getDefaultHookConfig() (HookConfig, error) {
defaultCfg, err := config.GetDefault()
if err != nil {
return HookConfig{}, err
}
return *(*HookConfig)(defaultCfg), nil
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
NVIDIAContainerRuntime config.RuntimeConfig `toml:"nvidia-container-runtime"`
}
// loadConfig loads the required paths for the hook config.
func loadConfig() (*config.Config, error) {
var configPaths []string
var required bool
if len(*configflag) != 0 {
configPaths = append(configPaths, *configflag)
required = true
} else {
configPaths = append(configPaths, path.Join(driverPath, configPath), configPath)
// getDefaultHookConfig returns the HookConfig holding the built-in defaults.
// getHookConfig starts from this value before decoding a configuration file
// on top of it.
func getDefaultHookConfig() HookConfig {
	// Start from the zero value: DisableRequire and
	// AcceptDeviceListAsVolumeMounts are false, SwarmResource is nil, and in
	// the CLI section Root, Path, Debug, Ldcache, User and Ldconfig are nil
	// while NoPivot and NoCgroups are false.
	var cfg HookConfig

	cfg.AcceptEnvvarUnprivileged = true
	cfg.SupportedDriverCapabilities = allDriverCapabilities

	// The environment list is an empty, non-nil slice by default.
	cfg.NvidiaContainerCLI.Environment = []string{}
	cfg.NvidiaContainerCLI.LoadKmods = true

	cfg.NVIDIAContainerRuntime = *config.GetDefaultRuntimeConfig()

	return cfg
}
for _, p := range configPaths {
cfg, err := config.New(
config.WithConfigFile(p),
config.WithRequired(true),
)
if err == nil {
return cfg.Config()
} else if os.IsNotExist(err) && !required {
continue
func getHookConfig() (config HookConfig) {
var err error
if len(*configflag) > 0 {
config = getDefaultHookConfig()
_, err = toml.DecodeFile(*configflag, &config)
if err != nil {
log.Panicln("couldn't open configuration file:", err)
}
} else {
for _, p := range defaultPaths {
config = getDefaultHookConfig()
_, err = toml.DecodeFile(p, &config)
if err == nil {
break
} else if !os.IsNotExist(err) {
log.Panicln("couldn't open default configuration file:", err)
}
}
return nil, fmt.Errorf("couldn't open required configuration file: %v", err)
}
return config.GetDefault()
}
func getHookConfig() (*HookConfig, error) {
cfg, err := loadConfig()
if err != nil {
return nil, fmt.Errorf("failed to load config: %v", err)
if config.SupportedDriverCapabilities == all {
config.SupportedDriverCapabilities = allDriverCapabilities
}
config := (*HookConfig)(cfg)
allSupportedDriverCapabilities := image.SupportedDriverCapabilities
if config.SupportedDriverCapabilities == "all" {
config.SupportedDriverCapabilities = allSupportedDriverCapabilities.String()
}
configuredCapabilities := image.NewDriverCapabilities(config.SupportedDriverCapabilities)
// We ensure that the configured value is a subset of all supported capabilities
if !allSupportedDriverCapabilities.IsSuperset(configuredCapabilities) {
// We ensure that the supported-driver-capabilities option is a subset of allDriverCapabilities
if intersection := allDriverCapabilities.Intersection(config.SupportedDriverCapabilities); intersection != config.SupportedDriverCapabilities {
configName := config.getConfigOption("SupportedDriverCapabilities")
log.Panicf("Invalid value for config option '%v'; %v (supported: %v)\n", configName, config.SupportedDriverCapabilities, allSupportedDriverCapabilities.String())
log.Panicf("Invalid value for config option '%v'; %v (supported: %v)\n", configName, config.SupportedDriverCapabilities, allDriverCapabilities)
}
return config, nil
return config
}
// getConfigOption returns the toml config option associated with the
@@ -94,11 +120,11 @@ func (c HookConfig) getConfigOption(fieldName string) string {
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
func (c *HookConfig) getSwarmResourceEnvvars() []string {
if c.SwarmResource == "" {
if c.SwarmResource == nil {
return nil
}
candidates := strings.Split(c.SwarmResource, ",")
candidates := strings.Split(*c.SwarmResource, ",")
var envvars []string
for _, c := range candidates {

View File

@@ -22,24 +22,22 @@ import (
"testing"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
)
func TestGetHookConfig(t *testing.T) {
testCases := []struct {
lines []string
expectedPanic bool
expectedDriverCapabilities string
expectedDriverCapabilities DriverCapabilities
}{
{
expectedDriverCapabilities: image.SupportedDriverCapabilities.String(),
expectedDriverCapabilities: allDriverCapabilities,
},
{
lines: []string{
"supported-driver-capabilities = \"all\"",
},
expectedDriverCapabilities: image.SupportedDriverCapabilities.String(),
expectedDriverCapabilities: allDriverCapabilities,
},
{
lines: []string{
@@ -49,19 +47,19 @@ func TestGetHookConfig(t *testing.T) {
},
{
lines: []string{},
expectedDriverCapabilities: image.SupportedDriverCapabilities.String(),
expectedDriverCapabilities: allDriverCapabilities,
},
{
lines: []string{
"supported-driver-capabilities = \"\"",
},
expectedDriverCapabilities: "",
expectedDriverCapabilities: none,
},
{
lines: []string{
"supported-driver-capabilities = \"compute,utility\"",
"supported-driver-capabilities = \"utility,compute\"",
},
expectedDriverCapabilities: "compute,utility",
expectedDriverCapabilities: DriverCapabilities("utility,compute"),
},
}
@@ -91,8 +89,7 @@ func TestGetHookConfig(t *testing.T) {
var config HookConfig
getHookConfig := func() {
c, _ := getHookConfig()
config = *c
config = getHookConfig()
}
if tc.expectedPanic {
@@ -112,6 +109,10 @@ func TestGetSwarmResourceEnvvars(t *testing.T) {
value string
expected []string
}{
{
value: "nil",
expected: nil,
},
{
value: "",
expected: nil,
@@ -145,7 +146,12 @@ func TestGetSwarmResourceEnvvars(t *testing.T) {
for i, tc := range testCases {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
c := &HookConfig{
SwarmResource: tc.value,
SwarmResource: func() *string {
if tc.value == "nil" {
return nil
}
return &tc.value
}(),
}
envvars := c.getSwarmResourceEnvvars()

View File

@@ -13,9 +13,7 @@ import (
"strings"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
@@ -38,12 +36,16 @@ func exit() {
os.Exit(0)
}
func getCLIPath(config config.ContainerCLIConfig) string {
if config.Path != "" {
return config.Path
func getCLIPath(config CLIConfig) string {
if config.Path != nil {
return *config.Path
}
if err := os.Setenv("PATH", lookup.GetPath(config.Root)); err != nil {
var root string
if config.Root != nil {
root = *config.Root
}
if err := os.Setenv("PATH", lookup.GetPath(root)); err != nil {
log.Panicln("couldn't set PATH variable:", err)
}
@@ -69,28 +71,25 @@ func doPrestart() {
defer exit()
log.SetFlags(0)
hook, err := getHookConfig()
if err != nil || hook == nil {
log.Panicln("error getting hook config:", err)
}
cli := hook.NVIDIAContainerCLIConfig
hook := getHookConfig()
cli := hook.NvidiaContainerCLI
container := getContainerConfig(*hook)
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
}
container := getContainerConfig(hook)
nvidia := container.Nvidia
if nvidia == nil {
// Not a GPU container, nothing to do.
return
}
if !hook.NVIDIAContainerRuntimeHookConfig.SkipModeDetection && info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntimeConfig.Mode, container.Image) != "legacy" {
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
}
rootfs := getRootfsPath(container)
args := []string{getCLIPath(cli)}
if cli.Root != "" {
args = append(args, fmt.Sprintf("--root=%s", cli.Root))
if cli.Root != nil {
args = append(args, fmt.Sprintf("--root=%s", *cli.Root))
}
if cli.LoadKmods {
args = append(args, "--load-kmods")
@@ -100,19 +99,19 @@ func doPrestart() {
}
if *debugflag {
args = append(args, "--debug=/dev/stderr")
} else if cli.Debug != "" {
args = append(args, fmt.Sprintf("--debug=%s", cli.Debug))
} else if cli.Debug != nil {
args = append(args, fmt.Sprintf("--debug=%s", *cli.Debug))
}
if cli.Ldcache != "" {
args = append(args, fmt.Sprintf("--ldcache=%s", cli.Ldcache))
if cli.Ldcache != nil {
args = append(args, fmt.Sprintf("--ldcache=%s", *cli.Ldcache))
}
if cli.User != "" {
args = append(args, fmt.Sprintf("--user=%s", cli.User))
if cli.User != nil {
args = append(args, fmt.Sprintf("--user=%s", *cli.User))
}
args = append(args, "configure")
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
if cli.Ldconfig != nil {
args = append(args, fmt.Sprintf("--ldconfig=%s", *cli.Ldconfig))
}
if cli.NoCgroups {
args = append(args, "--no-cgroups")
@@ -126,9 +125,6 @@ func doPrestart() {
if len(nvidia.MigMonitorDevices) > 0 {
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
}
if len(nvidia.ImexChannels) > 0 {
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
}
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
if len(cap) == 0 {
@@ -137,15 +133,16 @@ func doPrestart() {
args = append(args, capabilityToCLI(cap))
}
for _, req := range nvidia.Requirements {
args = append(args, fmt.Sprintf("--require=%s", req))
if !hook.DisableRequire && !nvidia.DisableRequire {
for _, req := range nvidia.Requirements {
args = append(args, fmt.Sprintf("--require=%s", req))
}
}
args = append(args, fmt.Sprintf("--pid=%s", strconv.FormatUint(uint64(container.Pid), 10)))
args = append(args, rootfs)
env := append(os.Environ(), cli.Environment...)
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection?
err = syscall.Exec(args[0], args, env)
log.Panicln("exec failed:", err)
}
@@ -188,11 +185,11 @@ func main() {
}
}
// logInterceptor implements the logger.Interface to allow for logging from executable.
type logInterceptor struct {
logger.NullLogger
}
// logInterceptor implements the info.Logger interface to allow for logging from this function.
type logInterceptor struct{}
func (l *logInterceptor) Infof(format string, args ...interface{}) {
log.Printf(format, args...)
}
func (l *logInterceptor) Debugf(format string, args ...interface{}) {}

View File

@@ -1,34 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
)
// main builds the runtime with the "cdi" mode override, runs it with the
// process arguments, and exits with status 1 if the run returns an error.
func main() {
	rt := runtime.New(runtime.WithModeOverride("cdi"))
	if err := rt.Run(os.Args); err != nil {
		os.Exit(1)
	}
}

View File

@@ -1,34 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
)
// main builds the runtime with the "legacy" mode override, runs it with the
// process arguments, and exits with status 1 if the run returns an error.
func main() {
	rt := runtime.New(runtime.WithModeOverride("legacy"))
	if err := rt.Run(os.Args); err != nil {
		os.Exit(1)
	}
}

View File

@@ -85,126 +85,3 @@ Alternatively the NVIDIA Container Runtime can be set as the default runtime for
}
}
```
## Environment variables (OCI spec)
Each environment variable maps to a command-line argument for `nvidia-container-cli` from [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
These variables are already set in our [official CUDA images](https://hub.docker.com/r/nvidia/cuda/).
### `NVIDIA_VISIBLE_DEVICES`
This variable controls which GPUs will be made accessible inside the container.
#### Possible values
* `0,1,2`, `GPU-fef8089b` …: a comma-separated list of GPU UUID(s) or index(es).
* `all`: all GPUs will be accessible, this is the default value in our container images.
* `none`: no GPU will be accessible, but driver capabilities will be enabled.
* `void` or *empty* or *unset*: `nvidia-container-runtime` will have the same behavior as `runc`.
**Note**: When running on a MIG capable device, the following values will also be available:
* `0:0,0:1,1:0`, `MIG-GPU-fef8089b/0/1` …: a comma-separated list of MIG Device UUID(s) or index(es).
Where the MIG device indices have the form `<GPU Device Index>:<MIG Device Index>` as seen in the example output:
```
$ nvidia-smi -L
GPU 0: Graphics Device (UUID: GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5)
MIG Device 0: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/0)
MIG Device 1: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/1)
MIG Device 2: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/11/0)
```
### `NVIDIA_MIG_CONFIG_DEVICES`
This variable controls which of the visible GPUs can have their MIG
configuration managed from within the container. This includes enabling and
disabling MIG mode, creating and destroying GPU Instances and Compute
Instances, etc.
#### Possible values
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
MIG configurations managed.
**Note**:
* This feature is only available on MIG capable devices (e.g. the A100).
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
* When not running as `root`, the container user must have read access to the
`/proc/driver/nvidia/capabilities/mig/config` file on the host.
### `NVIDIA_MIG_MONITOR_DEVICES`
This variable controls which of the visible GPUs can have aggregate information
about all of their MIG devices monitored from within the container. This
includes inspecting the aggregate memory usage, listing the aggregate running
processes, etc.
#### Possible values
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
MIG devices monitored.
**Note**:
* This feature is only available on MIG capable devices (e.g. the A100).
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
* When not running as `root`, the container user must have read access to the
`/proc/driver/nvidia/capabilities/mig/monitor` file on the host.
### `NVIDIA_DRIVER_CAPABILITIES`
This option controls which driver libraries/binaries will be mounted inside the container.
#### Possible values
* `compute,video`, `graphics,utility` …: a comma-separated list of driver features the container needs.
* `all`: enable all available driver capabilities.
* *empty* or *unset*: use default driver capability: `utility,compute`.
#### Supported driver capabilities
* `compute`: required for CUDA and OpenCL applications.
* `compat32`: required for running 32-bit applications.
* `graphics`: required for running OpenGL and Vulkan applications.
* `utility`: required for using `nvidia-smi` and NVML.
* `video`: required for using the Video Codec SDK.
* `display`: required for leveraging X11 display.
### `NVIDIA_REQUIRE_*`
A logical expression to define constraints on the configurations supported by the container.
#### Supported constraints
* `cuda`: constraint on the CUDA driver version.
* `driver`: constraint on the driver version.
* `arch`: constraint on the compute architectures of the selected GPUs.
* `brand`: constraint on the brand of the selected GPUs (e.g. GeForce, Tesla, GRID).
#### Expressions
Multiple constraints can be expressed in a single environment variable: space-separated constraints are ORed, comma-separated constraints are ANDed.
Multiple environment variables of the form `NVIDIA_REQUIRE_*` are ANDed together.
### `NVIDIA_DISABLE_REQUIRE`
Single switch to disable all the constraints of the form `NVIDIA_REQUIRE_*`.
### `NVIDIA_REQUIRE_CUDA`
The version of the CUDA toolkit used by the container. It is an instance of the generic `NVIDIA_REQUIRE_*` case and it is set by official CUDA images.
If the version of the NVIDIA driver is insufficient to run this version of CUDA, the container will not be started.
#### Possible values
* `cuda>=7.5`, `cuda>=8.0`, `cuda>=9.0` …: any valid CUDA version in the form `major.minor`.
### `CUDA_VERSION`
Similar to `NVIDIA_REQUIRE_CUDA`, for legacy CUDA images.
In addition, if `NVIDIA_REQUIRE_CUDA` is not set, `NVIDIA_VISIBLE_DEVICES` and `NVIDIA_DRIVER_CAPABILITIES` will default to `all`.
## Usage example
**NOTE:** The use of the `nvidia-container-runtime` as CLI replacement for `runc` is uncommon and is only provided for completeness.
Although the `nvidia-container-runtime` is typically configured as a replacement for `runc` or `crun` in various container engines, it can also be
invoked from the command line as `runc` would. For example:
```sh
# Setup a rootfs based on Ubuntu 16.04
cd $(mktemp -d) && mkdir rootfs
curl -sS http://cdimage.ubuntu.com/ubuntu-base/releases/16.04/release/ubuntu-base-16.04.6-base-amd64.tar.gz | tar --exclude 'dev/*' -C rootfs -xz
# Create an OCI runtime spec
nvidia-container-runtime spec
sed -i 's;"sh";"nvidia-smi";' config.json
sed -i 's;\("TERM=xterm"\);\1, "NVIDIA_VISIBLE_DEVICES=0";' config.json
# Run the container
sudo nvidia-container-runtime run nvidia_smi
```

View File

@@ -14,10 +14,9 @@
# limitations under the License.
*/
package runtime
package main
import (
"errors"
"fmt"
"io"
"os"
@@ -27,35 +26,27 @@ import (
"strings"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// Logger adds a way to manage output to a log file to a logrus.Logger
type Logger struct {
logger.Interface
previousLogger logger.Interface
*logrus.Logger
previousLogger *logrus.Logger
logFiles []*os.File
}
// NewLogger creates an empty logger
func NewLogger() *Logger {
return &Logger{
Interface: logrus.New(),
Logger: logrus.New(),
}
}
// Update constructs a Logger with a predefined formatter
func (l *Logger) Update(filename string, logLevel string, argv []string) {
// UpdateLogger constructs a Logger with a predefined formatter
func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, error) {
configFromArgs := parseArgs(argv)
level, logLevelError := configFromArgs.getLevel(logLevel)
defer func() {
if logLevelError != nil {
l.Warning(logLevelError)
}
}()
var logFiles []*os.File
var argLogFileError error
@@ -64,7 +55,7 @@ func (l *Logger) Update(filename string, logLevel string, argv []string) {
if !configFromArgs.version {
configLogFile, err := createLogFile(filename)
if err != nil {
argLogFileError = errors.Join(argLogFileError, err)
return logger, fmt.Errorf("error opening debug log file: %v", err)
}
if configLogFile != nil {
logFiles = append(logFiles, configLogFile)
@@ -74,17 +65,16 @@ func (l *Logger) Update(filename string, logLevel string, argv []string) {
if argLogFile != nil {
logFiles = append(logFiles, argLogFile)
}
argLogFileError = errors.Join(argLogFileError, err)
argLogFileError = err
}
defer func() {
if argLogFileError != nil {
l.Warningf("Failed to open log file: %v", argLogFileError)
}
}()
newLogger := logrus.New()
l := &Logger{
Logger: logrus.New(),
previousLogger: logger.Logger,
logFiles: logFiles,
}
newLogger.SetLevel(level)
l.SetLevel(level)
if level == logrus.DebugLevel {
logrus.SetReportCaller(true)
// Shorten function and file names reported by the logger, by
@@ -102,27 +92,30 @@ func (l *Logger) Update(filename string, logLevel string, argv []string) {
}
if configFromArgs.format == "json" {
newLogger.SetFormatter(new(logrus.JSONFormatter))
l.SetFormatter(new(logrus.JSONFormatter))
}
switch len(logFiles) {
case 0:
newLogger.SetOutput(io.Discard)
case 1:
newLogger.SetOutput(logFiles[0])
default:
if len(logFiles) == 0 {
l.SetOutput(io.Discard)
} else if len(logFiles) == 1 {
l.SetOutput(logFiles[0])
} else if len(logFiles) > 1 {
var writers []io.Writer
for _, f := range logFiles {
writers = append(writers, f)
}
newLogger.SetOutput(io.MultiWriter(writers...))
l.SetOutput(io.MultiWriter(writers...))
}
*l = Logger{
Interface: newLogger,
previousLogger: l.Interface,
logFiles: logFiles,
if logLevelError != nil {
l.Warn(logLevelError)
}
if argLogFileError != nil {
l.Warnf("Failed to open log file: %v", argLogFileError)
}
return l, nil
}
// Reset closes the log file (if any) and resets the logger output to what it
@@ -133,9 +126,7 @@ func (l *Logger) Reset() error {
if previous == nil {
previous = logrus.New()
}
l.Interface = previous
l.previousLogger = nil
l.logFiles = nil
logger = &Logger{Logger: previous}
}()
var errs []error
@@ -159,16 +150,11 @@ func (l *Logger) Reset() error {
}
func createLogFile(filename string) (*os.File, error) {
if filename == "" || filename == os.DevNull {
return nil, nil
if filename != "" && filename != os.DevNull {
return os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
}
if dir := filepath.Dir(filepath.Clean(filename)); dir != "." {
err := os.MkdirAll(dir, 0755)
if err != nil {
return nil, err
}
}
return os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
return nil, nil
}
type loggerConfig struct {
@@ -236,13 +222,12 @@ func parseArgs(args []string) loggerConfig {
}
var value string
switch {
case len(parts) == 2:
if len(parts) == 2 {
value = parts[2]
case i+1 < len(args):
} else if i+1 < len(args) {
value = args[i+1]
i++
default:
} else {
continue
}

View File

@@ -1,15 +1,89 @@
package main
import (
"fmt"
"os"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/opencontainers/runtime-spec/specs-go"
)
// version must be set by go build's -X main.version= option in the Makefile.
var version = "unknown"
// gitCommit will be the hash that the binary was built from
// and will be populated by the Makefile
var gitCommit = ""
var logger = NewLogger()
func main() {
r := runtime.New()
err := r.Run(os.Args)
err := run(os.Args)
if err != nil {
logger.Errorf("%v", err)
os.Exit(1)
}
}
// run is an entry point that allows for idiomatic handling of errors
// when calling from the main function.
//
// It prints a version banner when argv contains a version flag, loads the
// configuration, replaces the package-level logger with one built from the
// configured debug file path and log level, and finally hands argv to the
// runtime returned by newNVIDIAContainerRuntime. A non-nil result is logged
// by the deferred function just before the logger is reset.
func run(argv []string) (rerr error) {
	showVersion := hasVersionFlag(argv)
	if showVersion {
		versionString := info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version))
		fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", versionString)
	}

	cfg, err := config.GetConfig()
	if err != nil {
		return fmt.Errorf("error loading config: %v", err)
	}

	// Plain assignment: this updates the package-level logger rather than
	// declaring a new local one.
	logger, err = UpdateLogger(cfg.NVIDIAContainerRuntimeConfig.DebugFilePath, cfg.NVIDIAContainerRuntimeConfig.LogLevel, argv)
	if err != nil {
		return fmt.Errorf("failed to set up logger: %v", err)
	}
	defer func() {
		// Report the final error through the configured logger before it is reset.
		if rerr != nil {
			logger.Errorf("%v", rerr)
		}
		logger.Reset()
	}()

	logger.Debugf("Command line arguments: %v", argv)
	rt, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
	if err != nil {
		return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
	}

	// Separate the version banner printed above from the runtime's own output.
	if showVersion {
		fmt.Print("\n")
	}

	return rt.Exec(argv)
}
// TODO: This should be refactored / combined with parseArgs in logger.
//
// hasVersionFlag reports whether args contains a version flag. An argument
// counts as a flag only if it starts with at least one dash; anything after
// the first "=" is ignored, so "--version", "-version" and "--version=x" all
// match while a bare "version" does not.
func hasVersionFlag(args []string) bool {
	for _, arg := range args {
		// SplitN always yields at least one element, so indexing [0] is safe.
		key := strings.SplitN(arg, "=", 2)[0]
		bare := strings.TrimLeft(key, "-")

		// No leading dash was stripped: this is a positional argument, not a flag.
		isFlag := bare != key
		if isFlag && bare == "version" {
			return true
		}
	}
	return false
}

View File

@@ -3,20 +3,17 @@ package main
import (
"bytes"
"encoding/json"
"io"
"log"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/require"
)
const (
@@ -44,7 +41,7 @@ func TestMain(m *testing.M) {
var err error
moduleRoot, err := test.GetModuleRoot()
if err != nil {
log.Fatalf("error in test setup: could not get module root: %v", err)
logger.Fatalf("error in test setup: could not get module root: %v", err)
}
testBinPath := filepath.Join(moduleRoot, "test", "bin")
testInputPath := filepath.Join(moduleRoot, "test", "input")
@@ -56,11 +53,11 @@ func TestMain(m *testing.M) {
// Confirm that the environment is configured correctly
runcPath, err := exec.LookPath(runcExecutableName)
if err != nil || filepath.Join(testBinPath, runcExecutableName) != runcPath {
log.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
logger.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
}
hookPath, err := exec.LookPath(nvidiaHook)
if err != nil || filepath.Join(testBinPath, nvidiaHook) != hookPath {
log.Fatalf("error in test setup: mock hook path set incorrectly in TestMain(): %v", err)
logger.Fatalf("error in test setup: mock hook path set incorrectly in TestMain(): %v", err)
}
// Store the root and binary paths in the test Config
@@ -80,14 +77,13 @@ func TestMain(m *testing.M) {
// case 1) nvidia-container-runtime run --bundle
// case 2) nvidia-container-runtime create --bundle
// - Confirm the runtime handles bad input correctly
// - Confirm the runtime handles bad input correctly
func TestBadInput(t *testing.T) {
err := cfg.generateNewRuntimeSpec()
if err != nil {
t.Fatal(err)
}
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
err = cmdCreate.Run()
@@ -95,17 +91,15 @@ func TestBadInput(t *testing.T) {
}
// case 1) nvidia-container-runtime run --bundle <bundle-name> <ctr-name>
// - Confirm the runtime runs with no errors
//
// - Confirm the runtime runs with no errors
// case 2) nvidia-container-runtime create --bundle <bundle-name> <ctr-name>
// - Confirm the runtime inserts the NVIDIA prestart hook correctly
// - Confirm the runtime inserts the NVIDIA prestart hook correctly
func TestGoodInput(t *testing.T) {
err := cfg.generateNewRuntimeSpec()
if err != nil {
t.Fatalf("error generating runtime spec: %v", err)
}
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle", cfg.bundlePath(), "testcontainer")
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
output, err := cmdRun.CombinedOutput()
@@ -116,7 +110,6 @@ func TestGoodInput(t *testing.T) {
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
require.Empty(t, spec.Hooks, "there should be no hooks in config.json")
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
err = cmdCreate.Run()
@@ -162,7 +155,6 @@ func TestDuplicateHook(t *testing.T) {
}
// Test how runtime handles already existing prestart hook in config.json
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
output, err := cmdCreate.CombinedOutput()
@@ -178,8 +170,7 @@ func TestDuplicateHook(t *testing.T) {
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
// testing.
func addNVIDIAHook(spec *specs.Spec) error {
logger, _ := testlog.NewNullLogger()
m := modifier.NewStableRuntimeModifier(logger, nvidiaHook)
m := modifier.NewStableRuntimeModifier(logger.Logger)
return m.Modify(spec)
}
@@ -193,16 +184,15 @@ func (c testConfig) getRuntimeSpec() (specs.Spec, error) {
}
defer jsonFile.Close()
jsonContent, err := io.ReadAll(jsonFile)
switch {
case err != nil:
jsonContent, err := ioutil.ReadAll(jsonFile)
if err != nil {
return spec, err
case json.Valid(jsonContent):
} else if json.Valid(jsonContent) {
err = json.Unmarshal(jsonContent, &spec)
if err != nil {
return spec, err
}
default:
} else {
err = json.NewDecoder(bytes.NewReader(jsonContent)).Decode(&spec)
if err != nil {
return spec, err
@@ -232,7 +222,6 @@ func (c testConfig) generateNewRuntimeSpec() error {
return err
}
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmd := exec.Command("cp", c.unmodifiedSpecFile(), c.specFilePath())
err = cmd.Run()
if err != nil {

View File

@@ -14,22 +14,21 @@
# limitations under the License.
*/
package runtime
package main
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
"github.com/sirupsen/logrus"
)
// newNVIDIAContainerRuntime is a factory method that constructs a runtime based on the selected configuration and specified logger
func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv []string, driver *root.Driver) (oci.Runtime, error) {
func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv []string) (oci.Runtime, error) {
lowLevelRuntime, err := oci.NewLowLevelRuntime(logger, cfg.NVIDIAContainerRuntimeConfig.Runtimes)
if err != nil {
return nil, fmt.Errorf("error constructing low-level runtime: %v", err)
@@ -45,13 +44,13 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
}
specModifier, err := newSpecModifier(logger, cfg, ociSpec, driver)
specModifier, err := newSpecModifier(logger, cfg, ociSpec, argv)
if err != nil {
return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err)
}
// Create the wrapping runtime with the specified modifier
r := oci.NewModifyingRuntimeWrapper(
r := runtime.NewModifyingRuntimeWrapper(
logger,
lowLevelRuntime,
ociSpec,
@@ -62,33 +61,28 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
}
// newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
rawSpec, err := ociSpec.Load()
if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
}
image, err := image.NewCUDAImageFromSpec(rawSpec)
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
modeModifier, err := newModeModifier(logger, cfg, ociSpec, argv)
if err != nil {
return nil, err
}
mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode, image)
modeModifier, err := newModeModifier(logger, mode, cfg, ociSpec, image)
if err != nil {
return nil, err
}
// For CDI mode we make no additional modifications.
if mode == "cdi" {
return modeModifier, nil
}
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver)
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, ociSpec)
if err != nil {
return nil, err
}
featureModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image)
gdsModifier, err := modifier.NewGDSModifier(logger, cfg, ociSpec)
if err != nil {
return nil, err
}
mofedModifier, err := modifier.NewMOFEDModifier(logger, cfg, ociSpec)
if err != nil {
return nil, err
}
tegraModifier, err := modifier.NewTegraPlatformFiles(logger)
if err != nil {
return nil, err
}
@@ -96,17 +90,19 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
modifiers := modifier.Merge(
modeModifier,
graphicsModifier,
featureModifier,
gdsModifier,
mofedModifier,
tegraModifier,
)
return modifiers, nil
}
func newModeModifier(logger logger.Interface, mode string, cfg *config.Config, ociSpec oci.Spec, image image.CUDA) (oci.SpecModifier, error) {
switch mode {
func newModeModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
case "legacy":
return modifier.NewStableRuntimeModifier(logger, cfg.NVIDIAContainerRuntimeHookConfig.Path), nil
return modifier.NewStableRuntimeModifier(logger), nil
case "csv":
return modifier.NewCSVModifier(logger, cfg, image)
return modifier.NewCSVModifier(logger, cfg, ociSpec)
case "cdi":
return modifier.NewCDIModifier(logger, cfg, ociSpec)
}

View File

@@ -14,59 +14,22 @@
# limitations under the License.
*/
package runtime
package main
import (
"encoding/json"
"log"
"os"
"os/exec"
"path/filepath"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
const (
runcExecutableName = "runc"
)
func TestMain(m *testing.M) {
// TEST SETUP
// Determine the module root and the test binary path
var err error
moduleRoot, err := test.GetModuleRoot()
if err != nil {
log.Fatalf("error in test setup: could not get module root: %v", err)
}
testBinPath := filepath.Join(moduleRoot, "test", "bin")
// Set the environment variables for the test
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
// Confirm that the environment is configured correctly
runcPath, err := exec.LookPath(runcExecutableName)
if err != nil || filepath.Join(testBinPath, runcExecutableName) != runcPath {
log.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
}
// RUN TESTS
exitCode := m.Run()
os.Exit(exitCode)
}
func TestFactoryMethod(t *testing.T) {
logger, _ := testlog.NewNullLogger()
driver := root.New(
root.WithDriverRoot("/nvidia/driver/root"),
)
testCases := []struct {
description string
@@ -147,7 +110,6 @@ func TestFactoryMethod(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
bundleDir := t.TempDir()
specFile, err := os.Create(filepath.Join(bundleDir, "config.json"))
@@ -156,7 +118,7 @@ func TestFactoryMethod(t *testing.T) {
argv := []string{"--bundle", bundleDir, "create"}
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv, driver)
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
if tc.expectedError {
require.Error(t, err)
} else {

View File

@@ -16,34 +16,9 @@ nvidia-ctk runtime configure --set-as-default
will ensure that the NVIDIA Container Runtime is added as the default runtime to the default container
engine.
## Configure the NVIDIA Container Toolkit
The `config` command of the `nvidia-ctk` CLI allows a user to display and manipulate the NVIDIA Container Toolkit
configuration.
For example, running the following command:
```bash
nvidia-ctk config default
```
will display the default config for the detected platform.
Whereas
```bash
nvidia-ctk config
```
will display the effective NVIDIA Container Toolkit config using the configured config file.
Individual config options can be set by specifying these as key-value pairs to the `--set` argument:
```bash
nvidia-ctk config --set nvidia-container-cli.no-cgroups=true
```
By default, all commands output to `STDOUT`, but specifying the `--output` flag writes the config to the specified file.
### Generate CDI specifications
The [Container Device Interface (CDI)](https://tags.cncf.io/container-device-interface) provides
The [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface) provides
a vendor-agnostic mechanism to make arbitrary devices accessible in containerized environments. To allow NVIDIA devices to be
used in these environments, the NVIDIA Container Toolkit CLI includes functionality to generate a CDI specification for the
available NVIDIA GPUs in a system.

View File

@@ -17,20 +17,17 @@
package cdi
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/list"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
logger *logrus.Logger
}
// NewCommand constructs a cdi command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
@@ -47,8 +44,6 @@ func (m command) build() *cli.Command {
hook.Subcommands = []*cli.Command{
generate.NewCommand(m.logger),
transform.NewCommand(m.logger),
list.NewCommand(m.logger),
}
return &hook

View File

@@ -14,20 +14,27 @@
# limitations under the License.
**/
package nvcdi
package generate
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
// NewCommonDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
// This includes driver libraries and meta devices, for example.
func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
metaDevices := discover.NewCharDeviceDiscoverer(
l.logger,
l.devRoot,
func NewCommonDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
metaDevices := discover.NewDeviceDiscoverer(
logger,
lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
),
driverRoot,
[]string{
"/dev/nvidia-modeset",
"/dev/nvidia-uvm-tools",
@@ -36,12 +43,12 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
},
)
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath)
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(logger, driverRoot)
if err != nil {
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
return nil, fmt.Errorf("error constructing discoverer for graphics mounts: %v", err)
}
driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib)
driverFiles, err := NewDriverDiscoverer(logger, driverRoot, nvidiaCTKPath, nvmllib)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
}

View File

@@ -14,40 +14,64 @@
# limitations under the License.
**/
package nvcdi
package generate
import (
"fmt"
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/sirupsen/logrus"
)
type deviceFolderPermissions struct {
logger logger.Interface
devRoot string
nvidiaCDIHookPath string
devices discover.Discover
logger *logrus.Logger
driverRoot string
nvidiaCTKPath string
folders []string
}
var _ discover.Discover = (*deviceFolderPermissions)(nil)
// newDeviceFolderPermissionHookDiscoverer creates a discoverer that can be used to update the permissions for the parent folders of nested device nodes from the specified set of device specs.
// NewDeviceFolderPermissionHookDiscoverer creates a discoverer that can be used to update the permissions for the parent folders of nested device nodes from the specified set of device specs.
// This works around an issue with rootless podman when using crun as a low-level runtime.
// See https://github.com/containers/crun/issues/1047
// The nested devices that are applicable to the NVIDIA GPU devices are:
// - DRM devices at /dev/dri/*
// - NVIDIA Caps devices at /dev/nvidia-caps/*
func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, devRoot string, nvidiaCDIHookPath string, devices discover.Discover) discover.Discover {
d := &deviceFolderPermissions{
logger: logger,
devRoot: devRoot,
nvidiaCDIHookPath: nvidiaCDIHookPath,
devices: devices,
func NewDeviceFolderPermissionHookDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, deviceSpecs []specs.Device) (discover.Discover, error) {
var folders []string
seen := make(map[string]bool)
for _, device := range deviceSpecs {
for _, dn := range device.ContainerEdits.DeviceNodes {
df := filepath.Dir(dn.Path)
if seen[df] {
continue
}
// We only consider the special case paths
if df != "/dev/dri" && df != "/dev/nvidia-caps" {
continue
}
folders = append(folders, df)
seen[df] = true
}
if len(folders) == 2 {
break
}
}
return d
if len(folders) == 0 {
return discover.None{}, nil
}
d := &deviceFolderPermissions{
logger: logger,
driverRoot: driverRoot,
nvidiaCTKPath: nvidiaCTKPath,
folders: folders,
}
return d, nil
}
// Devices are empty for this discoverer
@@ -57,21 +81,17 @@ func (d *deviceFolderPermissions) Devices() ([]discover.Device, error) {
// Hooks returns a set of hooks that sets the file mode to 755 of parent folders for nested device nodes.
func (d *deviceFolderPermissions) Hooks() ([]discover.Hook, error) {
folders, err := d.getDeviceSubfolders()
if err != nil {
return nil, fmt.Errorf("failed to get device subfolders: %v", err)
}
if len(folders) == 0 {
if len(d.folders) == 0 {
return nil, nil
}
args := []string{"--mode", "755"}
for _, folder := range folders {
for _, folder := range d.folders {
args = append(args, "--path", folder)
}
hook := discover.CreateNvidiaCDIHook(
d.nvidiaCDIHookPath,
hook := discover.CreateNvidiaCTKHook(
d.nvidiaCTKPath,
"chmod",
args...,
)
@@ -79,39 +99,6 @@ func (d *deviceFolderPermissions) Hooks() ([]discover.Hook, error) {
return []discover.Hook{hook}, nil
}
func (d *deviceFolderPermissions) getDeviceSubfolders() ([]string, error) {
// For now we only consider the following special case paths
allowedPaths := map[string]bool{
"/dev/dri": true,
"/dev/nvidia-caps": true,
}
devices, err := d.devices.Devices()
if err != nil {
return nil, fmt.Errorf("failed to get devices: %v", err)
}
var folders []string
seen := make(map[string]bool)
for _, device := range devices {
df := filepath.Dir(device.Path)
if seen[df] {
continue
}
// We only consider the special case paths
if !allowedPaths[df] {
continue
}
folders = append(folders, df)
seen[df] = true
if len(folders) == len(allowedPaths) {
break
}
}
return folders, nil
}
// Mounts are empty for this discoverer
func (d *deviceFolderPermissions) Mounts() ([]discover.Mount, error) {
return nil, nil

View File

@@ -0,0 +1,156 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// NewDriverDiscoverer returns a single discoverer that merges the libraries, firmware, and binaries of a driver installation.
// The driver version used to select the libraries and firmware is queried from the supplied NVML library.
// An error is returned if that query does not succeed or if the library discoverer cannot be constructed.
func NewDriverDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
	driverVersion, ret := nvmllib.SystemGetDriverVersion()
	if ret != nvml.SUCCESS {
		return nil, fmt.Errorf("failed to determine driver version: %v", ret)
	}

	driverLibraries, err := NewDriverLibraryDiscoverer(logger, driverRoot, nvidiaCTKPath, driverVersion)
	if err != nil {
		return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
	}

	// The firmware and binary discoverers cannot fail at construction, so they are built inline.
	return discover.Merge(
		driverLibraries,
		NewDriverFirmwareDiscoverer(logger, driverRoot, driverVersion),
		NewDriverBinariesDiscoverer(logger, driverRoot),
	), nil
}
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
// The library paths are determined by getVersionLibs and are located relative to driverRoot. The returned
// discoverer merges the library mounts with the discoverer created by discover.NewLDCacheUpdateHook for them.
// An error is returned if the libraries cannot be determined or if the LDCache update hook cannot be constructed.
func NewDriverLibraryDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, version string) (discover.Discover, error) {
	libraryPaths, err := getVersionLibs(logger, driverRoot, version)
	if err != nil {
		return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
	}

	libraries := discover.NewMounts(
		logger,
		lookup.NewFileLocator(
			lookup.WithLogger(logger),
			lookup.WithRoot(driverRoot),
		),
		driverRoot,
		libraryPaths,
	)

	cfg := &discover.Config{
		DriverRoot:    driverRoot,
		NvidiaCTKPath: nvidiaCTKPath,
	}
	// Propagate a construction failure instead of discarding it; otherwise a
	// possibly-nil discoverer would be handed to discover.Merge below.
	hooks, err := discover.NewLDCacheUpdateHook(logger, libraries, cfg)
	if err != nil {
		return nil, fmt.Errorf("failed to create LDCache update hook: %v", err)
	}

	d := discover.Merge(
		libraries,
		hooks,
	)

	return d, nil
}
// NewDriverFirmwareDiscoverer returns a mounts discoverer for the GSP firmware files of the specified driver version.
// The files are searched for under /lib/firmware/nvidia/<version> using the name pattern gsp*.bin, relative to driverRoot.
func NewDriverFirmwareDiscoverer(logger *logrus.Logger, driverRoot string, version string) discover.Discover {
	firmwareLocator := lookup.NewFileLocator(
		lookup.WithLogger(logger),
		lookup.WithRoot(driverRoot),
	)
	// The firmware lives in a folder named after the driver version.
	firmwarePatterns := []string{
		filepath.Join("/lib/firmware/nvidia", version, "gsp*.bin"),
	}
	return discover.NewMounts(logger, firmwareLocator, driverRoot, firmwarePatterns)
}
// NewDriverBinariesDiscoverer returns a mounts discoverer for the executables that are associated with the GPU driver.
// The executables are located by name using an executable locator rooted at driverRoot.
func NewDriverBinariesDiscoverer(logger *logrus.Logger, driverRoot string) discover.Discover {
	driverBinaries := []string{
		"nvidia-smi",              // System management interface
		"nvidia-debugdump",        // GPU coredump utility
		"nvidia-persistenced",     // Persistence mode utility
		"nvidia-cuda-mps-control", // Multi process service CLI
		"nvidia-cuda-mps-server",  // Multi process service server
	}
	binaryLocator := lookup.NewExecutableLocator(logger, driverRoot)
	return discover.NewMounts(logger, binaryLocator, driverRoot, driverBinaries)
}
// getVersionLibs lists the libraries in the LDCache at driverRoot whose paths end in the specified driver version.
// 64-bit libraries are returned ahead of 32-bit ones. For a driverRoot other than "" or "/", the driverRoot prefix
// is stripped from each path so that the standard mount location logic can be used to resolve the mounts.
// An error is returned if the LDCache cannot be loaded.
func getVersionLibs(logger *logrus.Logger, driverRoot string, version string) ([]string, error) {
	logger.Infof("Using driver version %v", version)

	ldc, err := ldcache.New(logger, driverRoot)
	if err != nil {
		return nil, fmt.Errorf("failed to load ldcache: %v", err)
	}
	libs32, libs64 := ldc.List()

	// Keep only the entries that carry the driver version as a suffix.
	var versioned []string
	for _, lib := range libs64 {
		if !strings.HasSuffix(lib, version) {
			continue
		}
		logger.Infof("found 64-bit driver lib: %v", lib)
		versioned = append(versioned, lib)
	}
	for _, lib := range libs32 {
		if !strings.HasSuffix(lib, version) {
			continue
		}
		logger.Infof("found 32-bit driver lib: %v", lib)
		versioned = append(versioned, lib)
	}

	// With the host root as driver root there is no prefix to strip.
	switch driverRoot {
	case "", "/":
		return versioned, nil
	}

	var relative []string
	for _, lib := range versioned {
		relative = append(relative, strings.TrimPrefix(lib, driverRoot))
	}
	return relative, nil
}

View File

@@ -14,7 +14,7 @@
# limitations under the License.
**/
package nvcdi
package generate
import (
"fmt"
@@ -22,68 +22,25 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
edits, err := l.GetGPUDeviceEdits(d)
if err != nil {
return nil, fmt.Errorf("failed to get edits for device: %v", err)
}
var deviceSpecs []specs.Device
names, err := l.deviceNamers.GetDeviceNames(i, convert{d})
if err != nil {
return nil, fmt.Errorf("failed to get device name: %v", err)
}
for _, name := range names {
spec := specs.Device{
Name: name,
ContainerEdits: *edits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, spec)
}
return deviceSpecs, nil
}
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.
func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error) {
device, err := newFullGPUDiscoverer(l.logger, l.devRoot, l.nvidiaCDIHookPath, d)
if err != nil {
return nil, fmt.Errorf("failed to create device discoverer: %v", err)
}
editsForDevice, err := edits.FromDiscoverer(device)
if err != nil {
return nil, fmt.Errorf("failed to create container edits for device: %v", err)
}
return editsForDevice, nil
}
// byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links
type byPathHookDiscoverer struct {
logger logger.Interface
devRoot string
nvidiaCDIHookPath string
pciBusID string
deviceNodes discover.Discover
logger *logrus.Logger
driverRoot string
nvidiaCTKPath string
pciBusID string
}
var _ discover.Discover = (*byPathHookDiscoverer)(nil)
// newFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCDIHookPath string, d device.Device) (discover.Discover, error) {
// NewFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
func NewFullGPUDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
// TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface.
// This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin.
minor, ret := d.GetMinorNumber()
@@ -107,29 +64,20 @@ func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCDIHook
deviceNodes := discover.NewCharDeviceDiscoverer(
logger,
devRoot,
deviceNodePaths,
driverRoot,
)
byPathHooks := &byPathHookDiscoverer{
logger: logger,
devRoot: devRoot,
nvidiaCDIHookPath: nvidiaCDIHookPath,
pciBusID: pciBusID,
deviceNodes: deviceNodes,
logger: logger,
driverRoot: driverRoot,
nvidiaCTKPath: nvidiaCTKPath,
pciBusID: pciBusID,
}
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
logger,
devRoot,
nvidiaCDIHookPath,
deviceNodes,
)
dd := discover.Merge(
deviceNodes,
byPathHooks,
deviceFolderPermissionHooks,
)
return dd, nil
@@ -157,8 +105,8 @@ func (d *byPathHookDiscoverer) Hooks() ([]discover.Hook, error) {
args = append(args, "--link", l)
}
hook := discover.CreateNvidiaCDIHook(
d.nvidiaCDIHookPath,
hook := discover.CreateNvidiaCTKHook(
d.nvidiaCTKPath,
"create-symlinks",
args...,
)
@@ -172,20 +120,6 @@ func (d *byPathHookDiscoverer) Mounts() ([]discover.Mount, error) {
}
func (d *byPathHookDiscoverer) deviceNodeLinks() ([]string, error) {
devices, err := d.deviceNodes.Devices()
if err != nil {
return nil, fmt.Errorf("failed to discover device nodes: %v", err)
}
if len(devices) == 0 {
return nil, nil
}
selectedDevices := make(map[string]bool)
for _, d := range devices {
selectedDevices[d.HostPath] = true
}
candidates := []string{
fmt.Sprintf("/dev/dri/by-path/pci-%s-card", d.pciBusID),
fmt.Sprintf("/dev/dri/by-path/pci-%s-render", d.pciBusID),
@@ -193,21 +127,13 @@ func (d *byPathHookDiscoverer) deviceNodeLinks() ([]string, error) {
var links []string
for _, c := range candidates {
linkPath := filepath.Join(d.devRoot, c)
linkPath := filepath.Join(d.driverRoot, c)
device, err := os.Readlink(linkPath)
if err != nil {
d.logger.Warningf("Failed to evaluate symlink %v; ignoring", linkPath)
continue
}
deviceNode := device
if !filepath.IsAbs(device) {
deviceNode = filepath.Join(filepath.Dir(linkPath), device)
}
if !selectedDevices[deviceNode] {
d.logger.Debugf("ignoring device symlink %v -> %v since %v is not mounted", linkPath, device, deviceNode)
continue
}
d.logger.Debugf("adding device symlink %v -> %v", linkPath, device)
links = append(links, fmt.Sprintf("%v::%v", device, linkPath))
}

View File

@@ -18,52 +18,41 @@ package generate
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
cdi "tags.cncf.io/container-device-interface/pkg/parser"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"sigs.k8s.io/yaml"
)
const (
allDeviceName = "all"
formatJSON = "json"
formatYAML = "yaml"
)
type command struct {
logger logger.Interface
logger *logrus.Logger
}
type options struct {
output string
format string
deviceNameStrategies cli.StringSlice
driverRoot string
devRoot string
nvidiaCDIHookPath string
ldconfigPath string
mode string
vendor string
class string
configSearchPaths cli.StringSlice
librarySearchPaths cli.StringSlice
csv struct {
files cli.StringSlice
ignorePatterns cli.StringSlice
}
type config struct {
output string
format string
deviceNameStrategy string
driverRoot string
nvidiaCTKPath string
}
// NewCommand constructs a generate-cdi command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
@@ -72,232 +61,325 @@ func NewCommand(logger logger.Interface) *cli.Command {
// build creates the CLI command
func (m command) build() *cli.Command {
opts := options{}
cfg := config{}
// Create the 'generate-cdi' command
c := cli.Command{
Name: "generate",
Usage: "Generate CDI specifications for use with CDI-enabled runtimes",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &opts)
return m.validateFlags(c, &cfg)
},
Action: func(c *cli.Context) error {
return m.run(c, &opts)
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "config-search-path",
Usage: "Specify the path to search for config files when discovering the entities that should be included in the CDI specification.",
Destination: &opts.configSearchPaths,
},
&cli.StringFlag{
Name: "output",
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
Destination: &opts.output,
Destination: &cfg.output,
},
&cli.StringFlag{
Name: "format",
Usage: "The output format for the generated spec [json | yaml]. This overrides the format defined by the output file extension (if specified).",
Value: spec.FormatYAML,
Destination: &opts.format,
Value: formatYAML,
Destination: &cfg.format,
},
&cli.StringFlag{
Name: "mode",
Aliases: []string{"discovery-mode"},
Usage: "The mode to use when discovering the available entities. One of [auto | nvml | wsl]. If mode is set to 'auto' the mode will be determined based on the system configuration.",
Value: nvcdi.ModeAuto,
Destination: &opts.mode,
},
&cli.StringFlag{
Name: "dev-root",
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
Destination: &opts.devRoot,
},
&cli.StringSliceFlag{
Name: "device-name-strategy",
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID),
Destination: &opts.deviceNameStrategies,
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
Value: deviceNameStrategyIndex,
Destination: &cfg.deviceNameStrategy,
},
&cli.StringFlag{
Name: "driver-root",
Usage: "Specify the NVIDIA GPU driver root to use when discovering the entities that should be included in the CDI specification.",
Destination: &opts.driverRoot,
},
&cli.StringSliceFlag{
Name: "library-search-path",
Usage: "Specify the path to search for libraries when discovering the entities that should be included in the CDI specification.\n\tNote: This option only applies to CSV mode.",
Destination: &opts.librarySearchPaths,
Destination: &cfg.driverRoot,
},
&cli.StringFlag{
Name: "nvidia-cdi-hook-path",
Aliases: []string{"nvidia-ctk-path"},
Usage: "Specify the path to use for the nvidia-cdi-hook in the generated CDI specification. " +
"If not specified, the PATH will be searched for `nvidia-cdi-hook`. " +
"NOTE: That if this is specified as `nvidia-ctk`, the PATH will be searched for `nvidia-ctk` instead.",
Destination: &opts.nvidiaCDIHookPath,
},
&cli.StringFlag{
Name: "ldconfig-path",
Usage: "Specify the path to use for ldconfig in the generated CDI specification",
Destination: &opts.ldconfigPath,
},
&cli.StringFlag{
Name: "vendor",
Aliases: []string{"cdi-vendor"},
Usage: "the vendor string to use for the generated CDI specification.",
Value: "nvidia.com",
Destination: &opts.vendor,
},
&cli.StringFlag{
Name: "class",
Aliases: []string{"cdi-class"},
Usage: "the class string to use for the generated CDI specification.",
Value: "gpu",
Destination: &opts.class,
},
&cli.StringSliceFlag{
Name: "csv.file",
Usage: "The path to the list of CSV files to use when generating the CDI specification in CSV mode.",
Value: cli.NewStringSlice(csv.DefaultFileList()...),
Destination: &opts.csv.files,
},
&cli.StringSliceFlag{
Name: "csv.ignore-pattern",
Usage: "Specify a pattern the CSV mount specifications.",
Destination: &opts.csv.ignorePatterns,
Name: "nvidia-ctk-path",
Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.",
Destination: &cfg.nvidiaCTKPath,
},
}
return &c
}
func (m command) validateFlags(c *cli.Context, opts *options) error {
opts.format = strings.ToLower(opts.format)
switch opts.format {
case spec.FormatJSON:
case spec.FormatYAML:
func (m command) validateFlags(r *cli.Context, cfg *config) error {
cfg.format = strings.ToLower(cfg.format)
switch cfg.format {
case formatJSON:
case formatYAML:
default:
return fmt.Errorf("invalid output format: %v", opts.format)
return fmt.Errorf("invalid output format: %v", cfg.format)
}
opts.mode = strings.ToLower(opts.mode)
switch opts.mode {
case nvcdi.ModeAuto:
case nvcdi.ModeCSV:
case nvcdi.ModeNvml:
case nvcdi.ModeWsl:
case nvcdi.ModeManagement:
default:
return fmt.Errorf("invalid discovery mode: %v", opts.mode)
_, err := newDeviceNamer(cfg.deviceNameStrategy)
if err != nil {
return err
}
for _, strategy := range opts.deviceNameStrategies.Value() {
_, err := nvcdi.NewDeviceNamer(strategy)
if err != nil {
return err
}
}
cfg.nvidiaCTKPath = discover.FindNvidiaCTK(m.logger, cfg.nvidiaCTKPath)
opts.nvidiaCDIHookPath = config.ResolveNVIDIACDIHookPath(m.logger, opts.nvidiaCDIHookPath)
if outputFileFormat := formatFromFilename(opts.output); outputFileFormat != "" {
m.logger.Debugf("Inferred output format as %q from output file name", outputFileFormat)
if !c.IsSet("format") {
opts.format = outputFileFormat
} else if outputFileFormat != opts.format {
m.logger.Warningf("Requested output format %q does not match format implied by output file name: %q", opts.format, outputFileFormat)
}
}
if err := cdi.ValidateVendorName(opts.vendor); err != nil {
return fmt.Errorf("invalid CDI vendor name: %v", err)
}
if err := cdi.ValidateClassName(opts.class); err != nil {
return fmt.Errorf("invalid CDI class name: %v", err)
}
return nil
}
func (m command) run(c *cli.Context, opts *options) error {
spec, err := m.generateSpec(opts)
func (m command) run(c *cli.Context, cfg *config) error {
deviceNamer, err := newDeviceNamer(cfg.deviceNameStrategy)
if err != nil {
return fmt.Errorf("failed to create device namer: %v", err)
}
spec, err := m.generateSpec(
cfg.driverRoot,
discover.FindNvidiaCTK(m.logger, cfg.nvidiaCTKPath),
deviceNamer,
)
if err != nil {
return fmt.Errorf("failed to generate CDI spec: %v", err)
}
m.logger.Infof("Generated CDI spec with version %v", spec.Raw().Version)
if opts.output == "" {
_, err := spec.WriteTo(os.Stdout)
var outputTo io.Writer
if cfg.output == "" {
outputTo = os.Stdout
} else {
err := createParentDirsIfRequired(cfg.output)
if err != nil {
return fmt.Errorf("failed to write CDI spec to STDOUT: %v", err)
return fmt.Errorf("failed to create parent folders for output file: %v", err)
}
return nil
outputFile, err := os.Create(cfg.output)
if err != nil {
return fmt.Errorf("failed to create output file: %v", err)
}
defer outputFile.Close()
outputTo = outputFile
}
return spec.Save(opts.output)
if outputFileFormat := formatFromFilename(cfg.output); outputFileFormat != "" {
m.logger.Debugf("Inferred output format as %q from output file name", outputFileFormat)
if !c.IsSet("format") {
cfg.format = outputFileFormat
} else if outputFileFormat != cfg.format {
m.logger.Warningf("Requested output format %q does not match format implied by output file name: %q", cfg.format, outputFileFormat)
}
}
data, err := yaml.Marshal(spec)
if err != nil {
return fmt.Errorf("failed to marshal CDI spec: %v", err)
}
if strings.ToLower(cfg.format) == formatJSON {
data, err = yaml.YAMLToJSONStrict(data)
if err != nil {
return fmt.Errorf("failed to convert CDI spec from YAML to JSON: %v", err)
}
}
err = writeToOutput(cfg.format, data, outputTo)
if err != nil {
return fmt.Errorf("failed to write output: %v", err)
}
return nil
}
func formatFromFilename(filename string) string {
ext := filepath.Ext(filename)
switch strings.ToLower(ext) {
case ".json":
return spec.FormatJSON
case ".yaml", ".yml":
return spec.FormatYAML
return formatJSON
case ".yaml":
return formatYAML
case ".yml":
return formatYAML
}
return ""
}
func (m command) generateSpec(opts *options) (spec.Interface, error) {
var deviceNamers []nvcdi.DeviceNamer
for _, strategy := range opts.deviceNameStrategies.Value() {
deviceNamer, err := nvcdi.NewDeviceNamer(strategy)
func writeToOutput(format string, data []byte, output io.Writer) error {
if format == formatYAML {
_, err := output.Write([]byte("---\n"))
if err != nil {
return nil, fmt.Errorf("failed to create device namer: %v", err)
return fmt.Errorf("failed to write YAML separator: %v", err)
}
deviceNamers = append(deviceNamers, deviceNamer)
}
cdilib, err := nvcdi.New(
nvcdi.WithLogger(m.logger),
nvcdi.WithDriverRoot(opts.driverRoot),
nvcdi.WithDevRoot(opts.devRoot),
nvcdi.WithNVIDIACDIHookPath(opts.nvidiaCDIHookPath),
nvcdi.WithLdconfigPath(opts.ldconfigPath),
nvcdi.WithDeviceNamers(deviceNamers...),
nvcdi.WithMode(opts.mode),
nvcdi.WithConfigSearchPaths(opts.configSearchPaths.Value()),
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
nvcdi.WithCSVFiles(opts.csv.files.Value()),
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
)
_, err := output.Write(data)
if err != nil {
return nil, fmt.Errorf("failed to create CDI library: %v", err)
return fmt.Errorf("failed to write data: %v", err)
}
deviceSpecs, err := cdilib.GetAllDeviceSpecs()
return nil
}
func (m command) generateSpec(driverRoot string, nvidiaCTKPath string, namer deviceNamer) (*specs.Spec, error) {
nvmllib := nvml.New()
if r := nvmllib.Init(); r != nvml.SUCCESS {
return nil, r
}
defer nvmllib.Shutdown()
devicelib := device.New(device.WithNvml(nvmllib))
deviceSpecs, err := m.generateDeviceSpecs(devicelib, driverRoot, nvidiaCTKPath, namer)
if err != nil {
return nil, fmt.Errorf("failed to create device CDI specs: %v", err)
}
commonEdits, err := cdilib.GetCommonEdits()
allDevice := createAllDevice(deviceSpecs)
deviceSpecs = append(deviceSpecs, allDevice)
allEdits := edits.NewContainerEdits()
ipcs, err := NewIPCDiscoverer(m.logger, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to create edits common for entities: %v", err)
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
}
return spec.New(
spec.WithVendor(opts.vendor),
spec.WithClass(opts.class),
spec.WithDeviceSpecs(deviceSpecs),
spec.WithEdits(*commonEdits.ContainerEdits),
spec.WithFormat(opts.format),
spec.WithMergedDeviceOptions(
transform.WithName(allDeviceName),
transform.WithSkipIfExists(true),
),
spec.WithPermissions(0644),
)
ipcEdits, err := edits.FromDiscoverer(ipcs)
if err != nil {
return nil, fmt.Errorf("failed to create container edits for IPC sockets: %v", err)
}
// TODO: We should not have to update this after the fact
for _, s := range ipcEdits.Mounts {
s.Options = append(s.Options, "noexec")
}
allEdits.Append(ipcEdits)
common, err := NewCommonDiscoverer(m.logger, driverRoot, nvidiaCTKPath, nvmllib)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
}
deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(m.logger, driverRoot, nvidiaCTKPath, deviceSpecs)
if err != nil {
return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err)
}
commonEdits, err := edits.FromDiscoverer(discover.Merge(common, deviceFolderPermissionHooks))
if err != nil {
return nil, fmt.Errorf("failed to create container edits for common entities: %v", err)
}
allEdits.Append(commonEdits)
// We construct the spec and determine the minimum required version based on the specification.
spec := specs.Spec{
Version: "NOT_SET",
Kind: "nvidia.com/gpu",
Devices: deviceSpecs,
ContainerEdits: *allEdits.ContainerEdits,
}
minVersion, err := cdi.MinimumRequiredVersion(&spec)
if err != nil {
return nil, fmt.Errorf("failed to get minumum required CDI spec version: %v", err)
}
m.logger.Infof("Using minimum required CDI spec version: %s", minVersion)
spec.Version = minVersion
return &spec, nil
}
// generateDeviceSpecs builds one CDI device spec for every full GPU and every
// MIG device visited through devicelib.
//
// GPUs that report MIG mode as enabled are skipped in the first pass; MIG
// devices are handled by the second pass. Each device name is produced by
// namer and each spec carries the container edits derived from the matching
// discoverer. The first failure aborts the walk and is returned with context.
func (m command) generateDeviceSpecs(devicelib device.Interface, driverRoot string, nvidiaCTKPath string, namer deviceNamer) ([]specs.Device, error) {
	var deviceSpecs []specs.Device

	// First pass: full GPUs.
	err := devicelib.VisitDevices(func(i int, d device.Device) error {
		isMigEnabled, err := d.IsMigEnabled()
		if err != nil {
			return fmt.Errorf("failed to check whether device is MIG device: %v", err)
		}
		if isMigEnabled {
			return nil
		}

		// Named 'discoverer' so that the imported 'device' package is not shadowed.
		discoverer, err := NewFullGPUDiscoverer(m.logger, driverRoot, nvidiaCTKPath, d)
		if err != nil {
			return fmt.Errorf("failed to create device: %v", err)
		}
		deviceEdits, err := edits.FromDiscoverer(discoverer)
		if err != nil {
			return fmt.Errorf("failed to create container edits for device: %v", err)
		}
		deviceName, err := namer.GetDeviceName(i, d)
		if err != nil {
			return fmt.Errorf("failed to get device name: %v", err)
		}

		deviceSpecs = append(deviceSpecs, specs.Device{
			Name:           deviceName,
			ContainerEdits: *deviceEdits.ContainerEdits,
		})
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("failed to generate CDI spec for GPU devices: %v", err)
	}

	// Second pass: MIG devices.
	err = devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
		// NOTE(review): an empty root is passed here while the full-GPU pass
		// uses driverRoot; confirm whether this difference is intentional.
		discoverer, err := NewMigDeviceDiscoverer(m.logger, "", d, mig)
		if err != nil {
			return fmt.Errorf("failed to create MIG device: %v", err)
		}
		deviceEdits, err := edits.FromDiscoverer(discoverer)
		if err != nil {
			return fmt.Errorf("failed to create container edits for MIG device: %v", err)
		}
		deviceName, err := namer.GetMigDeviceName(i, j, mig)
		if err != nil {
			return fmt.Errorf("failed to get device name: %v", err)
		}

		deviceSpecs = append(deviceSpecs, specs.Device{
			Name:           deviceName,
			ContainerEdits: *deviceEdits.ContainerEdits,
		})
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("failed to generate CDI spec for MIG devices: %v", err)
	}

	return deviceSpecs, nil
}
// createAllDevice returns a device named "all" whose container edits are the
// edits of every entry in deviceSpecs, appended in slice order.
func createAllDevice(deviceSpecs []specs.Device) specs.Device {
	merged := edits.NewContainerEdits()

	for _, current := range deviceSpecs {
		merged.Append(&cdi.ContainerEdits{
			ContainerEdits: &current.ContainerEdits,
		})
	}

	return specs.Device{
		Name:           "all",
		ContainerEdits: *merged.ContainerEdits,
	}
}
// createParentDirsIfRequired creates the parent folders of the specified path if required.
// filepath.Dir returns "." (never an empty string) when filename has no
// directory component, for example when a file in the current folder is
// specified. Nothing has to be created in that case, so MkdirAll is skipped.
func createParentDirsIfRequired(filename string) error {
	dir := filepath.Dir(filename)
	if dir == "." {
		return nil
	}

	return os.MkdirAll(dir, 0755)
}

View File

@@ -14,24 +14,29 @@
# limitations under the License.
**/
package nvcdi
package generate
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
const (
dxgDeviceNode = "/dev/dxg"
)
// newDXGDeviceDiscoverer returns a Discoverer for DXG devices under WSL2.
func newDXGDeviceDiscoverer(logger logger.Interface, devRoot string) discover.Discover {
deviceNodes := discover.NewCharDeviceDiscoverer(
// NewIPCDiscoverer creates a discoverer for NVIDIA IPC sockets.
func NewIPCDiscoverer(logger *logrus.Logger, driverRoot string) (discover.Discover, error) {
d := discover.NewMounts(
logger,
devRoot,
[]string{dxgDeviceNode},
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
),
driverRoot,
[]string{
"/var/run/nvidia-persistenced/socket",
"/var/run/nvidia-fabricmanager/socket",
"/tmp/nvidia-mps",
},
)
return deviceNodes
return d, nil
}

View File

@@ -0,0 +1,75 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// NewMigDeviceDiscoverer returns a character-device discoverer for the MIG
// device d. The discovered paths are the parent GPU node (/dev/nvidia<minor>)
// and the capability device paths for the GPU instance and compute instance
// of d, looked up relative to driverRoot.
//
// An error is returned if the parent minor number, the MIG capabilities, the
// instance IDs or either capability device path cannot be determined.
func NewMigDeviceDiscoverer(logger *logrus.Logger, driverRoot string, parent device.Device, d device.MigDevice) (discover.Discover, error) {
	parentMinor, ret := parent.GetMinorNumber()
	if ret != nvml.SUCCESS {
		return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
	}

	caps, err := nvcaps.NewMigCaps()
	if err != nil {
		return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
	}

	giID, ret := d.GetGpuInstanceId()
	if ret != nvml.SUCCESS {
		return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
	}

	ciID, ret := d.GetComputeInstanceId()
	if ret != nvml.SUCCESS {
		return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
	}

	giPath, err := caps.GetCapDevicePath(nvcaps.NewGPUInstanceCap(parentMinor, giID))
	if err != nil {
		return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
	}

	ciPath, err := caps.GetCapDevicePath(nvcaps.NewComputeInstanceCap(parentMinor, giID, ciID))
	if err != nil {
		return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
	}

	nodePaths := []string{
		fmt.Sprintf("/dev/nvidia%d", parentMinor),
		giPath,
		ciPath,
	}

	return discover.NewCharDeviceDiscoverer(logger, nodePaths, driverRoot), nil
}

View File

@@ -0,0 +1,84 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// deviceNamer produces the CDI device names for full GPUs and MIG devices.
type deviceNamer interface {
	// GetDeviceName returns the name for a full GPU given its index and device handle.
	GetDeviceName(int, device.Device) (string, error)
	// GetMigDeviceName returns the name for a MIG device given the parent GPU
	// index, the MIG index and the MIG device handle.
	GetMigDeviceName(int, int, device.MigDevice) (string, error)
}
// Device name strategies accepted by newDeviceNamer.
const (
	// deviceNameStrategyIndex names devices by index with no prefix.
	deviceNameStrategyIndex = "index"
	// deviceNameStrategyTypeIndex names devices by index with a "gpu" or "mig" prefix.
	deviceNameStrategyTypeIndex = "type-index"
	// deviceNameStrategyUUID names devices by their UUID.
	deviceNameStrategyUUID = "uuid"
)
// deviceNameIndex is a deviceNamer that derives names from device indices.
type deviceNameIndex struct {
	// gpuPrefix is prepended to the index of a full GPU.
	gpuPrefix string
	// migPrefix is prepended to the "<gpu index>:<mig index>" pair of a MIG device.
	migPrefix string
}
// deviceNameUUID is a deviceNamer that uses the UUID reported by the device as its name.
type deviceNameUUID struct{}
// newDeviceNamer maps a strategy string onto the deviceNamer implementing it.
// The returned namer supplies the names for GPU and MIG devices when the CDI
// spec is generated. An unrecognised strategy yields an error.
func newDeviceNamer(strategy string) (deviceNamer, error) {
	switch strategy {
	case deviceNameStrategyUUID:
		return deviceNameUUID{}, nil
	case deviceNameStrategyTypeIndex:
		return deviceNameIndex{gpuPrefix: "gpu", migPrefix: "mig"}, nil
	case deviceNameStrategyIndex:
		return deviceNameIndex{}, nil
	default:
		return nil, fmt.Errorf("invalid device name strategy: %v", strategy)
	}
}
// GetDeviceName returns the configured GPU prefix followed by the device index i.
// The device handle is not consulted and the error is always nil.
func (s deviceNameIndex) GetDeviceName(i int, d device.Device) (string, error) {
	name := fmt.Sprintf("%s%d", s.gpuPrefix, i)
	return name, nil
}
// GetMigDeviceName returns the configured MIG prefix followed by "i:j", where i
// is the parent GPU index and j the MIG index. The MIG handle is not consulted
// and the error is always nil.
func (s deviceNameIndex) GetMigDeviceName(i int, j int, d device.MigDevice) (string, error) {
	name := fmt.Sprintf("%s%d:%d", s.migPrefix, i, j)
	return name, nil
}
// GetDeviceName returns the UUID reported by the device d. The index i is
// ignored. An error is returned when the UUID query does not succeed.
func (s deviceNameUUID) GetDeviceName(i int, d device.Device) (string, error) {
	id, status := d.GetUUID()
	if status == nvml.SUCCESS {
		return id, nil
	}
	return "", fmt.Errorf("failed to get device UUID: %v", status)
}
// GetMigDeviceName returns the UUID reported by the MIG device d. The indices
// i and j are ignored. An error is returned when the UUID query does not succeed.
func (s deviceNameUUID) GetMigDeviceName(i int, j int, d device.MigDevice) (string, error) {
	id, status := d.GetUUID()
	if status == nvml.SUCCESS {
		return id, nil
	}
	return "", fmt.Errorf("failed to get device UUID: %v", status)
}

View File

@@ -1,104 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package list
import (
"errors"
"fmt"
"github.com/urfave/cli/v2"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// command implements the 'list' CLI command.
type command struct {
	// logger receives the warnings and informational messages emitted while listing devices.
	logger logger.Interface
}
// config holds the values bound to the flags of the 'list' command.
type config struct {
	// cdiSpecDirs is the destination of the spec-dir flag: the directories scanned for CDI specifications.
	cdiSpecDirs cli.StringSlice
}
// NewCommand returns the 'list' CLI command, using logger for its output.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the 'list' cli.Command. A single config value is shared by
// the flag destinations, the Before hook (validation) and the Action (run).
func (m command) build() *cli.Command {
	cfg := config{}

	return &cli.Command{
		Name:  "list",
		Usage: "List the available CDI devices",
		Before: func(c *cli.Context) error {
			return m.validateFlags(c, &cfg)
		},
		Action: func(c *cli.Context) error {
			return m.run(c, &cfg)
		},
		Flags: []cli.Flag{
			&cli.StringSliceFlag{
				Name:        "spec-dir",
				Usage:       "specify the directories to scan for CDI specifications",
				Value:       cli.NewStringSlice(cdi.DefaultSpecDirs...),
				Destination: &cfg.cdiSpecDirs,
			},
		},
	}
}
// validateFlags rejects a configuration that names no CDI specification directory.
func (m command) validateFlags(c *cli.Context, cfg *config) error {
	if dirs := cfg.cdiSpecDirs.Value(); len(dirs) > 0 {
		return nil
	}
	return errors.New("at least one CDI specification directory must be specified")
}
// run loads the CDI specifications from the configured directories, logs any
// errors the cache reports as warnings, and prints one device name per line
// to standard output.
func (m command) run(c *cli.Context, cfg *config) error {
	cache, err := cdi.NewCache(
		cdi.WithAutoRefresh(false),
		cdi.WithSpecDirs(cfg.cdiSpecDirs.Value()...),
	)
	if err != nil {
		return fmt.Errorf("failed to create CDI cache: %v", err)
	}

	// The result of Refresh is discarded; errors are read back through GetErrors below.
	_ = cache.Refresh()

	specErrors := cache.GetErrors()
	if len(specErrors) > 0 {
		m.logger.Warningf("The following registry errors were reported:")
		for source, specErr := range specErrors {
			m.logger.Warningf("%v: %v", source, specErr)
		}
	}

	names := cache.ListDevices()
	m.logger.Infof("Found %d CDI devices", len(names))
	for _, name := range names {
		fmt.Println(name)
	}

	return nil
}

View File

@@ -1,169 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package root
import (
"fmt"
"io"
"os"
"github.com/urfave/cli/v2"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
)
// command implements the 'root' transform CLI command.
type command struct {
	// logger is the logger supplied to NewCommand.
	logger logger.Interface
}
// transformOptions describes where a CDI specification is read from and written to.
type transformOptions struct {
	// input is the file to read the specification from; "-" selects STDIN.
	input string
	// output is the file to write the specification to; "" selects STDOUT.
	output string
}
// options holds the values bound to the flags of the 'root' command.
type options struct {
	transformOptions
	// from is the root to be transformed.
	from string
	// to is the replacement root.
	to string
	// relativeTo selects whether the transform is relative to the "host" or the "container".
	relativeTo string
}
// NewCommand returns the 'root' transform CLI command, using the specified logger.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the 'root' cli.Command. A single options value is shared by
// the flag destinations, the Before hook (validation) and the Action (run).
func (m command) build() *cli.Command {
	opts := options{}

	return &cli.Command{
		Name:  "root",
		Usage: "Apply a root transform to a CDI specification",
		Before: func(c *cli.Context) error {
			return m.validateFlags(c, &opts)
		},
		Action: func(c *cli.Context) error {
			return m.run(c, &opts)
		},
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:        "from",
				Usage:       "specify the root to be transformed",
				Destination: &opts.from,
			},
			&cli.StringFlag{
				Name:        "input",
				Usage:       "Specify the file to read the CDI specification from. If this is '-' the specification is read from STDIN",
				Value:       "-",
				Destination: &opts.input,
			},
			&cli.StringFlag{
				Name:        "output",
				Usage:       "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
				Destination: &opts.output,
			},
			&cli.StringFlag{
				Name:        "relative-to",
				Usage:       "specify whether the transform is relative to the host or to the container. One of [ host | container ]",
				Value:       "host",
				Destination: &opts.relativeTo,
			},
			&cli.StringFlag{
				Name:        "to",
				Usage:       "specify the replacement root. If this is the same as the from root, the transform is a no-op.",
				Value:       "",
				Destination: &opts.to,
			},
		},
	}
}
// validateFlags accepts only "host" or "container" as the --relative-to value.
func (m command) validateFlags(c *cli.Context, opts *options) error {
	switch opts.relativeTo {
	case "host", "container":
		return nil
	default:
		return fmt.Errorf("invalid --relative-to value: %v", opts.relativeTo)
	}
}
// run loads the input CDI specification, applies the root transform described
// by the from/to/relative-to options to its raw form, and saves the result.
func (m command) run(c *cli.Context, opts *options) error {
	loaded, err := opts.Load()
	if err != nil {
		return fmt.Errorf("failed to load CDI specification: %w", err)
	}

	transformer := transformroot.New(
		transformroot.WithRoot(opts.from),
		transformroot.WithTargetRoot(opts.to),
		transformroot.WithRelativeTo(opts.relativeTo),
	)
	if err := transformer.Transform(loaded.Raw()); err != nil {
		return fmt.Errorf("failed to transform CDI specification: %w", err)
	}

	return opts.Save(loaded)
}
// Load loads the input CDI specification.
// The contents are read from the configured input (STDIN when input is "-"),
// parsed as a CDI spec and wrapped in a spec.Interface. Read and parse
// failures are wrapped with %w so that callers can inspect the underlying
// error with errors.Is / errors.As.
func (o transformOptions) Load() (spec.Interface, error) {
	contents, err := o.getContents()
	if err != nil {
		return nil, fmt.Errorf("failed to read spec contents: %w", err)
	}

	raw, err := cdi.ParseSpec(contents)
	if err != nil {
		return nil, fmt.Errorf("failed to parse CDI spec: %w", err)
	}

	return spec.New(
		spec.WithRawSpec(raw),
	)
}
// getContents returns the raw bytes of the input specification: the named
// file, or everything available on STDIN when input is "-".
func (o transformOptions) getContents() ([]byte, error) {
	if o.input != "-" {
		return os.ReadFile(o.input)
	}
	return io.ReadAll(os.Stdin)
}
// Save saves the CDI specification to the output file.
// When no output file is configured the specification is written to STDOUT
// instead; a failure there is wrapped with %w so the underlying error stays
// inspectable by callers.
func (o transformOptions) Save(s spec.Interface) error {
	if o.output != "" {
		return s.Save(o.output)
	}

	if _, err := s.WriteTo(os.Stdout); err != nil {
		return fmt.Errorf("failed to write CDI spec to STDOUT: %w", err)
	}
	return nil
}

View File

@@ -1,52 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package transform
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// command implements the 'transform' CLI command.
type command struct {
	// logger is passed on to the subcommands created in build.
	logger logger.Interface
}
// NewCommand returns the 'transform' CLI command, using the specified logger.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the 'transform' cli.Command. It defines no flags of its own
// and registers the 'root' transform as its only subcommand.
func (m command) build() *cli.Command {
	return &cli.Command{
		Name:  "transform",
		Usage: "Apply a transform to a CDI specification",
		Flags: []cli.Flag{},
		Subcommands: []*cli.Command{
			root.NewCommand(m.logger),
		},
	}
}

View File

@@ -1,244 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"errors"
"fmt"
"reflect"
"strconv"
"strings"
"github.com/urfave/cli/v2"
createdefault "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/create-default"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// command implements the 'config' CLI command.
type command struct {
	// logger is passed on to the subcommands created in build.
	logger logger.Interface
}
// options stores the subcommand options
type options struct {
	// Options provides the shared Config, InPlace and Output flag destinations.
	flags.Options
	// setListSeparator separates list elements in the value of a --set flag.
	setListSeparator string
	// sets collects every 'key[=value]' expression given through --set.
	sets cli.StringSlice
}
// NewCommand returns the 'config' CLI command, using the specified logger.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the 'config' cli.Command: its flags, the Before hook that
// validates them, the Action that applies them, and the create-default
// subcommand. A single options value backs all flag destinations.
func (m command) build() *cli.Command {
	opts := options{}

	return &cli.Command{
		Name:  "config",
		Usage: "Interact with the NVIDIA Container Toolkit configuration",
		Before: func(ctx *cli.Context) error {
			return validateFlags(ctx, &opts)
		},
		Action: func(ctx *cli.Context) error {
			return run(ctx, &opts)
		},
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:        "config-file",
				Aliases:     []string{"config", "c"},
				Usage:       "Specify the config file to modify.",
				Value:       config.GetConfigFilePath(),
				Destination: &opts.Config,
			},
			&cli.StringSliceFlag{
				Name: "set",
				Usage: "Set a config value using the pattern 'key[=value]'. " +
					"Specifying only 'key' is equivalent to 'key=true' for boolean settings. " +
					"This flag can be specified multiple times, but only the last value for a specific " +
					"config option is applied. " +
					"If the setting represents a list, the elements are colon-separated.",
				Destination: &opts.sets,
			},
			&cli.StringFlag{
				Name:        "set-list-separator",
				Usage:       "Specify a separator for lists applied using the set command.",
				Hidden:      true,
				Value:       ":",
				Destination: &opts.setListSeparator,
			},
			&cli.BoolFlag{
				Name:        "in-place",
				Aliases:     []string{"i"},
				Usage:       "Modify the config file in-place",
				Destination: &opts.InPlace,
			},
			&cli.StringFlag{
				Name:        "output",
				Aliases:     []string{"o"},
				Usage:       "Specify the output file to write to; If not specified, the output is written to stdout",
				Destination: &opts.Output,
			},
		},
		Subcommands: []*cli.Command{
			createdefault.NewCommand(m.logger),
		},
	}
}
// validateFlags requires a non-empty list separator for --set values.
func validateFlags(c *cli.Context, opts *options) error {
	if opts.setListSeparator != "" {
		return nil
	}
	return fmt.Errorf("set-list-separator must be set")
}
// run loads the configuration from the selected config file, applies every
// --set expression in the order given, and saves the result to the output
// created from the options. A --set expression that resolves to a nil value
// deletes the key instead of setting it.
func run(c *cli.Context, opts *options) error {
	cfgToml, err := config.New(config.WithConfigFile(opts.Config))
	if err != nil {
		return fmt.Errorf("unable to create config: %v", err)
	}

	for _, expr := range opts.sets.Value() {
		key, value, err := setFlagToKeyValue(expr, opts.setListSeparator)
		if err != nil {
			return fmt.Errorf("invalid --set option %v: %w", expr, err)
		}
		if value != nil {
			cfgToml.Set(key, value)
			continue
		}
		// The result of Delete is discarded.
		_ = cfgToml.Delete(key)
	}

	if err := opts.EnsureOutputFolder(); err != nil {
		return fmt.Errorf("failed to create output directory: %v", err)
	}

	out, err := opts.CreateOutput()
	if err != nil {
		return fmt.Errorf("failed to open output file: %v", err)
	}
	defer out.Close()

	if _, err := cfgToml.Save(out); err != nil {
		return fmt.Errorf("failed to save config: %v", err)
	}
	return nil
}
// Sentinel errors used when parsing --set options; match them with errors.Is.
var (
	// errInvalidConfigOption marks a key that cannot be resolved to a config field.
	errInvalidConfigOption = errors.New("invalid config option")
	// errUndefinedField marks a path element with no matching toml-tagged field.
	errUndefinedField = errors.New("undefined field")
	// errInvalidFormat marks a malformed option or a value that cannot be parsed.
	errInvalidFormat = errors.New("invalid format")
)
// setFlagToKeyValue parses a single --set option of the form key[=value].
//
// The key is resolved to a config field type via getField; an unknown key
// yields errInvalidConfigOption. Without "=value" the option is only valid
// for bool (or pointer-to-bool) fields and evaluates to true. With a value:
//   - pointer fields given the literal "nil" return a nil value;
//   - other pointer fields are parsed as their element kind;
//   - bool fields are parsed with strconv.ParseBool;
//   - string fields return the value unchanged;
//   - slices of string or int are split on setListSeparator.
//
// The key is returned in every case, including on error.
func setFlagToKeyValue(setFlag string, setListSeparator string) (string, interface{}, error) {
	key, rawValue, hasValue := strings.Cut(setFlag, "=")

	fieldType, err := getField(key)
	if err != nil {
		return key, nil, fmt.Errorf("%w: %w", errInvalidConfigOption, err)
	}
	kind := fieldType.Kind()

	if !hasValue {
		// A bare key is shorthand for key=true, but only for boolean settings.
		isBool := kind == reflect.Bool
		if !isBool && kind == reflect.Pointer {
			isBool = fieldType.Elem().Kind() == reflect.Bool
		}
		if !isBool {
			return key, nil, fmt.Errorf("%w: expected key=value; got %v", errInvalidFormat, setFlag)
		}
		return key, true, nil
	}

	// For pointer fields, anything other than "nil" is parsed as the pointee.
	if kind == reflect.Pointer && rawValue != "nil" {
		kind = fieldType.Elem().Kind()
	}

	switch kind {
	case reflect.String:
		return key, rawValue, nil
	case reflect.Bool:
		parsed, parseErr := strconv.ParseBool(rawValue)
		if parseErr != nil {
			return key, rawValue, fmt.Errorf("%w: %w", errInvalidFormat, parseErr)
		}
		return key, parsed, nil
	case reflect.Pointer:
		return key, nil, nil
	case reflect.Slice:
		elements := strings.Split(rawValue, setListSeparator)
		switch fieldType.Elem().Kind() {
		case reflect.Int:
			var output []int64
			for _, element := range elements {
				number, parseErr := strconv.ParseInt(element, 10, 0)
				if parseErr != nil {
					return key, nil, fmt.Errorf("%w: %w", errInvalidFormat, parseErr)
				}
				output = append(output, number)
			}
			return key, output, nil
		case reflect.String:
			return key, elements, nil
		}
	}

	return key, nil, fmt.Errorf("unsupported type for %v (%v)", []string{key, rawValue}, kind)
}
// getField resolves a dot-separated key (matched against toml tags) to the
// type of the corresponding field of config.Config.
func getField(key string) (reflect.Type, error) {
	path := strings.Split(key, ".")
	field, err := getStruct(reflect.TypeOf(config.Config{}), path...)
	if err != nil {
		return nil, err
	}
	return field.Type, nil
}
// getStruct walks paths through current and returns the struct field selected
// by the last element.
//
// Each path element is compared against the name part of a field's `toml` tag
// (the text before the first comma); fields without a toml tag are ignored.
// Pointer types are dereferenced before their fields are inspected.
//
// An error wrapping errUndefinedField is returned when paths is empty, when an
// element matches no field, or when the path tries to descend into a type that
// has no fields (for example a string). Without that last guard
// reflect.Type.NumField would panic on such input.
func getStruct(current reflect.Type, paths ...string) (reflect.StructField, error) {
	if len(paths) < 1 {
		return reflect.StructField{}, fmt.Errorf("%w: no fields selected", errUndefinedField)
	}
	tomlField := paths[0]

	for current != nil && current.Kind() == reflect.Pointer {
		current = current.Elem()
	}
	if current == nil || current.Kind() != reflect.Struct {
		// NumField and Field are only defined for struct types.
		return reflect.StructField{}, fmt.Errorf("%w: %q", errUndefinedField, tomlField)
	}

	for i := 0; i < current.NumField(); i++ {
		f := current.Field(i)
		tag, ok := f.Tag.Lookup("toml")
		if !ok {
			continue
		}
		// Drop tag options such as ",omitempty".
		if name, _, _ := strings.Cut(tag, ","); name != tomlField {
			continue
		}
		if len(paths) == 1 {
			return f, nil
		}
		return getStruct(f.Type, paths[1:]...)
	}
	return reflect.StructField{}, fmt.Errorf("%w: %q", errUndefinedField, tomlField)
}

View File

@@ -1,143 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestSetFlagToKeyValue is a table-driven test of setFlagToKeyValue covering
// unknown keys, boolean shorthand, string values and list splitting. Cases
// that leave setListSeparator empty are run with "," as the separator.
func TestSetFlagToKeyValue(t *testing.T) {
	// TODO: We need to enable this test again since switching to reflect.
	testCases := []struct {
		description      string
		setFlag          string
		setListSeparator string
		expectedKey      string
		expectedValue    interface{}
		expectedError    error
	}{
		{
			description:   "option not present returns an error",
			setFlag:       "undefined=new-value",
			expectedKey:   "undefined",
			expectedError: errInvalidConfigOption,
		},
		{
			description:   "undefined nexted option returns error",
			setFlag:       "nvidia-container-cli.undefined",
			expectedKey:   "nvidia-container-cli.undefined",
			expectedError: errInvalidConfigOption,
		},
		{
			description:   "boolean option assumes true",
			setFlag:       "disable-require",
			expectedKey:   "disable-require",
			expectedValue: true,
		},
		{
			description:   "boolean option returns true",
			setFlag:       "disable-require=true",
			expectedKey:   "disable-require",
			expectedValue: true,
		},
		{
			description:   "boolean option returns false",
			setFlag:       "disable-require=false",
			expectedKey:   "disable-require",
			expectedValue: false,
		},
		{
			description:   "invalid boolean option returns error",
			setFlag:       "disable-require=something",
			expectedKey:   "disable-require",
			expectedValue: "something",
			expectedError: errInvalidFormat,
		},
		{
			description:   "string option requires value",
			setFlag:       "swarm-resource",
			expectedKey:   "swarm-resource",
			expectedValue: nil,
			expectedError: errInvalidFormat,
		},
		{
			description:   "string option returns value",
			setFlag:       "swarm-resource=string-value",
			expectedKey:   "swarm-resource",
			expectedValue: "string-value",
		},
		{
			description:   "string option returns value with equals",
			setFlag:       "swarm-resource=string-value=more",
			expectedKey:   "swarm-resource",
			expectedValue: "string-value=more",
		},
		{
			description:   "string option treats bool value as string",
			setFlag:       "swarm-resource=true",
			expectedKey:   "swarm-resource",
			expectedValue: "true",
		},
		{
			description:   "string option treats int value as string",
			setFlag:       "swarm-resource=5",
			expectedKey:   "swarm-resource",
			expectedValue: "5",
		},
		{
			description:   "[]string option returns single value",
			setFlag:       "nvidia-container-cli.environment=string-value",
			expectedKey:   "nvidia-container-cli.environment",
			expectedValue: []string{"string-value"},
		},
		{
			description:      "[]string option returns multiple values",
			setFlag:          "nvidia-container-cli.environment=first,second",
			setListSeparator: ",",
			expectedKey:      "nvidia-container-cli.environment",
			expectedValue:    []string{"first", "second"},
		},
		{
			description:      "[]string option returns values with equals",
			setFlag:          "nvidia-container-cli.environment=first=1,second=2",
			setListSeparator: ",",
			expectedKey:      "nvidia-container-cli.environment",
			expectedValue:    []string{"first=1", "second=2"},
		},
		{
			description:      "[]string option returns multiple values semi-colon",
			setFlag:          "nvidia-container-cli.environment=first;second",
			setListSeparator: ";",
			expectedKey:      "nvidia-container-cli.environment",
			expectedValue:    []string{"first", "second"},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			// Fall back to a comma when the case does not pick a separator.
			separator := tc.setListSeparator
			if separator == "" {
				separator = ","
			}

			key, value, err := setFlagToKeyValue(tc.setFlag, separator)

			require.ErrorIs(t, err, tc.expectedError)
			require.EqualValues(t, tc.expectedKey, key)
			require.EqualValues(t, tc.expectedValue, value)
		})
	}
}

View File

@@ -1,94 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package defaultsubcommand
import (
"fmt"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// command holds the logger that NewCommand was given; the CLI command is
// built from it.
type command struct {
	logger logger.Interface
}
// NewCommand returns the 'default' CLI command, wired to the given logger.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the 'default' command. The parsed flag values are stored in
// a flags.Options value shared by the Before (validation) and Action (run)
// callbacks.
func (m command) build() *cli.Command {
	var opts flags.Options

	outputFlag := &cli.StringFlag{
		Name:        "output",
		Aliases:     []string{"o"},
		Usage:       "Specify the output file to write to; If not specified, the output is written to stdout",
		Destination: &opts.Output,
	}

	return &cli.Command{
		Name:    "default",
		Aliases: []string{"create-default", "generate-default"},
		Usage:   "Generate the default NVIDIA Container Toolkit configuration file",
		Flags:   []cli.Flag{outputFlag},
		Before: func(ctx *cli.Context) error {
			return m.validateFlags(ctx, &opts)
		},
		Action: func(ctx *cli.Context) error {
			return m.run(ctx, &opts)
		},
	}
}
// validateFlags is the command's Before hook; it delegates to
// flags.Options.Validate.
func (m command) validateFlags(c *cli.Context, opts *flags.Options) error {
	if err := opts.Validate(); err != nil {
		return err
	}
	return nil
}
// run creates a config via config.New (no options) and writes it to the
// output selected by opts: a file when one is given, stdout otherwise.
//
// Errors wrap their cause so callers can inspect them with errors.Is /
// errors.As.
func (m command) run(c *cli.Context, opts *flags.Options) (err error) {
	cfgToml, err := config.New()
	if err != nil {
		return fmt.Errorf("unable to load or create config: %w", err)
	}

	if err := opts.EnsureOutputFolder(); err != nil {
		return fmt.Errorf("failed to create output directory: %w", err)
	}
	output, err := opts.CreateOutput()
	if err != nil {
		return fmt.Errorf("failed to open output file: %w", err)
	}
	// Closing a file that was written to can surface a delayed write error,
	// so report it unless an earlier error is already being returned.
	defer func() {
		if closeErr := output.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to close output: %w", closeErr)
		}
	}()

	if _, err := cfgToml.Save(output); err != nil {
		return fmt.Errorf("failed to write output: %w", err)
	}
	return nil
}

View File

@@ -1,82 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package flags
import (
"fmt"
"io"
"os"
"path/filepath"
)
// Options holds the flag values shared by the config commands.
type Options struct {
	// Config is the config file path; it doubles as the output path when
	// InPlace is set.
	Config string
	// Output is the explicit output path; empty means stdout.
	Output string
	// InPlace requests that Config be overwritten with the result.
	InPlace bool
}
// Validate reports an error when both in-place editing and an explicit
// output path are requested; every other combination is accepted.
func (o Options) Validate() error {
	conflicting := o.InPlace && o.Output != ""
	if !conflicting {
		return nil
	}
	return fmt.Errorf("cannot specify both --in-place and --output")
}
// GetOutput returns the path the result is written to: the config path when
// editing in place, otherwise the explicit output path (which may be empty).
func (o Options) GetOutput() string {
	target := o.Output
	if o.InPlace {
		target = o.Config
	}
	return target
}
// EnsureOutputFolder creates the directory that will contain the effective
// output file, including missing parents, with mode 0755. When there is no
// output path (output goes to STDOUT) it does nothing.
func (o Options) EnsureOutputFolder() error {
	target := o.GetOutput()
	if target == "" {
		return nil
	}

	parent := filepath.Dir(target)
	if parent == "" {
		return nil
	}
	return os.MkdirAll(parent, 0755)
}
// CreateOutput opens the destination for the generated config. With an
// effective output path the file is created (or truncated) via os.Create;
// without one, os.Stdout is returned wrapped so that Close is a no-op.
func (o Options) CreateOutput() (io.WriteCloser, error) {
	if target := o.GetOutput(); target != "" {
		return os.Create(target)
	}
	return nullCloser{os.Stdout}, nil
}
// nullCloser adapts an io.Writer to io.WriteCloser without taking ownership
// of it: writes go to the embedded writer and Close leaves it untouched.
type nullCloser struct {
	io.Writer
}

// Close always succeeds and does not close the wrapped writer.
func (nullCloser) Close() error {
	return nil
}

View File

@@ -17,33 +17,30 @@
package chmod
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
logger *logrus.Logger
}
type config struct {
paths cli.StringSlice
modeStr string
mode fs.FileMode
mode string
containerSpec string
}
// NewCommand constructs a chmod command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
@@ -69,13 +66,13 @@ func (m command) build() *cli.Command {
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "path",
Usage: "Specify a path to apply the specified mode to",
Usage: "Specifiy a path to apply the specified mode to",
Destination: &cfg.paths,
},
&cli.StringFlag{
Name: "mode",
Usage: "Specify the file mode",
Destination: &cfg.modeStr,
Destination: &cfg.mode,
},
&cli.StringFlag{
Name: "container-spec",
@@ -88,16 +85,10 @@ func (m command) build() *cli.Command {
}
func validateFlags(c *cli.Context, cfg *config) error {
if strings.TrimSpace(cfg.modeStr) == "" {
if strings.TrimSpace(cfg.mode) == "" {
return fmt.Errorf("a non-empty mode must be specified")
}
modeInt, err := strconv.ParseUint(cfg.modeStr, 8, 32)
if err != nil {
return fmt.Errorf("failed to parse mode as octal: %v", err)
}
cfg.mode = fs.FileMode(modeInt)
for _, p := range cfg.paths.Value() {
if strings.TrimSpace(p) == "" {
return fmt.Errorf("paths must not be empty")
@@ -121,38 +112,33 @@ func (m command) run(c *cli.Context, cfg *config) error {
return fmt.Errorf("empty container root detected")
}
paths := m.getPaths(containerRoot, cfg.paths.Value(), cfg.mode)
paths := m.getPaths(containerRoot, cfg.paths.Value())
if len(paths) == 0 {
m.logger.Debugf("No paths specified; exiting")
return nil
}
for _, path := range paths {
err = os.Chmod(path, cfg.mode)
// in some cases this is not an issue (e.g. whole /dev mounted), see #143
if errors.Is(err, fs.ErrPermission) {
m.logger.Debugf("Ignoring permission error with chmod: %v", err)
err = nil
}
locator := lookup.NewExecutableLocator(m.logger, "")
targets, err := locator.Locate("chmod")
if err != nil {
return fmt.Errorf("failed to locate chmod: %v", err)
}
chmodPath := targets[0]
return err
args := append([]string{filepath.Base(chmodPath), cfg.mode}, paths...)
return syscall.Exec(chmodPath, args, nil)
}
// getPaths updates the specified paths relative to the root.
func (m command) getPaths(root string, paths []string, desiredMode fs.FileMode) []string {
func (m command) getPaths(root string, paths []string) []string {
var pathsInRoot []string
for _, f := range paths {
path := filepath.Join(root, f)
stat, err := os.Stat(path)
if err != nil {
if _, err := os.Stat(path); err != nil {
m.logger.Debugf("Skipping path %q: %v", path, err)
continue
}
if (stat.Mode()&(fs.ModePerm|fs.ModeSetuid|fs.ModeSetgid|fs.ModeSticky))^desiredMode == 0 {
m.logger.Debugf("Skipping path %q: already desired mode", path)
continue
}
pathsInRoot = append(pathsInRoot, path)
}

View File

@@ -22,17 +22,15 @@ import (
"path/filepath"
"strings"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
logger *logrus.Logger
}
type config struct {
@@ -43,7 +41,7 @@ type config struct {
}
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
@@ -57,7 +55,7 @@ func (m command) build() *cli.Command {
// Create the '' command
c := cli.Command{
Name: "create-symlinks",
Usage: "A hook to create symlinks in the container. This can be used to process CSV mount specs",
Usage: "A hook to create symlinks in the container. This can be used to proces CSV mount specs",
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
@@ -102,10 +100,7 @@ func (m command) run(c *cli.Context, cfg *config) error {
csvFiles := cfg.filenames.Value()
chainLocator := lookup.NewSymlinkChainLocator(
lookup.WithLogger(m.logger),
lookup.WithRoot(cfg.hostRoot),
)
chainLocator := lookup.NewSymlinkChainLocator(m.logger, cfg.hostRoot)
var candidates []string
for _, file := range csvFiles {
@@ -121,7 +116,7 @@ func (m command) run(c *cli.Context, cfg *config) error {
}
targets, err := chainLocator.Locate(ms.Path)
if err != nil {
m.logger.Warningf("Failed to locate symlink %v", ms.Path)
m.logger.Warnf("Failed to locate symlink %v", ms.Path)
}
candidates = append(candidates, targets...)
}
@@ -130,18 +125,21 @@ func (m command) run(c *cli.Context, cfg *config) error {
created := make(map[string]bool)
// candidates is a list of absolute paths to symlinks in a chain, or the final target of the chain.
for _, candidate := range candidates {
target, err := symlinks.Resolve(candidate)
targets, err := m.Locate(candidate)
if err != nil {
m.logger.Debugf("Skipping invalid link: %v", err)
continue
} else if target == candidate {
} else if len(targets) != 1 {
m.logger.Debugf("Unexepected number of targets: %v", targets)
continue
} else if targets[0] == candidate {
m.logger.Debugf("%v is not a symlink", candidate)
continue
}
err = m.createLink(created, cfg.hostRoot, containerRoot, target, candidate)
err = m.createLink(created, cfg.hostRoot, containerRoot, targets[0], candidate)
if err != nil {
m.logger.Warningf("Failed to create link %v: %v", []string{target, candidate}, err)
m.logger.Warnf("Failed to create link %v: %v", []string{targets[0], candidate}, err)
}
}
@@ -149,13 +147,13 @@ func (m command) run(c *cli.Context, cfg *config) error {
for _, l := range links {
parts := strings.Split(l, "::")
if len(parts) != 2 {
m.logger.Warningf("Invalid link specification %v", l)
m.logger.Warnf("Invalid link specification %v", l)
continue
}
err := m.createLink(created, cfg.hostRoot, containerRoot, parts[0], parts[1])
if err != nil {
m.logger.Warningf("Failed to create link %v: %v", parts, err)
m.logger.Warnf("Failed to create link %v: %v", parts, err)
}
}
@@ -166,7 +164,7 @@ func (m command) run(c *cli.Context, cfg *config) error {
func (m command) createLink(created map[string]bool, hostRoot string, containerRoot string, target string, link string) error {
linkPath, err := changeRoot(hostRoot, containerRoot, link)
if err != nil {
m.logger.Warningf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
m.logger.Warnf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
}
if created[linkPath] {
m.logger.Debugf("Link %v already created", linkPath)
@@ -175,7 +173,7 @@ func (m command) createLink(created map[string]bool, hostRoot string, containerR
targetPath, err := changeRoot(hostRoot, "/", target)
if err != nil {
m.logger.Warningf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
m.logger.Warnf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
}
m.logger.Infof("Symlinking %v to %v", linkPath, targetPath)

View File

@@ -17,18 +17,20 @@
package hook
import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/commands"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type hookCommand struct {
logger logger.Interface
logger *logrus.Logger
}
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
func NewCommand(logger *logrus.Logger) *cli.Command {
c := hookCommand{
logger: logger,
}
@@ -43,7 +45,11 @@ func (m hookCommand) build() *cli.Command {
Usage: "A collection of hooks that may be injected into an OCI spec",
}
hook.Subcommands = commands.New(m.logger)
hook.Subcommands = []*cli.Command{
ldcache.NewCommand(m.logger),
symlinks.NewCommand(m.logger),
chmod.NewCommand(m.logger),
}
return &hook
}

View File

@@ -0,0 +1,129 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
// command holds the logger used by the update-ldcache hook.
type command struct {
	logger *logrus.Logger
}
// config holds the values parsed from the update-ldcache flags.
type config struct {
	// folders collects every --folder value.
	folders cli.StringSlice
	// containerSpec is the --container-spec value.
	containerSpec string
}
// NewCommand returns the update-ldcache CLI command, wired to the given logger.
func NewCommand(logger *logrus.Logger) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the update-ldcache command. Flag values are parsed into a
// config value that is handed to run when the command executes.
func (m command) build() *cli.Command {
	cfg := config{}

	// Create the 'update-ldcache' command
	c := cli.Command{
		Name:  "update-ldcache",
		Usage: "Update ldcache in a container by running ldconfig",
		Action: func(c *cli.Context) error {
			return m.run(c, &cfg)
		},
	}

	c.Flags = []cli.Flag{
		&cli.StringSliceFlag{
			Name: "folder",
			// Help text typo fixed ("Specifiy" -> "Specify").
			Usage:       "Specify a folder to add to /etc/ld.so.conf before updating the ld cache",
			Destination: &cfg.folders,
		},
		&cli.StringFlag{
			Name:        "container-spec",
			Usage:       "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
			Destination: &cfg.containerSpec,
		},
	}

	return &c
}
// run loads the container state named by cfg.containerSpec, writes the
// requested folders to an ld.so.conf.d file under the container root and then
// replaces the current process with /sbin/ldconfig. When the container root
// is non-empty it is passed to ldconfig with -r.
//
// syscall.Exec only returns if the exec itself fails, so a nil error is never
// observed by the caller. ldconfig is started with a nil environment.
func (m command) run(c *cli.Context, cfg *config) error {
	s, err := oci.LoadContainerState(cfg.containerSpec)
	if err != nil {
		return fmt.Errorf("failed to load container state: %w", err)
	}

	containerRoot, err := s.GetContainerRoot()
	if err != nil {
		// Message typo fixed ("failed to determined" -> "failed to determine").
		return fmt.Errorf("failed to determine container root: %w", err)
	}

	if err := m.createConfig(containerRoot, cfg.folders.Value()); err != nil {
		return fmt.Errorf("failed to update ld.so.conf: %w", err)
	}

	args := []string{"/sbin/ldconfig"}
	if containerRoot != "" {
		args = append(args, "-r", containerRoot)
	}

	return syscall.Exec(args[0], args, nil)
}
// createConfig writes /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf below root,
// listing each distinct folder on its own line in first-seen order.
//
// Nothing is written when folders is empty. The ld.so.conf.d directory is
// created if it does not exist, so containers that do not ship it no longer
// make the hook fail. The file is made world readable because os.CreateTemp
// creates it with mode 0600.
// NOTE(review): the 0644 mode assumes ldconfig may later be run in the
// container by a non-root user — confirm this is the intended use.
//
// An error from closing the file is reported unless an earlier error is
// already being returned.
func (m command) createConfig(root string, folders []string) (err error) {
	if len(folders) == 0 {
		m.logger.Debugf("No folders to add to /etc/ld.so.conf")
		return nil
	}

	confDir := filepath.Join(root, "/etc/ld.so.conf.d")
	if err := os.MkdirAll(confDir, 0755); err != nil {
		return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
	}

	configFile, err := os.CreateTemp(confDir, "nvcr-*.conf")
	if err != nil {
		return fmt.Errorf("failed to create config file: %w", err)
	}
	defer func() {
		if closeErr := configFile.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to close config file: %w", closeErr)
		}
	}()

	m.logger.Debugf("Adding folders %v to %v", folders, configFile.Name())

	configured := make(map[string]bool, len(folders))
	for _, folder := range folders {
		if configured[folder] {
			continue
		}
		if _, err := configFile.WriteString(folder + "\n"); err != nil {
			return fmt.Errorf("failed to update ld.so.conf.d: %w", err)
		}
		configured[folder] = true
	}

	if err := configFile.Chmod(0644); err != nil {
		return fmt.Errorf("failed to chmod config file: %w", err)
	}

	return nil
}

View File

@@ -17,17 +17,16 @@
package info
import (
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
type command struct {
logger logger.Interface
logger *logrus.Logger
}
// NewCommand constructs an info command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
@@ -36,13 +35,13 @@ func NewCommand(logger logger.Interface) *cli.Command {
// build
func (m command) build() *cli.Command {
// Create the 'info' command
info := cli.Command{
// Create the 'hook' command
hook := cli.Command{
Name: "info",
Usage: "Provide information about the system",
}
info.Subcommands = []*cli.Command{}
hook.Subcommands = []*cli.Command{}
return &info
return &hook
}

View File

@@ -19,33 +19,29 @@ package main
import (
"os"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
infoCLI "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/info"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
)
// options defines the options that can be set for the CLI through config files,
var logger = log.New()
// config defines the options that can be set for the CLI through config files,
// environment variables, or command line flags
type options struct {
type config struct {
// Debug indicates whether the CLI is started in "debug" mode
Debug bool
// Quiet indicates whether the CLI is started in "quiet" mode
Quiet bool
}
func main() {
logger := logrus.New()
// Create a options struct to hold the parsed environment variables or command line flags
opts := options{}
// Create a config struct to hold the parsed environment variables or command line flags
config := config{}
// Create the top-level CLI
c := cli.NewApp()
@@ -61,25 +57,16 @@ func main() {
Name: "debug",
Aliases: []string{"d"},
Usage: "Enable debug-level logging",
Destination: &opts.Debug,
Destination: &config.Debug,
EnvVars: []string{"NVIDIA_CTK_DEBUG"},
},
&cli.BoolFlag{
Name: "quiet",
Usage: "Suppress all output except for errors; overrides --debug",
Destination: &opts.Quiet,
EnvVars: []string{"NVIDIA_CTK_QUIET"},
},
}
// Set log-level for all subcommands
c.Before = func(c *cli.Context) error {
logLevel := logrus.InfoLevel
if opts.Debug {
logLevel = logrus.DebugLevel
}
if opts.Quiet {
logLevel = logrus.ErrorLevel
logLevel := log.InfoLevel
if config.Debug {
logLevel = log.DebugLevel
}
logger.SetLevel(logLevel)
return nil
@@ -92,13 +79,12 @@ func main() {
infoCLI.NewCommand(logger),
cdi.NewCommand(logger),
system.NewCommand(logger),
config.NewCommand(logger),
}
// Run the CLI
err := c.Run(os.Args)
if err != nil {
logger.Errorf("%v", err)
os.Exit(1)
log.Errorf("%v", err)
log.Exit(1)
}
}

View File

@@ -19,39 +19,29 @@ package configure
import (
"encoding/json"
"fmt"
"path/filepath"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/nvidia"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/crio"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/docker"
"github.com/pelletier/go-toml"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook"
)
const (
defaultRuntime = "docker"
// defaultNVIDIARuntimeName is the default name to use in configs for the NVIDIA Container Runtime
defaultNVIDIARuntimeName = "nvidia"
// defaultNVIDIARuntimeExecutable is the default NVIDIA Container Runtime executable file name
defaultNVIDIARuntimeExecutable = "nvidia-container-runtime"
defaultNVIDIARuntimeExpecutablePath = "/usr/bin/nvidia-container-runtime"
defaultNVIDIARuntimeHookExpecutablePath = "/usr/bin/nvidia-container-runtime-hook"
defaultContainerdConfigFilePath = "/etc/containerd/config.toml"
defaultCrioConfigFilePath = "/etc/crio/crio.conf"
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
defaultCrioConfigFilePath = "/etc/crio/crio.conf"
)
type command struct {
logger logger.Interface
logger *logrus.Logger
}
// NewCommand constructs an configure command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
@@ -64,22 +54,7 @@ type config struct {
dryRun bool
runtime string
configFilePath string
mode string
hookFilePath string
runtimeConfigOverrideJSON string
nvidiaRuntime struct {
name string
path string
hookPath string
setAsDefault bool
}
// cdi-specific options
cdi struct {
enabled bool
}
nvidiaOptions nvidia.Options
}
func (m command) build() *cli.Command {
@@ -90,9 +65,6 @@ func (m command) build() *cli.Command {
configure := cli.Command{
Name: "configure",
Usage: "Add a runtime to the specified container engine",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &config)
},
Action: func(c *cli.Context) error {
return m.configureWrapper(c, &config)
},
@@ -106,7 +78,7 @@ func (m command) build() *cli.Command {
},
&cli.StringFlag{
Name: "runtime",
Usage: "the target runtime engine; one of [containerd, crio, docker]",
Usage: "the target runtime engine. One of [crio, docker]",
Value: defaultRuntime,
Destination: &config.runtime,
},
@@ -115,240 +87,117 @@ func (m command) build() *cli.Command {
Usage: "path to the config file for the target runtime",
Destination: &config.configFilePath,
},
&cli.StringFlag{
Name: "config-mode",
Usage: "the config mode for runtimes that support multiple configuration mechanisms",
Destination: &config.mode,
},
&cli.StringFlag{
Name: "oci-hook-path",
Usage: "the path to the OCI runtime hook to create if --config-mode=oci-hook is specified. If no path is specified, the generated hook is output to STDOUT.\n\tNote: The use of OCI hooks is deprecated.",
Destination: &config.hookFilePath,
},
&cli.StringFlag{
Name: "nvidia-runtime-name",
Usage: "specify the name of the NVIDIA runtime that will be added",
Value: defaultNVIDIARuntimeName,
Destination: &config.nvidiaRuntime.name,
Value: nvidia.RuntimeName,
Destination: &config.nvidiaOptions.RuntimeName,
},
&cli.StringFlag{
Name: "nvidia-runtime-path",
Aliases: []string{"runtime-path"},
Name: "runtime-path",
Usage: "specify the path to the NVIDIA runtime executable",
Value: defaultNVIDIARuntimeExecutable,
Destination: &config.nvidiaRuntime.path,
},
&cli.StringFlag{
Name: "nvidia-runtime-hook-path",
Usage: "specify the path to the NVIDIA Container Runtime hook executable",
Value: defaultNVIDIARuntimeHookExpecutablePath,
Destination: &config.nvidiaRuntime.hookPath,
Value: nvidia.RuntimeExecutable,
Destination: &config.nvidiaOptions.RuntimePath,
},
&cli.BoolFlag{
Name: "nvidia-set-as-default",
Aliases: []string{"set-as-default"},
Usage: "set the NVIDIA runtime as the default runtime",
Destination: &config.nvidiaRuntime.setAsDefault,
},
&cli.BoolFlag{
Name: "cdi.enabled",
Aliases: []string{"cdi.enable"},
Usage: "Enable CDI in the configured runtime",
Destination: &config.cdi.enabled,
},
&cli.StringFlag{
Name: "runtime-config-override",
Destination: &config.runtimeConfigOverrideJSON,
Usage: "specify additional runtime options as a JSON string. The paths are relative to the runtime config.",
Value: "{}",
EnvVars: []string{"RUNTIME_CONFIG_OVERRIDE"},
Name: "set-as-default",
Usage: "set the specified runtime as the default runtime",
Destination: &config.nvidiaOptions.SetAsDefault,
},
}
return &configure
}
func (m command) validateFlags(c *cli.Context, config *config) error {
if config.mode == "oci-hook" {
if !filepath.IsAbs(config.nvidiaRuntime.hookPath) {
return fmt.Errorf("the NVIDIA runtime hook path %q is not an absolute path", config.nvidiaRuntime.hookPath)
}
return nil
}
if config.mode != "" && config.mode != "config-file" {
m.logger.Warningf("Ignoring unsupported config mode for %v: %q", config.runtime, config.mode)
}
config.mode = "config-file"
switch config.runtime {
case "containerd", "crio", "docker":
break
default:
return fmt.Errorf("unrecognized runtime '%v'", config.runtime)
}
switch config.runtime {
case "containerd", "crio":
if config.nvidiaRuntime.path == defaultNVIDIARuntimeExecutable {
config.nvidiaRuntime.path = defaultNVIDIARuntimeExpecutablePath
}
if !filepath.IsAbs(config.nvidiaRuntime.path) {
return fmt.Errorf("the NVIDIA runtime path %q is not an absolute path", config.nvidiaRuntime.path)
}
}
if config.runtime != "containerd" && config.runtime != "docker" {
if config.cdi.enabled {
m.logger.Warningf("Ignoring cdi.enabled flag for %v", config.runtime)
}
config.cdi.enabled = false
}
if config.runtimeConfigOverrideJSON != "" && config.runtime != "containerd" {
m.logger.Warningf("Ignoring runtime-config-override flag for %v", config.runtime)
config.runtimeConfigOverrideJSON = ""
}
return nil
}
// configureWrapper updates the specified container engine config to enable the NVIDIA runtime
func (m command) configureWrapper(c *cli.Context, config *config) error {
switch config.mode {
case "oci-hook":
return m.configureOCIHook(c, config)
case "config-file":
return m.configureConfigFile(c, config)
switch config.runtime {
case "crio":
return m.configureCrio(c, config)
case "docker":
return m.configureDocker(c, config)
}
return fmt.Errorf("unsupported config-mode: %v", config.mode)
return fmt.Errorf("unrecognized runtime '%v'", config.runtime)
}
// configureConfigFile updates the specified container engine config file to enable the NVIDIA runtime.
func (m command) configureConfigFile(c *cli.Context, config *config) error {
configFilePath := config.resolveConfigFilePath()
var cfg engine.Interface
var err error
switch config.runtime {
case "containerd":
cfg, err = containerd.New(
containerd.WithLogger(m.logger),
containerd.WithPath(configFilePath),
)
case "crio":
cfg, err = crio.New(
crio.WithLogger(m.logger),
crio.WithPath(configFilePath),
)
case "docker":
cfg, err = docker.New(
docker.WithLogger(m.logger),
docker.WithPath(configFilePath),
)
default:
err = fmt.Errorf("unrecognized runtime '%v'", config.runtime)
}
if err != nil || cfg == nil {
return fmt.Errorf("unable to load config for runtime %v: %v", config.runtime, err)
// configureDocker updates the docker config to enable the NVIDIA Container Runtime
func (m command) configureDocker(c *cli.Context, config *config) error {
configFilePath := config.configFilePath
if configFilePath == "" {
configFilePath = defaultDockerConfigFilePath
}
runtimeConfigOverride, err := config.runtimeConfigOverride()
cfg, err := docker.LoadConfig(configFilePath)
if err != nil {
return fmt.Errorf("unable to parse config overrides: %w", err)
return fmt.Errorf("unable to load config: %v", err)
}
err = cfg.AddRuntime(
config.nvidiaRuntime.name,
config.nvidiaRuntime.path,
config.nvidiaRuntime.setAsDefault,
runtimeConfigOverride,
err = docker.UpdateConfig(
cfg,
config.nvidiaOptions.RuntimeName,
config.nvidiaOptions.RuntimePath,
config.nvidiaOptions.SetAsDefault,
)
if err != nil {
return fmt.Errorf("unable to update config: %v", err)
}
err = enableCDI(config, cfg)
if err != nil {
return fmt.Errorf("failed to enable CDI in %s: %w", config.runtime, err)
if config.dryRun {
output, err := json.MarshalIndent(cfg, "", " ")
if err != nil {
return fmt.Errorf("unable to convert to JSON: %v", err)
}
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
return nil
}
outputPath := config.getOuputConfigPath()
n, err := cfg.Save(outputPath)
err = docker.FlushConfig(cfg, configFilePath)
if err != nil {
return fmt.Errorf("unable to flush config: %v", err)
}
if outputPath != "" {
if n == 0 {
m.logger.Infof("Removed empty config from %v", outputPath)
} else {
m.logger.Infof("Wrote updated config to %v", outputPath)
}
m.logger.Infof("It is recommended that %v daemon be restarted.", config.runtime)
}
m.logger.Infof("Wrote updated config to %v", configFilePath)
m.logger.Infof("It is recommended that the docker daemon be restarted.")
return nil
}
// resolveConfigFilePath returns the config file path to use for the selected
// container engine. An explicitly configured path takes precedence; otherwise
// the default path for the engine is returned. The empty string is returned
// when no path is configured and the engine is not one of containerd, crio,
// or docker.
func (c *config) resolveConfigFilePath() string {
	if explicit := c.configFilePath; explicit != "" {
		return explicit
	}

	// A lookup for an engine that is not listed yields the empty string.
	defaultPaths := map[string]string{
		"containerd": defaultContainerdConfigFilePath,
		"crio":       defaultCrioConfigFilePath,
		"docker":     defaultDockerConfigFilePath,
	}
	return defaultPaths[c.runtime]
}
// getOuputConfigPath returns the path that the updated config should be
// written to. When dry-run is enabled this is the empty string; otherwise it
// is the path returned by resolveConfigFilePath.
func (c *config) getOuputConfigPath() string {
	if !c.dryRun {
		return c.resolveConfigFilePath()
	}
	return ""
}
// runtimeConfigOverride converts the specified runtimeConfigOverride JSON string to a map.
func (o *config) runtimeConfigOverride() (map[string]interface{}, error) {
if o.runtimeConfigOverrideJSON == "" {
return nil, nil
// configureCrio updates the crio config to enable the NVIDIA Container Runtime
func (m command) configureCrio(c *cli.Context, config *config) error {
configFilePath := config.configFilePath
if configFilePath == "" {
configFilePath = defaultCrioConfigFilePath
}
runtimeOptions := make(map[string]interface{})
if err := json.Unmarshal([]byte(o.runtimeConfigOverrideJSON), &runtimeOptions); err != nil {
return nil, fmt.Errorf("failed to read %v as JSON: %w", o.runtimeConfigOverrideJSON, err)
}
return runtimeOptions, nil
}
// configureOCIHook creates and configures the OCI hook for the NVIDIA runtime
func (m *command) configureOCIHook(c *cli.Context, config *config) error {
err := ocihook.CreateHook(config.hookFilePath, config.nvidiaRuntime.hookPath)
cfg, err := crio.LoadConfig(configFilePath)
if err != nil {
return fmt.Errorf("error creating OCI hook: %v", err)
return fmt.Errorf("unable to load config: %v", err)
}
return nil
}
// enableCDI enables the use of CDI in the corresponding container engine
func enableCDI(config *config, cfg engine.Interface) error {
if !config.cdi.enabled {
err = crio.UpdateConfig(
cfg,
config.nvidiaOptions.RuntimeName,
config.nvidiaOptions.RuntimePath,
config.nvidiaOptions.SetAsDefault,
)
if err != nil {
return fmt.Errorf("unable to update config: %v", err)
}
if config.dryRun {
output, err := toml.Marshal(cfg)
if err != nil {
return fmt.Errorf("unable to convert to TOML: %v", err)
}
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
return nil
}
switch config.runtime {
case "containerd":
cfg.Set("enable_cdi", true)
case "docker":
cfg.Set("features", map[string]bool{"cdi": true})
default:
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
err = crio.FlushConfig(configFilePath, cfg)
if err != nil {
return fmt.Errorf("unable to flush config: %v", err)
}
m.logger.Infof("Wrote updated config to %v", configFilePath)
m.logger.Infof("It is recommended that the cri-o daemon be restarted.")
return nil
}

View File

@@ -0,0 +1,75 @@
/*
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package nvidia
// Defaults for the NVIDIA Container Runtime.
const (
	// RuntimeName is the default name to use in configs for the NVIDIA Container Runtime.
	RuntimeName = "nvidia"
	// RuntimeExecutable is the default NVIDIA Container Runtime executable file name.
	// It is a bare file name, not an absolute path.
	RuntimeExecutable = "nvidia-container-runtime"
)
// Options specifies the options for the NVIDIA Container Runtime w.r.t a container engine such as docker.
type Options struct {
	// SetAsDefault controls whether DefaultRuntime reports RuntimeName (true)
	// or the empty string (false).
	SetAsDefault bool
	// RuntimeName is the name given to the runtime; it is copied to Runtime.Name.
	RuntimeName string
	// RuntimePath is the runtime executable to use. When empty, Runtime falls
	// back to RuntimeExecutable.
	RuntimePath string
}
// Runtime defines an NVIDIA runtime with a name and an executable.
type Runtime struct {
	// Name identifies the runtime; DockerRuntimesConfig uses it as the map key.
	Name string
	// Path is the runtime executable; DockerRuntimesConfig emits it as "path".
	Path string
}
// DefaultRuntime returns the name of the runtime that should be configured as
// the default. This is RuntimeName when SetAsDefault is enabled and the empty
// string otherwise.
func (o Options) DefaultRuntime() string {
	if o.SetAsDefault {
		return o.RuntimeName
	}
	return ""
}
// Runtime builds the Runtime described by the options. The runtime is named
// RuntimeName and uses RuntimePath as its executable, falling back to
// RuntimeExecutable when no path was specified.
func (o Options) Runtime() Runtime {
	if o.RuntimePath != "" {
		return Runtime{Name: o.RuntimeName, Path: o.RuntimePath}
	}
	return Runtime{Name: o.RuntimeName, Path: RuntimeExecutable}
}
// DockerRuntimesConfig generates the expected docker "runtimes" config for the
// runtime: a map with a single entry keyed by the runtime name, whose value
// holds the executable "path" and an empty "args" list.
func (r Runtime) DockerRuntimesConfig() map[string]interface{} {
	return map[string]interface{}{
		r.Name: map[string]interface{}{
			"path": r.Path,
			"args": []string{},
		},
	}
}

View File

@@ -17,18 +17,17 @@
package runtime
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type runtimeCommand struct {
logger logger.Interface
logger *logrus.Logger
}
// NewCommand constructs a runtime command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
func NewCommand(logger *logrus.Logger) *cli.Command {
c := runtimeCommand{
logger: logger,
}

View File

@@ -20,49 +20,37 @@ import (
"fmt"
"path/filepath"
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci"
)
type allPossible struct {
logger logger.Interface
devRoot string
logger *logrus.Logger
driverRoot string
deviceMajors devices.Devices
migCaps nvcaps.MigCaps
}
// newAllPossible returns a new allPossible device node lister.
// This lister lists all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
func newAllPossible(logger logger.Interface, devRoot string) (nodeLister, error) {
func newAllPossible(logger *logrus.Logger, driverRoot string) (nodeLister, error) {
deviceMajors, err := devices.GetNVIDIADevices()
if err != nil {
return nil, fmt.Errorf("failed reading device majors: %v", err)
}
var requiredMajors []devices.Name
migCaps, err := nvcaps.NewMigCaps()
if err != nil {
return nil, fmt.Errorf("failed to read MIG caps: %v", err)
}
if migCaps == nil {
migCaps = make(nvcaps.MigCaps)
} else {
requiredMajors = append(requiredMajors, devices.NVIDIACaps)
}
requiredMajors = append(requiredMajors, devices.NVIDIAGPU, devices.NVIDIAUVM)
for _, name := range requiredMajors {
if !deviceMajors.Exists(name) {
return nil, fmt.Errorf("missing required device major %s", name)
}
}
l := allPossible{
logger: logger,
devRoot: devRoot,
driverRoot: driverRoot,
deviceMajors: deviceMajors,
migCaps: migCaps,
}
@@ -72,9 +60,8 @@ func newAllPossible(logger logger.Interface, devRoot string) (nodeLister, error)
// DeviceNodes returns a list of all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
func (m allPossible) DeviceNodes() ([]deviceNode, error) {
gpus, err := nvpci.New(
nvpci.WithPCIDevicesRoot(filepath.Join(m.devRoot, nvpci.PCIDevicesRoot)),
nvpci.WithLogger(m.logger),
gpus, err := nvpci.NewFrom(
filepath.Join(m.driverRoot, nvpci.PCIDevicesRoot),
).GetGPUs()
if err != nil {
return nil, fmt.Errorf("failed to get GPU information: %v", err)
@@ -82,7 +69,7 @@ func (m allPossible) DeviceNodes() ([]deviceNode, error) {
count := len(gpus)
if count == 0 {
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
return nil, nil
}
@@ -181,7 +168,7 @@ func (m allPossible) newDeviceNode(deviceName devices.Name, path string, minor i
major, _ := m.deviceMajors.Get(deviceName)
return deviceNode{
path: filepath.Join(m.devRoot, path),
path: filepath.Join(m.driverRoot, path),
major: uint32(major),
minor: uint32(minor),
}

View File

@@ -25,11 +25,8 @@ import (
"syscall"
"github.com/fsnotify/fsnotify"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
)
const (
@@ -37,21 +34,19 @@ const (
)
type command struct {
logger logger.Interface
logger *logrus.Logger
}
type config struct {
devCharPath string
driverRoot string
dryRun bool
watch bool
createAll bool
createDeviceNodes bool
loadKernelModules bool
devCharPath string
driverRoot string
dryRun bool
watch bool
createAll bool
}
// NewCommand constructs a command sub-command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
@@ -87,7 +82,7 @@ func (m command) build() *cli.Command {
Usage: "The path to the driver root. `DRIVER_ROOT`/dev is searched for NVIDIA device nodes.",
Value: "/",
Destination: &cfg.driverRoot,
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
EnvVars: []string{"DRIVER_ROOT"},
},
&cli.BoolFlag{
Name: "watch",
@@ -102,18 +97,6 @@ func (m command) build() *cli.Command {
Destination: &cfg.createAll,
EnvVars: []string{"CREATE_ALL"},
},
&cli.BoolFlag{
Name: "load-kernel-modules",
Usage: "Load the NVIDIA kernel modules before creating symlinks. This is only applicable when --create-all is set.",
Destination: &cfg.loadKernelModules,
EnvVars: []string{"LOAD_KERNEL_MODULES"},
},
&cli.BoolFlag{
Name: "create-device-nodes",
Usage: "Create the NVIDIA control device nodes in the driver root if they do not exist. This is only applicable when --create-all is set",
Destination: &cfg.createDeviceNodes,
EnvVars: []string{"CREATE_DEVICE_NODES"},
},
&cli.BoolFlag{
Name: "dry-run",
Usage: "If set, the command will not create any symlinks.",
@@ -131,16 +114,6 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error {
return fmt.Errorf("create-all and watch are mutually exclusive")
}
if cfg.loadKernelModules && !cfg.createAll {
m.logger.Warning("load-kernel-modules is only applicable when create-all is set; ignoring")
cfg.loadKernelModules = false
}
if cfg.createDeviceNodes && !cfg.createAll {
m.logger.Warning("create-device-nodes is only applicable when create-all is set; ignoring")
cfg.createDeviceNodes = false
}
return nil
}
@@ -164,8 +137,6 @@ func (m command) run(c *cli.Context, cfg *config) error {
WithDriverRoot(cfg.driverRoot),
WithDryRun(cfg.dryRun),
WithCreateAll(cfg.createAll),
WithLoadKernelModules(cfg.loadKernelModules),
WithCreateDeviceNodes(cfg.createDeviceNodes),
)
if err != nil {
return fmt.Errorf("failed to create symlink creator: %v", err)
@@ -215,15 +186,12 @@ create:
}
type linkCreator struct {
logger logger.Interface
lister nodeLister
driverRoot string
devRoot string
devCharPath string
dryRun bool
createAll bool
createDeviceNodes bool
loadKernelModules bool
logger *logrus.Logger
lister nodeLister
driverRoot string
devCharPath string
dryRun bool
createAll bool
}
// Creator is an interface for creating symlinks to /dev/nv* devices in /dev/char.
@@ -241,81 +209,34 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) {
opt(&c)
}
if c.logger == nil {
c.logger = logger.New()
c.logger = logrus.StandardLogger()
}
if c.driverRoot == "" {
c.driverRoot = "/"
}
if c.devRoot == "" {
c.devRoot = "/"
}
if c.devCharPath == "" {
c.devCharPath = defaultDevCharPath
}
if err := c.setup(); err != nil {
return nil, err
}
if c.createAll {
lister, err := newAllPossible(c.logger, c.devRoot)
lister, err := newAllPossible(c.logger, c.driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to create all possible device lister: %v", err)
}
c.lister = lister
} else {
c.lister = existing{c.logger, c.devRoot}
c.lister = existing{c.logger, c.driverRoot}
}
return c, nil
}
// setup performs the optional preparation steps selected on the linkCreator.
// When loadKernelModules is set the NVIDIA kernel modules are loaded using the
// driver root; when createDeviceNodes is set the NVIDIA control device nodes
// are created using the dev root. Both steps receive the dry-run setting.
// The first failure is returned.
func (m linkCreator) setup() error {
	if m.loadKernelModules {
		loader := nvmodules.New(
			nvmodules.WithLogger(m.logger),
			nvmodules.WithDryRun(m.dryRun),
			nvmodules.WithRoot(m.driverRoot),
		)
		if err := loader.LoadAll(); err != nil {
			return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
		}
	}

	if !m.createDeviceNodes {
		return nil
	}

	nodes, err := nvdevices.New(
		nvdevices.WithLogger(m.logger),
		nvdevices.WithDryRun(m.dryRun),
		nvdevices.WithDevRoot(m.devRoot),
	)
	if err != nil {
		return err
	}
	if err := nodes.CreateNVIDIAControlDevices(); err != nil {
		return fmt.Errorf("failed to create NVIDIA device nodes: %v", err)
	}
	return nil
}
// WithDriverRoot returns an Option that sets the driver root of the link creator.
// This is the root in which kernel modules must be loaded.
func WithDriverRoot(root string) Option {
	return func(lc *linkCreator) {
		lc.driverRoot = root
	}
}
// WithDevRoot returns an Option that sets the root path under which the /dev
// directory is located.
func WithDevRoot(root string) Option {
	return func(lc *linkCreator) {
		lc.devRoot = root
	}
}
// WithDevCharPath sets the path at which the symlinks will be created.
func WithDevCharPath(path string) Option {
return func(c *linkCreator) {
@@ -331,7 +252,7 @@ func WithDryRun(dryRun bool) Option {
}
// WithLogger sets the logger.
func WithLogger(logger logger.Interface) Option {
func WithLogger(logger *logrus.Logger) Option {
return func(c *linkCreator) {
c.logger = logger
}
@@ -344,20 +265,6 @@ func WithCreateAll(createAll bool) Option {
}
}
// WithLoadKernelModules returns an Option that sets whether the linkCreator
// loads the NVIDIA kernel modules.
func WithLoadKernelModules(loadKernelModules bool) Option {
	return func(creator *linkCreator) {
		creator.loadKernelModules = loadKernelModules
	}
}
// WithCreateDeviceNodes returns an Option that sets whether the linkCreator
// creates the NVIDIA device nodes.
func WithCreateDeviceNodes(createDeviceNodes bool) Option {
	return func(creator *linkCreator) {
		creator.createDeviceNodes = createDeviceNodes
	}
}
// CreateLinks creates symlinks for all NVIDIA device nodes found in the driver root.
func (m linkCreator) CreateLinks() error {
deviceNodes, err := m.lister.DeviceNodes()
@@ -383,7 +290,7 @@ func (m linkCreator) CreateLinks() error {
err = os.Symlink(target, linkPath)
if err != nil {
m.logger.Warningf("Could not create symlink: %v", err)
m.logger.Warnf("Could not create symlink: %v", err)
}
}

View File

@@ -20,10 +20,9 @@ import (
"path/filepath"
"strings"
"golang.org/x/sys/unix"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
type nodeLister interface {
@@ -31,8 +30,8 @@ type nodeLister interface {
}
type existing struct {
logger logger.Interface
devRoot string
logger *logrus.Logger
driverRoot string
}
// DeviceNodes returns a list of NVIDIA device nodes in the specified root.
@@ -40,22 +39,22 @@ type existing struct {
func (m existing) DeviceNodes() ([]deviceNode, error) {
locator := lookup.NewCharDeviceLocator(
lookup.WithLogger(m.logger),
lookup.WithRoot(m.devRoot),
lookup.WithRoot(m.driverRoot),
lookup.WithOptional(true),
)
devices, err := locator.Locate("/dev/nvidia*")
if err != nil {
m.logger.Warningf("Error while locating device: %v", err)
m.logger.Warnf("Error while locating device: %v", err)
}
capDevices, err := locator.Locate("/dev/nvidia-caps/nvidia-*")
if err != nil {
m.logger.Warningf("Error while locating caps device: %v", err)
m.logger.Warnf("Error while locating caps device: %v", err)
}
if len(devices) == 0 && len(capDevices) == 0 {
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
return nil, nil
}
@@ -64,13 +63,20 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
if m.nodeIsBlocked(d) {
continue
}
var stat unix.Stat_t
err := unix.Stat(d, &stat)
if err != nil {
m.logger.Warningf("Could not stat device: %v", err)
m.logger.Warnf("Could not stat device: %v", err)
continue
}
deviceNodes = append(deviceNodes, newDeviceNode(d, stat))
deviceNode := deviceNode{
path: d,
major: unix.Major(uint64(stat.Rdev)),
minor: unix.Minor(uint64(stat.Rdev)),
}
deviceNodes = append(deviceNodes, deviceNode)
}
return deviceNodes, nil

View File

@@ -1,28 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package devchar
import "golang.org/x/sys/unix"
// newDeviceNode builds the deviceNode for the device at path d, taking the
// major and minor numbers from the Rdev field of the supplied stat result.
// Rdev is passed to unix.Major and unix.Minor without conversion.
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
	return deviceNode{
		path:  d,
		major: unix.Major(stat.Rdev),
		minor: unix.Minor(stat.Rdev),
	}
}

View File

@@ -1,30 +0,0 @@
//go:build !linux
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package devchar
import "golang.org/x/sys/unix"
// newDeviceNode builds the deviceNode for the device at path d, taking the
// major and minor numbers from the Rdev field of the supplied stat result.
// Rdev is converted to uint64 before being passed to unix.Major and unix.Minor.
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
	rdev := uint64(stat.Rdev)
	return deviceNode{
		path:  d,
		major: unix.Major(rdev),
		minor: unix.Minor(rdev),
	}
}

View File

@@ -1,142 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package createdevicenodes
import (
"fmt"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
)
// command implements the create-device-nodes sub-command.
type command struct {
	// logger is used for the command's informational output and is passed on
	// to the kernel module and device node helpers.
	logger logger.Interface
}
// options holds the flag values of the create-device-nodes sub-command.
type options struct {
	// root is the root used when loading the kernel modules (--root).
	root string
	// devRoot is the root in which /dev is located (--dev-root). When left
	// empty, validateFlags sets it to root.
	devRoot string
	// dryRun is forwarded to the kernel module and device node helpers (--dry-run).
	dryRun bool
	// control enables the creation of the control device nodes (--control-devices).
	control bool
	// loadKernelModules enables loading the NVIDIA kernel modules before any
	// device nodes are created (--load-kernel-modules).
	loadKernelModules bool
}
// NewCommand returns the create-device-nodes cli.Command. Messages emitted by
// the command are sent to the specified logger.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the create-device-nodes cli.Command. Every flag is bound to a
// single options value, which is checked by validateFlags before the action
// runs and then consumed by run.
func (m command) build() *cli.Command {
	var opts options

	flags := []cli.Flag{
		&cli.StringFlag{
			Name: "root",
			// TODO: Remove this alias
			Aliases: []string{"driver-root"},
			Usage: "the path to to the root to use to load the kernel modules. This root must be a chrootable path. " +
				"If device nodes to be created these will be created at `ROOT`/dev unless an alternative path is specified",
			Value:       "/",
			Destination: &opts.root,
			// TODO: Remove the NVIDIA_DRIVER_ROOT and DRIVER_ROOT envvars.
			EnvVars: []string{"ROOT", "NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
		},
		&cli.StringFlag{
			Name:        "dev-root",
			Usage:       "specify the root where `/dev` is located. If this is not specified, the root is assumed.",
			Destination: &opts.devRoot,
			EnvVars:     []string{"NVIDIA_DEV_ROOT", "DEV_ROOT"},
		},
		&cli.BoolFlag{
			Name:        "control-devices",
			Usage:       "create all control device nodes: nvidiactl, nvidia-modeset, nvidia-uvm, nvidia-uvm-tools",
			Destination: &opts.control,
		},
		&cli.BoolFlag{
			Name:        "load-kernel-modules",
			Usage:       "load the NVIDIA Kernel Modules before creating devices nodes",
			Destination: &opts.loadKernelModules,
		},
		&cli.BoolFlag{
			Name:        "dry-run",
			Usage:       "if set, the command will not perform any operations",
			Value:       false,
			Destination: &opts.dryRun,
			EnvVars:     []string{"DRY_RUN"},
		},
	}

	return &cli.Command{
		Name:  "create-device-nodes",
		Usage: "A utility to create NVIDIA device nodes",
		Before: func(ctx *cli.Context) error {
			return m.validateFlags(ctx, &opts)
		},
		Action: func(ctx *cli.Context) error {
			return m.run(ctx, &opts)
		},
		Flags: flags,
	}
}
// validateFlags fills in the dev root when it was not specified: if devRoot is
// empty and root is not, devRoot is set to root and the choice is logged.
// It always returns nil.
func (m command) validateFlags(r *cli.Context, opts *options) error {
	if opts.devRoot != "" || opts.root == "" {
		return nil
	}

	m.logger.Infof("Using dev-root %q", opts.root)
	opts.devRoot = opts.root
	return nil
}
// run executes the create-device-nodes command. If requested, the NVIDIA kernel
// modules are loaded using opts.root first; then, if requested, the NVIDIA
// control device nodes are created using opts.devRoot. The dry-run option is
// forwarded to both steps. The first failure is returned.
func (m command) run(c *cli.Context, opts *options) error {
	if opts.loadKernelModules {
		loader := nvmodules.New(
			nvmodules.WithLogger(m.logger),
			nvmodules.WithDryRun(opts.dryRun),
			nvmodules.WithRoot(opts.root),
		)
		if err := loader.LoadAll(); err != nil {
			return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
		}
	}

	if !opts.control {
		return nil
	}

	nodes, err := nvdevices.New(
		nvdevices.WithLogger(m.logger),
		nvdevices.WithDryRun(opts.dryRun),
		nvdevices.WithDevRoot(opts.devRoot),
	)
	if err != nil {
		return err
	}

	m.logger.Infof("Creating control device nodes at %s", opts.devRoot)
	if err := nodes.CreateNVIDIAControlDevices(); err != nil {
		return fmt.Errorf("failed to create NVIDIA control device nodes: %v", err)
	}
	return nil
}

View File

@@ -1,102 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package createdevicenodes
import (
"fmt"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// command implements the print-ldcache sub-command.
type command struct {
	// logger is used to print the ldcache contents and is passed to the
	// ldcache constructor.
	logger logger.Interface
}
// options holds the flag values of the print-ldcache sub-command.
type options struct {
	// driverRoot is the driver root passed to the ldcache constructor (--driver-root).
	driverRoot string
}
// NewCommand returns the print-ldcache cli.Command. Output produced by the
// command is sent to the specified logger.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the print-ldcache cli.Command. The driver-root flag is bound
// to an options value, which is checked by validateFlags before the action
// runs and then consumed by run.
func (m command) build() *cli.Command {
	opts := options{}

	c := cli.Command{
		Name:  "print-ldcache",
		Usage: "A utility to print the contents of the ldcache",
		Before: func(c *cli.Context) error {
			return m.validateFlags(c, &opts)
		},
		Action: func(c *cli.Context) error {
			return m.run(c, &opts)
		},
	}

	c.Flags = []cli.Flag{
		&cli.StringFlag{
			Name: "driver-root",
			// This command creates no device nodes; the help text describes
			// what the driver root is used for here. The back-quoted name is
			// kept as the placeholder shown for the flag value.
			Usage:       "the path to the driver root. The contents of the ldcache for `DRIVER_ROOT` are printed",
			Value:       "/",
			Destination: &opts.driverRoot,
			EnvVars:     []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
		},
	}

	return &c
}
// validateFlags is the Before hook of the print-ldcache command. It performs
// no checks and always returns nil.
func (m command) validateFlags(r *cli.Context, opts *options) error {
	return nil
}
// run creates the ldcache for the configured driver root and logs its
// contents: first the 32-bit libraries, then the 64-bit libraries. For each
// group either the number of entries followed by one line per entry is logged,
// or a message stating that none were found. An error is returned if the
// ldcache cannot be created.
func (m command) run(c *cli.Context, opts *options) error {
	cache, err := ldcache.New(m.logger, opts.driverRoot)
	if err != nil {
		return fmt.Errorf("failed to create ldcache: %v", err)
	}

	libs32, libs64 := cache.List()

	if count := len(libs32); count > 0 {
		m.logger.Infof("%d 32-bit libraries found", count)
		for _, entry := range libs32 {
			m.logger.Infof("%v", entry)
		}
	} else {
		m.logger.Info("No 32-bit libraries found")
	}

	if count := len(libs64); count > 0 {
		m.logger.Infof("%d 64-bit libraries found", count)
		for _, entry := range libs64 {
			m.logger.Infof("%v", entry)
		}
	} else {
		m.logger.Info("No 64-bit libraries found")
	}

	return nil
}

View File

@@ -17,20 +17,17 @@
package system
import (
"github.com/urfave/cli/v2"
devchar "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks"
devicenodes "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-device-nodes"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/print-ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
logger *logrus.Logger
}
// NewCommand constructs a runtime command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
@@ -46,8 +43,6 @@ func (m command) build() *cli.Command {
system.Subcommands = []*cli.Command{
devchar.NewCommand(m.logger),
devicenodes.NewCommand(m.logger),
ldcache.NewCommand(m.logger),
}
return &system

32
config/config.toml.debian Normal file
View File

@@ -0,0 +1,32 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -0,0 +1,32 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
#no-cgroups = false
user = "root:video"
ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -0,0 +1,32 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

32
config/config.toml.ubuntu Normal file
View File

@@ -0,0 +1,32 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig.real"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -22,7 +22,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
@@ -53,6 +53,15 @@ ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
# Debian Jessie still had ldconfig.real
RUN if [ "$(lsb_release -cs)" = "jessie" ]; then \
sed -i 's;"@/sbin/ldconfig";"@/sbin/ldconfig.real";' $DIST_DIR/config.toml; \
fi
WORKDIR $DIST_DIR
COPY packaging/debian ./debian

View File

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG GOLANG_VERSION=x.x.x
ARG GOLANGCI_LINT_VERSION=v1.54.1
FROM golang:${GOLANG_VERSION}
RUN go install golang.org/x/lint/golint@6edffad5e6160f5949cdefc81710b2706fbcd4f6
@@ -20,8 +19,3 @@ RUN go install github.com/matryer/moq@latest
RUN go install github.com/gordonklaus/ineffassign@d2c82e48359b033cde9cf1307f6d5550b8d61321
RUN go install github.com/client9/misspell/cmd/misspell@latest
RUN go install github.com/google/go-licenses@latest
RUN curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin ${GOLANGCI_LINT_VERSION}
# We need to set the /work directory as a safe directory.
# This allows git commands to run in the container.
RUN git config --file=/.gitconfig --add safe.directory /work

View File

@@ -15,7 +15,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture"; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
@@ -44,6 +44,16 @@ ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
WORKDIR $DIST_DIR/..
COPY packaging/rpm .

View File

@@ -17,15 +17,6 @@
ARG BASEIMAGE
FROM ${BASEIMAGE}
# centos:stream8 is EOL.
# We switch to the vault repositories for this base image.
ARG BASEIMAGE
RUN if [ "${BASEIMAGE}" = "quay.io/centos/centos:stream8" ]; then \
sed -i -e "s|mirrorlist=|#mirrorlist=|g" \
-e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" \
/etc/yum.repos.d/CentOS-Stream-*; \
fi
RUN yum install -y \
ca-certificates \
gcc \
@@ -42,7 +33,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture"; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
@@ -71,6 +62,16 @@ ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
WORKDIR $DIST_DIR/..
COPY packaging/rpm .

View File

@@ -20,7 +20,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
@@ -51,6 +51,10 @@ ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
WORKDIR $DIST_DIR
COPY packaging/debian ./debian

View File

@@ -14,10 +14,10 @@
# Supported OSs by architecture
AMD64_TARGETS := ubuntu20.04 ubuntu18.04 ubuntu16.04 debian10 debian9
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
X86_64_TARGETS := fedora35 centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
PPC64LE_TARGETS := ubuntu18.04 ubuntu16.04 centos7 centos8 rhel7 rhel8
ARM64_TARGETS := ubuntu20.04 ubuntu18.04
AARCH64_TARGETS := centos7 centos8 rhel8 amazonlinux2
AARCH64_TARGETS := fedora35 centos8 rhel8 amazonlinux2
# Define top-level build targets
docker%: SHELL:=/bin/bash
@@ -88,35 +88,63 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
LIBNVIDIA_CONTAINER_VERSION ?= $(LIB_VERSION)
LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
# private ubuntu target
--ubuntu%: OS := ubuntu
--ubuntu%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
--ubuntu%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
--ubuntu%: PKG_REV := 1
# private debian target
--debian%: OS := debian
--debian%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
--debian%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
--debian%: PKG_REV := 1
# private centos target
--centos%: OS := centos
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--centos%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--centos%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--centos%: CONFIG_TOML_SUFFIX := rpm-yum
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
# private fedora target
--fedora%: OS := fedora
--fedora%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--fedora%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--fedora%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--fedora%: CONFIG_TOML_SUFFIX := rpm-yum
# The fedora(35) base image has very slow performance when building aarch64 packages.
# Since our primary concern here is glibc versions, we use the older glibc version available in centos8.
--fedora35%: BASEIMAGE = quay.io/centos/centos:stream8
# private amazonlinux target
--amazonlinux%: OS := amazonlinux
--amazonlinux%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--amazonlinux%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--amazonlinux%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--amazonlinux%: CONFIG_TOML_SUFFIX := rpm-yum
# private opensuse-leap target
--opensuse-leap%: OS = opensuse-leap
--opensuse-leap%: BASEIMAGE = opensuse/leap:$(VERSION)
--opensuse-leap%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--opensuse-leap%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
# private rhel target (actually built on centos)
--rhel%: OS := centos
--rhel%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
--rhel%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--rhel%: CONFIG_TOML_SUFFIX := rpm-yum
--rhel8%: BASEIMAGE = quay.io/centos/centos:stream8
# We allow the CONFIG_TOML_SUFFIX to be overridden.
CONFIG_TOML_SUFFIX ?= $(OS)
docker-build-%:
@echo "Building for $(TARGET_PLATFORM)"
docker pull --platform=linux/$(ARCH) $(BASEIMAGE)
@@ -127,9 +155,10 @@ docker-build-%:
--build-arg BASEIMAGE="$(BASEIMAGE)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg PKG_NAME="$(LIB_NAME)" \
--build-arg PKG_VERS="$(PACKAGE_VERSION)" \
--build-arg PKG_REV="$(PACKAGE_REVISION)" \
--build-arg PKG_VERS="$(LIB_VERSION)" \
--build-arg PKG_REV="$(PKG_REV)" \
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
--tag $(BUILDIMAGE) \
--file $(DOCKERFILE) .

37
go.mod
View File

@@ -1,37 +1,36 @@
module github.com/NVIDIA/nvidia-container-toolkit
go 1.20
go 1.18
require (
github.com/NVIDIA/go-nvlib v0.5.0
github.com/NVIDIA/go-nvml v0.12.4-0
github.com/fsnotify/fsnotify v1.7.0
github.com/opencontainers/runtime-spec v1.2.0
github.com/pelletier/go-toml v1.9.5
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.9.0
github.com/urfave/cli/v2 v2.27.2
golang.org/x/mod v0.18.0
golang.org/x/sys v0.21.0
tags.cncf.io/container-device-interface v0.7.2
tags.cncf.io/container-device-interface/specs-go v0.7.0
github.com/BurntSushi/toml v1.0.0
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a
github.com/fsnotify/fsnotify v1.5.4
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb
github.com/pelletier/go-toml v1.9.4
github.com/sirupsen/logrus v1.9.0
github.com/stretchr/testify v1.7.0
github.com/urfave/cli/v2 v2.3.0
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342
golang.org/x/mod v0.5.0
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6
sigs.k8s.io/yaml v1.3.0
)
require (
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/opencontainers/runc v1.1.4 // indirect
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/opencontainers/selinux v1.10.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)

105
go.sum
View File

@@ -1,65 +1,84 @@
github.com/NVIDIA/go-nvlib v0.5.0 h1:951KGrfr+p3cs89alO9z/ZxPPWKxwht9tx9rxiADoLI=
github.com/NVIDIA/go-nvlib v0.5.0/go.mod h1:87z49ULPr4GWPSGfSIp3taU4XENRYN/enIg88MzcL4k=
github.com/NVIDIA/go-nvml v0.12.4-0 h1:4tkbB3pT1O77JGr0gQ6uD8FrsUPqP1A/EOEm2wI1TUg=
github.com/NVIDIA/go-nvml v0.12.4-0/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.0.0 h1:dtDWrepsVPfW9H/4y7dDgFc2MBUSeJhlaDtK13CxFlU=
github.com/BurntSushi/toml v1.0.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82 h1:x751Xx1tdxkiA/sdkv2J769n21UbYKzVOpe9S/h1M3k=
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a h1:sP3PcgyIkRlHqfF3Jfpe/7G8kf/qpzG4C8r94y9hLbE=
github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a/go.mod h1:xMRa4fJgXzSDFUCURSimOUgoSc+odohvO3uXT9xjqH0=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.1 h1:r/myEWzV9lfsM1tFLgDyu0atFtJ1fXn261LKYj/3DxU=
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runc v1.1.4 h1:nRCz/8sKg6K6jgYAFLDlXzPeITBZJyX28DBVhWD+5dg=
github.com/opencontainers/runc v1.1.4/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb h1:1xSVPOd7/UA+39/hXEGnBJ13p6JFB0E1EvQFlrRDOXI=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.10.1 h1:09LIPVRP3uuZGQvgR+SgMSNBd1Eb3vlRbGqQpoHsF8w=
github.com/opencontainers/selinux v1.10.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM=
github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli/v2 v2.27.2 h1:6e0H+AkS+zDckwPCUrZkKX38mRaau4nL2uipkJpbkcI=
github.com/urfave/cli/v2 v2.27.2/go.mod h1:g0+79LmHHATl7DAcHO99smiR/T7uGLw84w8Y42x+4eM=
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M=
github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -67,18 +86,34 @@ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHo
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0=
golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342 h1:083n9fJt2dWOpJd/X/q9Xgl5XtQLL22uSFYbzVqJssg=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342/go.mod h1:GStidGxhaqJhYFW1YpOnLvYCbL2EsM0od7IW4u7+JgU=
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6 h1:cy1ko5847T/lJ45eyg/7uLprIE/amW5IXxGtEnQdYMI=
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
@@ -86,7 +121,3 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
tags.cncf.io/container-device-interface v0.7.2 h1:MLqGnWfOr1wB7m08ieI4YJ3IoLKKozEnnNYBtacDPQU=
tags.cncf.io/container-device-interface v0.7.2/go.mod h1:Xb1PvXv2BhfNb3tla4r9JL129ck1Lxv9KuU6eVOfKto=
tags.cncf.io/container-device-interface/specs-go v0.7.0 h1:w/maMGVeLP6TIQJVYT5pbqTi8SCw/iHZ+n4ignuGHqg=
tags.cncf.io/container-device-interface/specs-go v0.7.0/go.mod h1:hMAwAbMZyBLdmYqWgYcKH0F/yctNpV3P35f+/088A80=

View File

@@ -1,80 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o pipefail
this=`basename $0`
# usage prints the command-line help text to stdout.
# It reads the global $this (the script's basename) for the usage line.
# The flags listed here must match the ones handled by the option parser:
# --since, --remote, --version and help.
usage () {
cat << EOF
Generate a changelog for the specified tag
Usage: $this [--since <tag>] [--remote <remote_name>] [--version <version>]
Options:
--since specify the tag to start the changelog from (default: latest tag)
--remote specify the remote to fetch tags from (default: upstream)
--version specify the version to be released
--help/-h show this help and exit
EOF
}
REMOTE="upstream"
VERSION=""
REFERENCE=
# Parse command line options.
# Every value-taking flag consumes two positional arguments: the flag itself
# and its value. Anything unrecognised prints the usage text and exits 1.
while [[ $# -gt 0 ]]; do
    key="$1"
    case $key in
        --since)
            REFERENCE="$2"
            shift # past argument
            shift # past value
            ;;
        --remote)
            REMOTE="$2"
            shift # past argument
            shift # past value
            ;;
        --version)
            VERSION="$2"
            shift # past argument
            shift # past value
            ;;
        # Accept either spelling. A pattern written as "--help/-h" only matches
        # that literal string, which sends real help requests to the error
        # branch below with a non-zero exit status.
        --help|-h)
            usage
            exit 0
            ;;
        *)
            usage
            exit 1
            ;;
    esac
done
# Fetch the latest tags from the remote.
# Expansions are quoted so that a remote or tag name containing whitespace or
# glob characters is passed to git as a single, literal argument.
git fetch "$REMOTE" --tags
# If REFERENCE is not set, fall back to the most recently created tag.
if [ -z "$REFERENCE" ]; then
    REFERENCE=$(git describe --tags "$(git rev-list --tags --max-count=1)")
fi
# Print the changelog header.
echo "## Changelog"
echo ""
echo "### Version $VERSION"
# List the commit subjects between REFERENCE and HEAD as markdown bullets,
# ignoring the ones that start with "Merge " or "Bump".
git log --pretty=format:"%s" "$REFERENCE..@" | grep -Ev "(^Merge )|(^Bump)" | sed 's/^\(.*\)/- \1/g'

View File

@@ -17,46 +17,32 @@
package config
import (
"os"
"strings"
"github.com/pelletier/go-toml"
)
// ContainerCLIConfig stores the options for the nvidia-container-cli
type ContainerCLIConfig struct {
Root string `toml:"root"`
Path string `toml:"path"`
Environment []string `toml:"environment"`
Debug string `toml:"debug"`
Ldcache string `toml:"ldcache"`
LoadKmods bool `toml:"load-kmods"`
// NoPivot disables the pivot root operation in the NVIDIA Container CLI.
// This is not exposed in the config if not set.
NoPivot bool `toml:"no-pivot,omitempty"`
NoCgroups bool `toml:"no-cgroups"`
User string `toml:"user"`
Ldconfig string `toml:"ldconfig"`
Root string
}
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
// This is only done for host LDConfigs and is required to handle systems where
// /sbin/ldconfig is a wrapper around /sbin/ldconfig.real.
func (c *ContainerCLIConfig) NormalizeLDConfigPath() string {
return NormalizeLDConfigPath(c.Ldconfig)
}
// getContainerCLIConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getContainerCLIConfigFrom(toml *toml.Tree) *ContainerCLIConfig {
cfg := getDefaultContainerCLIConfig()
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
// This is only done for host LDConfigs and is required to handle systems where
// /sbin/ldconfig is a wrapper around /sbin/ldconfig.real.
func NormalizeLDConfigPath(path string) string {
if !strings.HasPrefix(path, "@") {
return path
if toml == nil {
return cfg
}
trimmedPath := strings.TrimSuffix(strings.TrimPrefix(path, "@"), ".real")
// If the .real path exists, we return that.
if _, err := os.Stat(trimmedPath + ".real"); err == nil {
return "@" + trimmedPath + ".real"
}
// If the .real path does not exist (or cannot be read) we return the non-.real path.
return "@" + trimmedPath
cfg.Root = toml.GetDefault("nvidia-container-cli.root", cfg.Root).(string)
return cfg
}
// getDefaultContainerCLIConfig defines the default values for the config
func getDefaultContainerCLIConfig() *ContainerCLIConfig {
c := ContainerCLIConfig{
Root: "",
}
return &c
}

View File

@@ -1,83 +0,0 @@
/**
# Copyright 2023 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
)
// TestNormalizeLDConfigPath is a table-driven test for
// ContainerCLIConfig.NormalizeLDConfigPath. Each case sets the Ldconfig field
// and checks the string returned by the method.
func TestNormalizeLDConfigPath(t *testing.T) {
// Create a temporary directory holding a single file named "exists.real" so
// that the cases below can refer to a ".real" path that is present on disk.
testDir := t.TempDir()
f, err := os.Create(filepath.Join(testDir, "exists.real"))
require.NoError(t, err)
// Only the file's existence matters; the close error is deliberately ignored.
_ = f.Close()
testCases := []struct {
description string
ldconfig string
expected string
}{
{
// Both ldconfig and expected are left as the empty string.
description: "empty input",
},
{
// Paths without the "@" prefix are expected back unchanged.
description: "non-host with .real suffix returns as is",
ldconfig: "/some/path/ldconfig.real",
expected: "/some/path/ldconfig.real",
},
{
description: "non-host without .real suffix returns as is",
ldconfig: "/some/path/ldconfig",
expected: "/some/path/ldconfig",
},
{
// "@"-prefixed path that already names the existing .real file.
description: "host .real file exists is returned",
ldconfig: "@" + filepath.Join(testDir, "exists.real"),
expected: "@" + filepath.Join(testDir, "exists.real"),
},
{
// "@"-prefixed path without the suffix; the existing .real sibling is expected.
description: "host resolves .real file",
ldconfig: "@" + filepath.Join(testDir, "exists"),
expected: "@" + filepath.Join(testDir, "exists.real"),
},
{
// No such file on disk: the ".real" suffix is expected to be dropped.
description: "host .real file not exists strips suffix",
ldconfig: "@/does/not/exist.real",
expected: "@/does/not/exist",
},
{
description: "host file returned as is if no .real file exsits",
ldconfig: "@/does/not/exist",
expected: "@/does/not/exist",
},
}
// Run every case as a named subtest so failures identify the scenario.
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
c := ContainerCLIConfig{
Ldconfig: tc.ldconfig,
}
require.Equal(t, tc.expected, c.NormalizeLDConfigPath())
})
}
}

View File

@@ -17,28 +17,17 @@
package config
import (
"bufio"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"path"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/pelletier/go-toml"
)
const (
configOverride = "XDG_CONFIG_HOME"
configFilePath = "nvidia-container-runtime/config.toml"
nvidiaCTKExecutable = "nvidia-ctk"
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
nvidiaCDIHookDefaultFilePath = "/usr/bin/nvidia-cdi-hook"
nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook"
)
var (
@@ -49,201 +38,77 @@ var (
NVIDIAContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
// NVIDIAContainerToolkitExecutable is the executable name for the NVIDIA Container Toolkit (an alias for the NVIDIA Container Runtime Hook)
NVIDIAContainerToolkitExecutable = "nvidia-container-toolkit"
configDir = "/etc/"
)
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
type Config struct {
DisableRequire bool `toml:"disable-require"`
SwarmResource string `toml:"swarm-resource"`
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
SupportedDriverCapabilities string `toml:"supported-driver-capabilities"`
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
NVIDIAContainerRuntimeHookConfig RuntimeHookConfig `toml:"nvidia-container-runtime-hook"`
// Features allows for finer control over optional features.
Features features `toml:"features,omitempty"`
}
// GetConfigFilePath returns the path to the config file for the configured system
func GetConfigFilePath() string {
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
return filepath.Join(XDGConfigDir, configFilePath)
}
return filepath.Join("/etc", configFilePath)
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
}
// GetConfig sets up the config struct. Values are read from a toml file
// or set via the environment.
func GetConfig() (*Config, error) {
cfg, err := New(
WithConfigFile(GetConfigFilePath()),
)
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
configDir = XDGConfigDir
}
configFilePath := path.Join(configDir, configFilePath)
tomlFile, err := os.Open(configFilePath)
if err != nil {
return getDefaultConfig(), nil
}
defer tomlFile.Close()
cfg, err := loadConfigFrom(tomlFile)
if err != nil {
return nil, fmt.Errorf("failed to read config values: %v", err)
}
return cfg, nil
}
// loadConfigFrom reads the config from the specified Reader
func loadConfigFrom(reader io.Reader) (*Config, error) {
toml, err := toml.LoadReader(reader)
if err != nil {
return nil, err
}
return cfg.Config()
return getConfigFrom(toml)
}
// GetDefault returns a Config populated with the default values for every
// section of the config file. The returned error is always nil.
func GetDefault() (*Config, error) {
	supportedCapabilities := image.SupportedDriverCapabilities.String()

	// Defaults for the [nvidia-container-cli] section.
	cliConfig := ContainerCLIConfig{
		LoadKmods: true,
		Ldconfig:  getLdConfigPath(),
		User:      getUserGroup(),
	}

	// Defaults for the per-mode settings of the runtime.
	runtimeModes := modesConfig{
		CSV: csvModeConfig{
			MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
		},
		CDI: cdiModeConfig{
			DefaultKind:        "nvidia.com/gpu",
			AnnotationPrefixes: []string{cdi.AnnotationPrefix},
			SpecDirs:           cdi.DefaultSpecDirs,
		},
	}

	// Defaults for the [nvidia-container-runtime] section.
	runtimeConfig := RuntimeConfig{
		DebugFilePath: "/dev/null",
		LogLevel:      "info",
		Runtimes:      []string{"docker-runc", "runc", "crun"},
		Mode:          "auto",
		Modes:         runtimeModes,
	}

	defaults := new(Config)
	defaults.AcceptEnvvarUnprivileged = true
	defaults.SupportedDriverCapabilities = supportedCapabilities
	defaults.NVIDIAContainerCLIConfig = cliConfig
	defaults.NVIDIACTKConfig = CTKConfig{Path: nvidiaCTKExecutable}
	defaults.NVIDIAContainerRuntimeConfig = runtimeConfig
	defaults.NVIDIAContainerRuntimeHookConfig = RuntimeHookConfig{
		Path: NVIDIAContainerRuntimeHookExecutable,
	}

	return defaults, nil
}
// getConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getConfigFrom(toml *toml.Tree) (*Config, error) {
cfg := getDefaultConfig()
// getLdConfigPath returns the default ldconfig path: the result of passing
// "@/sbin/ldconfig" through NormalizeLDConfigPath.
func getLdConfigPath() string {
return NormalizeLDConfigPath("@/sbin/ldconfig")
}
// getUserGroup returns the default user:group value: "root:video" when isSuse
// reports true, and the empty string otherwise.
func getUserGroup() string {
if isSuse() {
return "root:video"
}
return ""
}
// isSuse returns whether a SUSE-based distribution was detected.
func isSuse() bool {
suseDists := map[string]bool{
"suse": true,
"opensuse": true,
if toml == nil {
return cfg, nil
}
idsLike := getDistIDLike()
for _, id := range idsLike {
if suseDists[id] {
return true
}
}
return false
}
// getDistIDLike returns the ID_LIKE field from /etc/os-release.
// We can override this for testing.
var getDistIDLike = func() []string {
releaseFile, err := os.Open("/etc/os-release")
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
runtimeConfig, err := getRuntimeConfigFrom(toml)
if err != nil {
return nil
return nil, fmt.Errorf("failed to load nvidia-container-runtime config: %v", err)
}
defer releaseFile.Close()
cfg.NVIDIAContainerRuntimeConfig = *runtimeConfig
scanner := bufio.NewScanner(releaseFile)
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "ID_LIKE=") {
value := strings.Trim(strings.TrimPrefix(line, "ID_LIKE="), "\"")
return strings.Split(value, " ")
}
}
return nil
return cfg, nil
}
// ResolveNVIDIACTKPath resolves the path to the nvidia-ctk binary.
// This executable is used in hooks and needs to be an absolute path.
// If the path is specified as an absolute path, it is used directly
// without checking for existence of an executable at that path.
//
// Deprecated: Use ResolveNVIDIACDIHookPath directly instead.
func ResolveNVIDIACTKPath(logger logger.Interface, nvidiaCTKPath string) string {
// Resolve through the shared helper; nvidiaCTKDefaultFilePath is passed as
// the default path argument and the first string is the label used in logs.
return resolveWithDefault(
logger,
"NVIDIA Container Toolkit CLI",
nvidiaCTKPath,
nvidiaCTKDefaultFilePath,
)
}
// ResolveNVIDIACDIHookPath resolves the path to the nvidia-cdi-hook binary.
// The executable is used in hooks and therefore has to be an absolute path.
// An absolute input path is used directly, without checking that an
// executable exists at that location.
//
// When the base name of the supplied path is "nvidia-ctk", the nvidia-ctk
// label and default path are used for resolution instead of the
// nvidia-cdi-hook ones.
func ResolveNVIDIACDIHookPath(logger logger.Interface, nvidiaCDIHookPath string) string {
	label, fallback := "NVIDIA CDI Hook CLI", nvidiaCDIHookDefaultFilePath
	if filepath.Base(nvidiaCDIHookPath) == "nvidia-ctk" {
		label, fallback = "NVIDIA Container Toolkit CLI", nvidiaCTKDefaultFilePath
	}
	return resolveWithDefault(logger, label, nvidiaCDIHookPath, fallback)
}
// ResolveNVIDIAContainerRuntimeHookPath resolves the path to the nvidia-container-runtime-hook binary.
// The path is resolved with resolveWithDefault, passing
// nvidiaContainerRuntimeHookDefaultPath as the default path argument.
func ResolveNVIDIAContainerRuntimeHookPath(logger logger.Interface, nvidiaContainerRuntimeHookPath string) string {
return resolveWithDefault(
logger,
"NVIDIA Container Runtime Hook",
nvidiaContainerRuntimeHookPath,
nvidiaContainerRuntimeHookDefaultPath,
)
}
// resolveWithDefault resolves the path to the specified binary.
// If an absolute path is specified, it is used directly without searching for the binary.
// If the binary cannot be found in the path, the specified default is used instead.
func resolveWithDefault(logger logger.Interface, label string, path string, defaultPath string) string {
if filepath.IsAbs(path) {
logger.Debugf("Using specified %v path %v", label, path)
return path
// getDefaultConfig defines the default values for the config
func getDefaultConfig() *Config {
c := Config{
NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(),
NVIDIACTKConfig: *getDefaultCTKConfig(),
NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(),
}
if path == "" {
path = filepath.Base(defaultPath)
}
logger.Debugf("Locating %v as %v", label, path)
lookup := lookup.NewExecutableLocator(logger, "")
resolvedPath := defaultPath
targets, err := lookup.Locate(path)
if err != nil {
logger.Warningf("Failed to locate %v: %v", path, err)
} else {
logger.Debugf("Found %v candidates: %v", path, targets)
resolvedPath = targets[0]
}
logger.Debugf("Using %v path %v", label, path)
return resolvedPath
return &c
}

View File

@@ -17,6 +17,7 @@
package config
import (
"io/ioutil"
"os"
"path/filepath"
"strings"
@@ -26,61 +27,50 @@ import (
)
func TestGetConfigWithCustomConfig(t *testing.T) {
testDir := t.TempDir()
t.Setenv(configOverride, testDir)
filename := filepath.Join(testDir, configFilePath)
wd, err := os.Getwd()
require.NoError(t, err)
// By default debug is disabled
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
testDir := filepath.Join(wd, "test")
filename := filepath.Join(testDir, configFilePath)
os.Setenv(configOverride, testDir)
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
require.NoError(t, os.WriteFile(filename, contents, 0600))
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
cfg, err := GetConfig()
require.NoError(t, err)
require.Equal(t, "/nvidia-container-toolkit.log", cfg.NVIDIAContainerRuntimeConfig.DebugFilePath)
require.Equal(t, cfg.NVIDIAContainerRuntimeConfig.DebugFilePath, "/nvidia-container-toolkit.log")
}
func TestGetConfig(t *testing.T) {
testCases := []struct {
description string
contents []string
expectedError error
inspectLdconfig bool
distIdsLike []string
expectedConfig *Config
description string
contents []string
expectedError error
expectedConfig *Config
}{
{
description: "empty config is default",
inspectLdconfig: true,
description: "empty config is default",
expectedConfig: &Config{
AcceptEnvvarUnprivileged: true,
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
LoadKmods: true,
Ldconfig: "WAS_CHECKED",
Root: "",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc", "crun"},
Runtimes: []string{"docker-runc", "runc"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{"cdi.k8s.io/"},
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
@@ -89,32 +79,19 @@ func TestGetConfig(t *testing.T) {
{
description: "config options set inline",
contents: []string{
"accept-nvidia-visible-devices-envvar-when-unprivileged = false",
"supported-driver-capabilities = \"compute,utility\"",
"nvidia-container-cli.root = \"/bar/baz\"",
"nvidia-container-cli.load-kmods = false",
"nvidia-container-cli.ldconfig = \"/foo/bar/ldconfig\"",
"nvidia-container-cli.user = \"foo:bar\"",
"nvidia-container-runtime.debug = \"/foo/bar\"",
"nvidia-container-runtime.experimental = true",
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
"nvidia-container-runtime.log-level = \"debug\"",
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
"nvidia-container-runtime.mode = \"not-auto\"",
"nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"",
"nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
"nvidia-container-runtime.modes.cdi.spec-dirs = [\"/except/etc/cdi\", \"/not/var/run/cdi\",]",
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"nvidia-container-runtime-hook.path = \"/foo/bar/nvidia-container-runtime-hook\"",
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
},
expectedConfig: &Config{
AcceptEnvvarUnprivileged: false,
SupportedDriverCapabilities: "compute,utility",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz",
LoadKmods: false,
Ldconfig: "/foo/bar/ldconfig",
User: "foo:bar",
Root: "/bar/baz",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
@@ -125,22 +102,8 @@ func TestGetConfig(t *testing.T) {
CSV: csvModeConfig{
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "example.vendor.com/device",
AnnotationPrefixes: []string{
"cdi.k8s.io/",
"example.vendor.com/",
},
SpecDirs: []string{
"/except/etc/cdi",
"/not/var/run/cdi",
},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "/foo/bar/nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk",
},
@@ -149,38 +112,23 @@ func TestGetConfig(t *testing.T) {
{
description: "config options set in section",
contents: []string{
"accept-nvidia-visible-devices-envvar-when-unprivileged = false",
"supported-driver-capabilities = \"compute,utility\"",
"[nvidia-container-cli]",
"root = \"/bar/baz\"",
"load-kmods = false",
"ldconfig = \"/foo/bar/ldconfig\"",
"user = \"foo:bar\"",
"[nvidia-container-runtime]",
"debug = \"/foo/bar\"",
"experimental = true",
"discover-mode = \"not-legacy\"",
"log-level = \"debug\"",
"runtimes = [\"/some/runtime\",]",
"mode = \"not-auto\"",
"[nvidia-container-runtime.modes.cdi]",
"default-kind = \"example.vendor.com/device\"",
"annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
"spec-dirs = [\"/except/etc/cdi\", \"/not/var/run/cdi\",]",
"[nvidia-container-runtime.modes.csv]",
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"[nvidia-container-runtime-hook]",
"path = \"/foo/bar/nvidia-container-runtime-hook\"",
"[nvidia-ctk]",
"path = \"/foo/bar/nvidia-ctk\"",
},
expectedConfig: &Config{
AcceptEnvvarUnprivileged: false,
SupportedDriverCapabilities: "compute,utility",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz",
LoadKmods: false,
Ldconfig: "/foo/bar/ldconfig",
User: "foo:bar",
Root: "/bar/baz",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
@@ -191,147 +139,27 @@ func TestGetConfig(t *testing.T) {
CSV: csvModeConfig{
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "example.vendor.com/device",
AnnotationPrefixes: []string{
"cdi.k8s.io/",
"example.vendor.com/",
},
SpecDirs: []string{
"/except/etc/cdi",
"/not/var/run/cdi",
},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "/foo/bar/nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk",
},
},
},
{
description: "suse config",
distIdsLike: []string{"suse", "opensuse"},
inspectLdconfig: true,
expectedConfig: &Config{
AcceptEnvvarUnprivileged: true,
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
LoadKmods: true,
Ldconfig: "WAS_CHECKED",
User: "root:video",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{"cdi.k8s.io/"},
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
},
},
{
description: "suse config overrides user",
distIdsLike: []string{"suse", "opensuse"},
inspectLdconfig: true,
contents: []string{
"nvidia-container-cli.user = \"foo:bar\"",
},
expectedConfig: &Config{
AcceptEnvvarUnprivileged: true,
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
LoadKmods: true,
Ldconfig: "WAS_CHECKED",
User: "foo:bar",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{"cdi.k8s.io/"},
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
defer setGetDistIDLikeForTest(tc.distIdsLike)()
reader := strings.NewReader(strings.Join(tc.contents, "\n"))
tomlCfg, err := loadConfigTomlFrom(reader)
cfg, err := loadConfigFrom(reader)
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
cfg, err := tomlCfg.Config()
require.NoError(t, err)
// We first handle the ldconfig path since this is currently system-dependent.
if tc.inspectLdconfig {
ldconfig := cfg.NVIDIAContainerCLIConfig.Ldconfig
require.True(t, strings.HasPrefix(ldconfig, "@/sbin/ldconfig"))
remaining := strings.TrimPrefix(ldconfig, "@/sbin/ldconfig")
require.True(t, remaining == ".real" || remaining == "")
cfg.NVIDIAContainerCLIConfig.Ldconfig = "WAS_CHECKED"
}
require.EqualValues(t, tc.expectedConfig, cfg)
})
}
}
// setGetDistIDLikeForTest overrides the distribution IDs that would normally
// be read from the /etc/os-release file. It returns a function that restores
// the previous getDistIDLike implementation. When ids is nil nothing is
// overridden and the returned function is a no-op.
func setGetDistIDLikeForTest(ids []string) func() {
	restore := func() {}
	if ids != nil {
		previous := getDistIDLike
		getDistIDLike = func() []string {
			return ids
		}
		restore = func() {
			getDistIDLike = previous
		}
	}
	return restore
}

View File

@@ -0,0 +1,125 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package crio
import (
"fmt"
"os"
"github.com/pelletier/go-toml"
log "github.com/sirupsen/logrus"
)
// LoadConfig loads the cri-o config from disk.
//
// If the path refers to a directory an error is returned. If the path does
// not exist, an empty config is loaded from /dev/null so that a new file can
// be created when the config is flushed. Any parse error reported by the TOML
// loader is returned unchanged.
func LoadConfig(config string) (*toml.Tree, error) {
	log.Infof("Loading config: %v", config)

	info, err := os.Stat(config)
	// os.Stat returns a nil error for an existing path, so the directory check
	// has to be guarded by err == nil; os.IsExist(nil) is always false.
	if err == nil && info.IsDir() {
		return nil, fmt.Errorf("config file is a directory")
	}

	configFile := config
	if os.IsNotExist(err) {
		configFile = "/dev/null"
		log.Infof("Config file does not exist, creating new one")
	}

	cfg, err := toml.LoadFile(configFile)
	if err != nil {
		return nil, err
	}

	log.Infof("Successfully loaded config")
	return cfg, nil
}
// UpdateConfig updates the cri-o config to include the NVIDIA Container Runtime.
//
// If a crio.runtime.runtimes.runc table exists, its contents are used as the
// starting point for the runtimeClass entry. The entry's runtime_path is then
// set to runtimePath and its runtime_type to "oci". When setAsDefault is true,
// crio.runtime.default_runtime is set to runtimeClass.
//
// An error is returned if the existing runc entry cannot be copied.
func UpdateConfig(config *toml.Tree, runtimeClass string, runtimePath string, setAsDefault bool) error {
	switch runc := config.Get("crio.runtime.runtimes.runc").(type) {
	case *toml.Tree:
		// Serialize and re-parse the runc entry so that the new runtime gets
		// its own tree. A failed parse must not install a nil tree, since the
		// SetPath calls below would then operate on it.
		runcCopy, err := toml.Load(runc.String())
		if err != nil {
			return fmt.Errorf("unable to copy runc runtime config: %v", err)
		}
		config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass}, runcCopy)
	}

	config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_path"}, runtimePath)
	config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_type"}, "oci")

	if setAsDefault {
		config.SetPath([]string{"crio", "runtime", "default_runtime"}, runtimeClass)
	}

	return nil
}
// RevertConfig reverts the cri-o config to remove the NVIDIA Container Runtime.
//
// The default_runtime key is removed when it is a string equal to
// runtimeClass, and the runtimeClass entry is deleted from the runtimes table.
// The path to that entry is then walked from the deepest level upwards,
// deleting each table that has no keys and stopping at the first table that
// still has keys. The returned error is always nil.
func RevertConfig(config *toml.Tree, runtimeClass string) error {
	defaultRuntimePath := []string{"crio", "runtime", "default_runtime"}
	if current, isString := config.GetPath(defaultRuntimePath).(string); isString && current == runtimeClass {
		config.DeletePath(defaultRuntimePath)
	}

	classPath := []string{"crio", "runtime", "runtimes", runtimeClass}
	config.DeletePath(classPath)

	for depth := len(classPath); depth > 0; depth-- {
		prefix := classPath[:depth]
		subtree, isTree := config.GetPath(prefix).(*toml.Tree)
		if !isTree {
			// Missing entries and non-table values are skipped.
			continue
		}
		if len(subtree.Keys()) != 0 {
			break
		}
		config.DeletePath(prefix)
	}

	return nil
}
// FlushConfig flushes the updated/reverted config out to disk.
//
// The tree is serialized to TOML. If the result is empty the file at config
// is removed; otherwise the file is created (or truncated) and the TOML
// written to it. Errors from serializing, removing, creating, writing or
// closing the file are returned.
func FlushConfig(config string, cfg *toml.Tree) error {
	log.Infof("Flushing config")

	output, err := cfg.ToTomlString()
	if err != nil {
		return fmt.Errorf("unable to convert to TOML: %v", err)
	}

	switch len(output) {
	case 0:
		err := os.Remove(config)
		if err != nil {
			return fmt.Errorf("unable to remove empty file: %v", err)
		}
		log.Infof("Config empty, removing file")
	default:
		f, err := os.Create(config)
		if err != nil {
			return fmt.Errorf("unable to open '%v' for writing: %v", config, err)
		}

		// Always close the file, but report the write error first. The close
		// error is checked as well, since a failed close can mean the data
		// never reached the disk.
		_, writeErr := f.WriteString(output)
		closeErr := f.Close()
		if writeErr != nil {
			return fmt.Errorf("unable to write output: %v", writeErr)
		}
		if closeErr != nil {
			return fmt.Errorf("unable to close '%v': %v", config, closeErr)
		}
	}

	log.Infof("Successfully flushed config")
	return nil
}

View File

@@ -0,0 +1,117 @@
/**
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package docker
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
log "github.com/sirupsen/logrus"
)
// LoadConfig loads the docker config from disk.
//
// If configFilePath refers to a directory an error is returned. If it does
// not exist, an empty config is returned so that a new file can be created
// when the config is flushed. Otherwise the file is read and decoded as JSON;
// read and decode errors are returned.
func LoadConfig(configFilePath string) (map[string]interface{}, error) {
	log.Infof("Loading docker config from %v", configFilePath)

	info, err := os.Stat(configFilePath)
	// os.Stat returns a nil error for an existing path, so the directory check
	// has to be guarded by err == nil; os.IsExist(nil) is always false.
	if err == nil && info.IsDir() {
		return nil, fmt.Errorf("config file is a directory")
	}

	cfg := make(map[string]interface{})

	if os.IsNotExist(err) {
		log.Infof("Config file does not exist, creating new one")
		return cfg, nil
	}

	readBytes, err := ioutil.ReadFile(configFilePath)
	if err != nil {
		return nil, fmt.Errorf("unable to read config: %v", err)
	}

	reader := bytes.NewReader(readBytes)
	if err := json.NewDecoder(reader).Decode(&cfg); err != nil {
		return nil, err
	}

	log.Infof("Successfully loaded config")
	return cfg, nil
}
// UpdateConfig updates the docker config to include the nvidia runtimes.
//
// An entry for runtimeName with the given runtimePath and an empty args list
// is added to (or replaced in) the "runtimes" section of config. When
// setAsDefault is true, "default-runtime" is set to runtimeName as well.
//
// An error is returned, and config is left unmodified, when config is nil or
// when an existing "runtimes" value is not a JSON object.
func UpdateConfig(config map[string]interface{}, runtimeName string, runtimePath string, setAsDefault bool) error {
	if config == nil {
		return fmt.Errorf("config must not be nil")
	}

	// Read the existing runtimes. A missing key and an explicit null are both
	// treated as "no runtimes defined yet".
	var runtimes map[string]interface{}
	switch existing := config["runtimes"].(type) {
	case nil:
	case map[string]interface{}:
		runtimes = existing
	default:
		return fmt.Errorf("unexpected type %T for runtimes in config", existing)
	}
	if runtimes == nil {
		runtimes = make(map[string]interface{})
	}

	// Add / update the runtime definitions
	runtimes[runtimeName] = map[string]interface{}{
		"path": runtimePath,
		"args": []string{},
	}

	// Update the runtimes definition
	config["runtimes"] = runtimes

	if setAsDefault {
		config["default-runtime"] = runtimeName
	}

	return nil
}
// FlushConfig flushes the updated/reverted config out to disk.
//
// The config is serialized as indented JSON. If the result is empty the file
// at configFilePath is removed; otherwise the file is created (or truncated)
// and the JSON written to it. Errors from serializing, removing, creating,
// writing or closing the file are returned.
func FlushConfig(cfg map[string]interface{}, configFilePath string) error {
	log.Infof("Flushing docker config to %v", configFilePath)

	output, err := json.MarshalIndent(cfg, "", " ")
	if err != nil {
		return fmt.Errorf("unable to convert to JSON: %v", err)
	}

	switch len(output) {
	case 0:
		err := os.Remove(configFilePath)
		if err != nil {
			return fmt.Errorf("unable to remove empty file: %v", err)
		}
		log.Infof("Config empty, removing file")
	default:
		f, err := os.Create(configFilePath)
		if err != nil {
			return fmt.Errorf("unable to open %v for writing: %v", configFilePath, err)
		}

		// Always close the file, but report the write error first. The close
		// error is checked as well, since a failed close can mean the data
		// never reached the disk.
		_, writeErr := f.Write(output)
		closeErr := f.Close()
		if writeErr != nil {
			return fmt.Errorf("unable to write output: %v", writeErr)
		}
		if closeErr != nil {
			return fmt.Errorf("unable to close %v: %v", configFilePath, closeErr)
		}
	}

	log.Infof("Successfully flushed config")
	return nil
}

View File

@@ -26,7 +26,7 @@ import (
func TestUpdateConfigDefaultRuntime(t *testing.T) {
testCases := []struct {
config Config
config map[string]interface{}
runtimeName string
setAsDefault bool
expectedDefaultRuntimeName interface{}
@@ -63,7 +63,7 @@ func TestUpdateConfigDefaultRuntime(t *testing.T) {
if tc.config == nil {
tc.config = make(map[string]interface{})
}
err := tc.config.AddRuntime(tc.runtimeName, "", tc.setAsDefault)
err := UpdateConfig(tc.config, tc.runtimeName, "", tc.setAsDefault)
require.NoError(t, err)
defaultRuntimeName := tc.config["default-runtime"]
@@ -74,7 +74,7 @@ func TestUpdateConfigDefaultRuntime(t *testing.T) {
func TestUpdateConfigRuntimes(t *testing.T) {
testCases := []struct {
config Config
config map[string]interface{}
runtimes map[string]string
expectedConfig map[string]interface{}
}{
@@ -198,7 +198,7 @@ func TestUpdateConfigRuntimes(t *testing.T) {
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
for runtimeName, runtimePath := range tc.runtimes {
err := tc.config.AddRuntime(runtimeName, runtimePath, false)
err := UpdateConfig(tc.config, runtimeName, runtimePath, false)
require.NoError(t, err)
}

View File

@@ -1,85 +0,0 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
// featureName is the name by which an optional feature is referred to when
// calling features.IsEnabled.
type featureName string
// Names of the features that features.IsEnabled knows about.
const (
FeatureGDS = featureName("gds")
FeatureMOFED = featureName("mofed")
FeatureNVSWITCH = featureName("nvswitch")
FeatureGDRCopy = featureName("gdrcopy")
)
// features specifies a set of named features.
// Each field is a pointer so that "not set" (nil) can be told apart from an
// explicit true or false value.
type features struct {
GDS *feature `toml:"gds,omitempty"`
MOFED *feature `toml:"mofed,omitempty"`
NVSWITCH *feature `toml:"nvswitch,omitempty"`
GDRCopy *feature `toml:"gdrcopy,omitempty"`
}
// feature is the explicit enabled (true) / disabled (false) setting of a
// single feature.
type feature bool
// IsEnabled reports whether the named feature is enabled.
// Zero or more environments may be supplied; they are consulted for the
// feature-specific environment variable when the feature has no explicit
// setting. Unknown feature names are never enabled.
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
	var (
		setting *feature
		envvar  string
	)

	// Select the explicit setting and the matching environment variable.
	switch n {
	case FeatureGDS:
		setting, envvar = fs.GDS, "NVIDIA_GDS"
	case FeatureMOFED:
		setting, envvar = fs.MOFED, "NVIDIA_MOFED"
	case FeatureNVSWITCH:
		setting, envvar = fs.NVSWITCH, "NVIDIA_NVSWITCH"
	case FeatureGDRCopy:
		setting, envvar = fs.GDRCopy, "NVIDIA_GDRCOPY"
	default:
		return false
	}

	return setting.isEnabled(envvar, in...)
}
// isEnabled reports whether a feature is enabled.
// An explicit setting (non-nil receiver) always wins. Without one, the
// feature is enabled if any of the supplied getenvers returns the string
// "enabled" for envvar; an empty envvar name is never looked up.
// A CUDA container / image can be passed here.
func (f *feature) isEnabled(envvar string, ins ...getenver) bool {
	switch {
	case f != nil:
		return bool(*f)
	case envvar == "":
		return false
	}

	for i := range ins {
		if value := ins[i].Getenv(envvar); value == "enabled" {
			return true
		}
	}
	return false
}
// getenver is implemented by any source of environment variables that can be
// queried by name. isEnabled compares the returned value against "enabled".
type getenver interface {
// Getenv returns the value associated with the named environment variable.
Getenv(string) string
}

View File

@@ -1,36 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
// RuntimeHookConfig stores the config options for the NVIDIA Container Runtime Hook
type RuntimeHookConfig struct {
// Path specifies the path to the NVIDIA Container Runtime hook binary.
// If an executable name is specified, this will be resolved in the path.
Path string `toml:"path"`
// SkipModeDetection disables the mode check for the runtime hook.
SkipModeDetection bool `toml:"skip-mode-detection"`
}
// GetDefaultRuntimeHookConfig defines the default values for the config.
// It returns the NVIDIAContainerRuntimeHookConfig section of the Config
// produced by GetDefault, or the error reported by GetDefault.
func GetDefaultRuntimeHookConfig() (*RuntimeHookConfig, error) {
cfg, err := GetDefault()
if err != nil {
return nil, err
}
return &cfg.NVIDIAContainerRuntimeHookConfig, nil
}

View File

@@ -1,102 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"fmt"
"strings"
"github.com/opencontainers/runtime-spec/specs-go"
)
// builder collects the settings supplied through Option functions before a
// CUDA image is constructed by build.
type builder struct {
// env holds the environment variables of the image, keyed by variable name.
env map[string]string
// mounts holds the mounts associated with the image.
mounts []specs.Mount
// disableRequire records the value set by WithDisableRequire; when true,
// build sets the envNVDisableRequire variable to "true".
disableRequire bool
}
// New creates a new CUDA image from the input options.
// The options are applied in order; the first option that fails aborts
// construction and its error is returned together with a zero CUDA value.
func New(opt ...Option) (CUDA, error) {
	var b builder
	for i := range opt {
		if err := opt[i](&b); err != nil {
			return CUDA{}, err
		}
	}

	// Guarantee a usable environment map even when no option provided one.
	if b.env == nil {
		b.env = make(map[string]string)
	}

	return b.build()
}
// build creates a CUDA image from the builder.
//
// When the disable-require option is set, the image environment additionally
// contains envNVDisableRequire set to "true". In that case the environment is
// copied first, so that a map handed in by the caller (see WithEnvMap) is not
// modified as a side effect; this also makes build safe to call when no
// environment map has been set. The returned error is always nil.
func (b builder) build() (CUDA, error) {
	env := b.env
	if b.disableRequire {
		env = make(map[string]string, len(b.env)+1)
		for k, v := range b.env {
			env[k] = v
		}
		env[envNVDisableRequire] = "true"
	}

	c := CUDA{
		env:    env,
		mounts: b.mounts,
	}
	return c, nil
}
// Option is a functional option for creating a CUDA image.
// New applies each Option to its builder; a non-nil error aborts construction.
type Option func(*builder) error
// WithDisableRequire sets the disable require option.
// When the option is true, build sets the envNVDisableRequire environment
// variable of the resulting image to "true".
func WithDisableRequire(disableRequire bool) Option {
return func(b *builder) error {
b.disableRequire = disableRequire
return nil
}
}
// WithEnv sets the environment variables to use when creating the CUDA image.
// Every entry must have the form KEY=VALUE; the value is everything after the
// first "=". An entry without "=" makes the option fail and leaves the
// builder untouched.
// Note that this also overwrites the values set with WithEnvMap.
func WithEnv(env []string) Option {
	return func(b *builder) error {
		parsed := make(map[string]string, len(env))
		for _, entry := range env {
			sep := strings.IndexByte(entry, '=')
			if sep < 0 {
				return fmt.Errorf("invalid environment variable: %v", entry)
			}
			key, value := entry[:sep], entry[sep+1:]
			parsed[key] = value
		}
		return WithEnvMap(parsed)(b)
	}
}
// WithEnvMap sets the environment variable map to use when creating the CUDA image.
// Note that this also overwrites the values set with WithEnv.
// The supplied map is stored as-is; it is not copied.
func WithEnvMap(env map[string]string) Option {
return func(b *builder) error {
// Replace any previously configured environment with the supplied map.
b.env = env
return nil
}
}
// WithMounts sets the mounts associated with the CUDA image.
// Any mounts set by an earlier WithMounts option are replaced; the slice is
// stored as-is and not copied.
func WithMounts(mounts []specs.Mount) Option {
return func(b *builder) error {
b.mounts = mounts
return nil
}
}

View File

@@ -16,18 +16,12 @@
package image
import (
"sort"
"strings"
)
// DriverCapability represents the possible values of NVIDIA_DRIVER_CAPABILITIES
type DriverCapability string
// Constants for the supported driver capabilities
const (
DriverCapabilityAll DriverCapability = "all"
DriverCapabilityNone DriverCapability = "none"
DriverCapabilityCompat32 DriverCapability = "compat32"
DriverCapabilityCompute DriverCapability = "compute"
DriverCapabilityDisplay DriverCapability = "display"
@@ -37,110 +31,24 @@ const (
DriverCapabilityVideo DriverCapability = "video"
)
var (
// driverCapabilitiesNone is the empty set of driver capabilities.
driverCapabilitiesNone = NewDriverCapabilities()
// driverCapabilitiesAll is the set that contains only the special `all` capability.
driverCapabilitiesAll = NewDriverCapabilities("all")
// DefaultDriverCapabilities sets the value for driver capabilities if no value is set.
DefaultDriverCapabilities = NewDriverCapabilities("utility,compute")
// SupportedDriverCapabilities defines the set of all supported driver capabilities.
SupportedDriverCapabilities = NewDriverCapabilities("compute,compat32,graphics,utility,video,display,ngx")
)
// NewDriverCapabilities creates a set of driver capabilities from the specified capabilities.
// Every argument may itself be a comma-separated list. Surrounding whitespace
// is stripped from each element and empty elements are ignored. The returned
// set is never nil.
func NewDriverCapabilities(capabilities ...string) DriverCapabilities {
	set := DriverCapabilities{}
	for _, list := range capabilities {
		for _, field := range strings.Split(list, ",") {
			if name := strings.TrimSpace(field); name != "" {
				set[DriverCapability(name)] = true
			}
		}
	}
	return set
}
// DriverCapabilities represents the NVIDIA_DRIVER_CAPABILITIES set for the specified image.
// The set is keyed by capability; a capability is selected if its entry is true.
type DriverCapabilities map[DriverCapability]bool
// Has checks whether the specified capability is selected.
func (c DriverCapabilities) Has(capability DriverCapability) bool {
if c.IsAll() {
if c[DriverCapabilityAll] {
return true
}
return c[capability]
}
// Any checks whether any of the specified capabilities are set.
// If the set is `all`, the result is true regardless of the capabilities
// passed in (even if none are passed).
func (c DriverCapabilities) Any(capabilities ...DriverCapability) bool {
	found := c.IsAll()
	for idx := 0; !found && idx < len(capabilities); idx++ {
		found = c.Has(capabilities[idx])
	}
	return found
}
// List returns the names of all capabilities present as keys in the set.
// The list is sorted. A nil slice is returned for an empty set.
func (c DriverCapabilities) List() []string {
	if len(c) == 0 {
		return nil
	}
	names := make([]string, 0, len(c))
	for name := range c {
		names = append(names, string(name))
	}
	sort.Strings(names)
	return names
}
// String returns the string representation of the driver capabilities:
// "all" if the set is `all`, otherwise the sorted capability names joined
// with commas.
func (c DriverCapabilities) String() string {
	if !c.IsAll() {
		return strings.Join(c.List(), ",")
	}
	return "all"
}
// IsAll indicates whether the set of capabilities is `all`, that is, whether
// the DriverCapabilityAll entry is selected in the set.
func (c DriverCapabilities) IsAll() bool {
return c[DriverCapabilityAll]
}
// Intersection returns a new set which includes the capabilities selected in
// BOTH c and s2.
// For example: c = {a1, a2}, s2 = {a2, a3} => c.Intersection(s2) = {a2}
//
// A set that is `all` acts as the universal set: if s2 is `all` the result
// holds the capabilities selected in c, and if c is `all` the result holds
// the capabilities selected in s2.
//
// The result is always a freshly allocated set, so modifying it never affects
// c or s2. Only selected capabilities (entries that are true) are considered;
// entries explicitly set to false are treated as absent.
func (c DriverCapabilities) Intersection(s2 DriverCapabilities) DriverCapabilities {
	// source is the set whose entries are candidates for the result;
	// unrestricted records that the other operand is `all`, in which case the
	// candidates need not be looked up in c.
	source, unrestricted := s2, c.IsAll()
	if s2.IsAll() {
		source, unrestricted = c, true
	}

	intersection := make(DriverCapabilities)
	for capability, selected := range source {
		if selected && (unrestricted || c[capability]) {
			intersection[capability] = true
		}
	}
	return intersection
}
// IsSuperset returns true if and only if c is a superset of s2, i.e. every
// capability selected in s2 is also selected in c.
//
// If c is `all` it is a superset of every set. Only selected capabilities
// (entries that are true) of s2 are considered; entries explicitly set to
// false are treated as absent and are not required of c.
func (c DriverCapabilities) IsSuperset(s2 DriverCapabilities) bool {
	if c.IsAll() {
		return true
	}
	for capability, selected := range s2 {
		if selected && !c[capability] {
			return false
		}
	}
	return true
}

View File

@@ -1,134 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
// TestDriverCapabilitiesIntersection checks Intersection for combinations of
// empty sets, the `all` set, and explicit capability lists. For each case the
// intersection is computed as supportedCapabilities.Intersection(capabilities).
// Subtests are named after the index of the case in the table.
func TestDriverCapabilitiesIntersection(t *testing.T) {
testCases := []struct {
capabilities DriverCapabilities
supportedCapabilities DriverCapabilities
expectedIntersection DriverCapabilities
}{
// Nothing requested and nothing supported gives the empty set.
{
capabilities: driverCapabilitiesNone,
supportedCapabilities: driverCapabilitiesNone,
expectedIntersection: driverCapabilitiesNone,
},
// `all` requested but nothing supported gives the empty set.
{
capabilities: driverCapabilitiesAll,
supportedCapabilities: driverCapabilitiesNone,
expectedIntersection: driverCapabilitiesNone,
},
// `all` requested is limited to the supported set.
{
capabilities: driverCapabilitiesAll,
supportedCapabilities: SupportedDriverCapabilities,
expectedIntersection: SupportedDriverCapabilities,
},
// An explicit request with `all` supported gives the requested set.
{
capabilities: SupportedDriverCapabilities,
supportedCapabilities: driverCapabilitiesAll,
expectedIntersection: SupportedDriverCapabilities,
},
// Nothing requested with `all` supported gives the empty set.
{
capabilities: driverCapabilitiesNone,
supportedCapabilities: driverCapabilitiesAll,
expectedIntersection: driverCapabilitiesNone,
},
// Nothing requested with an explicit supported set gives the empty set.
{
capabilities: driverCapabilitiesNone,
supportedCapabilities: NewDriverCapabilities("cap1"),
expectedIntersection: driverCapabilitiesNone,
},
// The order in which capabilities are listed is irrelevant.
{
capabilities: NewDriverCapabilities("cap0,cap1"),
supportedCapabilities: NewDriverCapabilities("cap1,cap0"),
expectedIntersection: NewDriverCapabilities("cap0,cap1"),
},
// The defaults are all contained in the supported set.
{
capabilities: DefaultDriverCapabilities,
supportedCapabilities: SupportedDriverCapabilities,
expectedIntersection: DefaultDriverCapabilities,
},
// Requested is a strict subset of supported (ngx is not requested).
{
capabilities: NewDriverCapabilities("compute,compat32,graphics,utility,video,display"),
supportedCapabilities: NewDriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
expectedIntersection: NewDriverCapabilities("compute,compat32,graphics,utility,video,display"),
},
// An explicit request with nothing supported gives the empty set.
{
capabilities: NewDriverCapabilities("cap1"),
supportedCapabilities: driverCapabilitiesNone,
expectedIntersection: driverCapabilitiesNone,
},
// Requested is a strict superset of supported (ngx is not supported).
{
capabilities: NewDriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
supportedCapabilities: NewDriverCapabilities("compute,compat32,graphics,utility,video,display"),
expectedIntersection: NewDriverCapabilities("compute,compat32,graphics,utility,video,display"),
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
intersection := tc.supportedCapabilities.Intersection(tc.capabilities)
require.EqualValues(t, tc.expectedIntersection, intersection)
})
}
}
// TestDriverCapabilitiesList checks that List returns the sorted capability
// names for sets created with NewDriverCapabilities. Cases that leave
// `expected` unset expect a nil list. Subtests are named after the index of
// the case in the table.
func TestDriverCapabilitiesList(t *testing.T) {
testCases := []struct {
capabilities DriverCapabilities
expected []string
}{
// An empty string yields no capabilities.
{
capabilities: NewDriverCapabilities(""),
},
// A whitespace-only string yields no capabilities.
{
capabilities: NewDriverCapabilities(" "),
},
// A lone separator yields no capabilities.
{
capabilities: NewDriverCapabilities(","),
},
// A leading separator is ignored.
{
capabilities: NewDriverCapabilities(",cap"),
expected: []string{"cap"},
},
// A trailing separator is ignored.
{
capabilities: NewDriverCapabilities("cap,"),
expected: []string{"cap"},
},
// Empty elements between separators are ignored.
{
capabilities: NewDriverCapabilities("cap0,,cap1"),
expected: []string{"cap0", "cap1"},
},
// The resulting list is sorted regardless of the input order.
{
capabilities: NewDriverCapabilities("cap1,cap0,cap3"),
expected: []string{"cap0", "cap1", "cap3"},
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
require.EqualValues(t, tc.expected, tc.capabilities.List())
})
}
}

View File

@@ -18,13 +18,11 @@ package image
import (
"fmt"
"path/filepath"
"strconv"
"strings"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
"tags.cncf.io/container-device-interface/pkg/parser"
)
const (
@@ -39,62 +37,55 @@ const (
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
// a map of environment variable to values that can be used to perform lookups
// such as requirements.
type CUDA struct {
env map[string]string
mounts []specs.Mount
}
type CUDA map[string]string
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
// The process environment is read (if present) to construct the CUDA Image.
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
var env []string
if spec != nil && spec.Process != nil {
env = spec.Process.Env
if spec == nil || spec.Process == nil {
return NewCUDAImageFromEnv(nil)
}
return New(
WithEnv(env),
WithMounts(spec.Mounts),
)
return NewCUDAImageFromEnv(spec.Process.Env)
}
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
// is a list of strings of the form ENVAR=VALUE.
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
return New(WithEnv(env))
}
c := make(CUDA)
// Getenv returns the value of the specified environment variable.
// If the environment variable is not specified, an empty string is returned.
func (i CUDA) Getenv(key string) string {
return i.env[key]
}
for _, e := range env {
parts := strings.SplitN(e, "=", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("invalid environment variable: %v", e)
}
c[parts[0]] = parts[1]
}
// HasEnvvar checks whether the specified envvar is defined in the image.
func (i CUDA) HasEnvvar(key string) bool {
_, exists := i.env[key]
return exists
return c, nil
}
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
// image is considered legacy if it has a CUDA_VERSION environment variable defined
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
func (i CUDA) IsLegacy() bool {
legacyCudaVersion := i.env[envCUDAVersion]
cudaRequire := i.env[envNVRequireCUDA]
legacyCudaVersion := i[envCUDAVersion]
cudaRequire := i[envNVRequireCUDA]
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
}
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
// variables.
func (i CUDA) GetRequirements() ([]string, error) {
if i.HasDisableRequire() {
return nil, nil
}
// TODO: We need not process this if disable require is set, but this will be done
// in a single follow-up to ensure that the behavioural change is accurately captured.
// if i.HasDisableRequire() {
// return nil, nil
// }
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
var requirements []string
for name, value := range i.env {
for name, value := range i {
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
requirements = append(requirements, value)
}
@@ -113,7 +104,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
// to a valid (true) boolean value this can be used to disable the requirement checks
func (i CUDA) HasDisableRequire() bool {
if disable, exists := i.env[envNVDisableRequire]; exists {
if disable, exists := i[envNVDisableRequire]; exists {
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
d, _ := strconv.ParseBool(disable)
return d
@@ -124,12 +115,12 @@ func (i CUDA) HasDisableRequire() bool {
// DevicesFromEnvvars returns the devices requested by the image through environment variables
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
// We concatenate all the devices from the specified env.
// We concatenate all the devices from the specified envvars.
var isSet bool
var devices []string
requested := make(map[string]bool)
for _, envVar := range envVars {
if devs, ok := i.env[envVar]; ok {
if devs, ok := i[envVar]; ok {
isSet = true
for _, d := range strings.Split(devs, ",") {
trimmed := strings.TrimSpace(d)
@@ -157,21 +148,20 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
// GetDriverCapabilities returns the requested driver capabilities.
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
env := i.env[envNVDriverCapabilities]
env := i[envNVDriverCapabilities]
capabilities := make(DriverCapabilities)
capabilites := make(DriverCapabilities)
for _, c := range strings.Split(env, ",") {
capabilities[DriverCapability(c)] = true
capabilites[DriverCapability(c)] = true
}
return capabilities
return capabilites
}
func (i CUDA) legacyVersion() (string, error) {
cudaVersion := i.env[envCUDAVersion]
majorMinor, err := parseMajorMinorVersion(cudaVersion)
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
if err != nil {
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
return "", fmt.Errorf("invalid CUDA version: %v", err)
}
return majorMinor, nil
@@ -198,79 +188,3 @@ func parseMajorMinorVersion(version string) (string, error) {
}
return majorMinor, nil
}
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices.
//
// Devices listed in NVIDIA_VISIBLE_DEVICES must be qualified names according
// to parser.IsQualifiedName, and devices requested as mounts must start with
// "cdi/". If no devices are requested at all, false is returned.
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
	envDevices := i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List()
	for _, name := range envDevices {
		if !parser.IsQualifiedName(name) {
			return false
		}
	}

	mountDevices := i.DevicesFromMounts()
	for _, name := range mountDevices {
		if !strings.HasPrefix(name, "cdi/") {
			return false
		}
	}

	// Every device seen so far is a CDI device; require at least one of them.
	return len(envDevices)+len(mountDevices) > 0
}
const (
// deviceListAsVolumeMountsRoot is the root path in the container under which
// the destinations of mounts are interpreted as requests for devices.
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
)
// DevicesFromMounts returns a list of devices specified as mounts.
//
// A mount requests a device if its source is /dev/null and its destination is
// located below deviceListAsVolumeMountsRoot. The device name is the path of
// the destination relative to that root. Each destination is only considered
// once, and the devices are returned in the order of the mounts.
//
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
func (i CUDA) DevicesFromMounts() []string {
	root := filepath.Clean(deviceListAsVolumeMountsRoot)
	// The separator is part of the prefix so that sibling paths which merely
	// share the leading characters of root (e.g. "<root>-other/gpu0") are not
	// mistaken for entries below root.
	prefix := root + "/"

	seen := make(map[string]bool)
	var devices []string
	for _, m := range i.mounts {
		// Only consider mounts whose host volume is /dev/null
		if filepath.Clean(m.Source) != "/dev/null" {
			continue
		}

		destination := filepath.Clean(m.Destination)
		if seen[destination] {
			continue
		}
		seen[destination] = true

		// Only consider container mount points that are located below 'root'
		if !strings.HasPrefix(destination, prefix) {
			continue
		}

		// Grab the full path beyond 'root' and add it to the list of devices
		device := strings.Trim(strings.TrimPrefix(destination, prefix), "/")
		if len(device) == 0 {
			continue
		}
		devices = append(devices, device)
	}
	return devices
}
// CDIDevicesFromMounts returns a list of CDI devices specified as mounts on the image.
//
// Only mount devices of the form "cdi/<vendor>/<class>/<device>" are
// considered; each is returned as "<vendor>/<class>=<device>". Everything
// after the class (including further slashes) is part of the device. Entries
// with fewer than three components after "cdi/" are skipped.
func (i CUDA) CDIDevicesFromMounts() []string {
	const cdiPrefix = "cdi/"

	var qualified []string
	for _, name := range i.DevicesFromMounts() {
		if !strings.HasPrefix(name, cdiPrefix) {
			continue
		}
		fields := strings.SplitN(name[len(cdiPrefix):], "/", 3)
		if len(fields) == 3 {
			qualified = append(qualified, fmt.Sprintf("%s/%s=%s", fields[0], fields[1], fields[2]))
		}
	}
	return qualified
}

Some files were not shown because too many files have changed in this diff Show More