Compare commits

..

17 Commits

Author SHA1 Message Date
Evan Lezar
d167812ce3 Merge branch 'cherry-pick-enable-cdi' into 'release-1.14'
Add cdi.enabled option to runtime configure

See merge request nvidia/container-toolkit/container-toolkit!539
2024-01-19 14:40:29 +00:00
Christopher Desiniotis
7ff23999e8 Add option to nvidia-ctk to enable CDI in docker
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-19 15:36:15 +01:00
Evan Lezar
a9b01a43bc Add cdi.enabled option to runtime configure
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-19 15:35:14 +01:00
Evan Lezar
ccff00bc30 Merge branch 'bump-version-v1.14.4' into 'release-1.14'
Bump version to v1.14.4

See merge request nvidia/container-toolkit/container-toolkit!537
2024-01-18 12:17:55 +00:00
Evan Lezar
f7d54200c6 Bump version to v1.14.4
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-18 13:17:17 +01:00
Evan Lezar
29fd206f3a Merge branch 'cherry-pick-1.14.4' into 'release-1.14'
Cherry pick changes for v1.14.4 release

See merge request nvidia/container-toolkit/container-toolkit!534
2024-01-17 22:51:28 +00:00
Evan Lezar
cfe0d5d07e Skip component updates
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-17 23:06:49 +01:00
Evan Lezar
9ab640b2be Set libnvidia-container branch
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-17 22:46:13 +01:00
Evan Lezar
9d2e4b48bc Update libnvidia-container to 1.14.4
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-17 22:33:44 +01:00
Evan Lezar
c050bcf081 Merge branch 'add-crun-as-configured-runtime' into 'main'
Set default low-level runtimes to runc, crun

See merge request nvidia/container-toolkit/container-toolkit!536
2024-01-17 22:29:58 +01:00
Evan Lezar
27d0fa4ee2 Merge branch 'bump-cuda-12.3.1' into 'main'
Bump CUDA base image to 12.3.1

See merge request nvidia/container-toolkit/container-toolkit!535
2024-01-11 15:27:27 +01:00
Evan Lezar
e0e22fdceb Merge branch 'fix-user-group' into 'main'
Fix bug in determining CLI user on SUSE systems

See merge request nvidia/container-toolkit/container-toolkit!532
2024-01-11 15:27:25 +01:00
Evan Lezar
c1eae0deda Merge branch 'libnvdxgdmal' into 'main'
Add libnvdxgdmal library

See merge request nvidia/container-toolkit/container-toolkit!529
2024-01-11 15:27:01 +01:00
Evan Lezar
68f0203a49 Merge branch 'remove-libseccomp-dependency' into 'main'
Remove libseccomp package dependency

See merge request nvidia/container-toolkit/container-toolkit!531
2024-01-11 15:26:37 +01:00
Evan Lezar
cc688f7c75 Merge branch 'log-requested-mode' into 'main'
Log explicitly requested runtime mode

See merge request nvidia/container-toolkit/container-toolkit!527
2024-01-11 15:26:03 +01:00
Evan Lezar
7566eb124a Merge branch 'fix-config-update-command' into 'main'
Switch to reflect package for config updates

See merge request nvidia/container-toolkit/container-toolkit!500
2024-01-11 15:25:33 +01:00
Evan Lezar
eb5d50abc4 Merge branch 'include-nvoptix' into 'main'
Update list of graphics mounts

See merge request nvidia/container-toolkit/container-toolkit!501
2024-01-11 15:25:09 +01:00
785 changed files with 13503 additions and 26264 deletions

View File

@@ -19,6 +19,7 @@ default:
variables:
GIT_SUBMODULE_STRATEGY: recursive
BUILDIMAGE: "${CI_REGISTRY_IMAGE}/build:${CI_COMMIT_SHORT_SHA}"
BUILD_MULTI_ARCH_IMAGES: "true"
stages:
@@ -144,7 +145,7 @@ trigger-pipeline:
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
script:
- make -f deployments/container/Makefile test-${DIST}
- make -f build/container/Makefile test-${DIST}
# Define the test targets
test-packaging:
@@ -194,7 +195,7 @@ test-packaging:
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
# Target
- make -f deployments/container/Makefile push-${DIST}
- make -f build/container/Makefile push-${DIST}
# Define a staging release step that pushes an image to an internal "staging" repository
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
@@ -224,6 +225,13 @@ test-packaging:
OUT_IMAGE_VERSION: "${DEVEL_RELEASE_IMAGE_VERSION}"
# Define the release jobs
release:staging-centos7:
extends:
- .release:staging
- .dist-centos7
needs:
- image-centos7
release:staging-ubi8:
extends:
- .release:staging

View File

@@ -1,56 +0,0 @@
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "gomod"
target-branch: main
directory: "/"
schedule:
interval: "weekly"
day: "sunday"
ignore:
- dependency-name: k8s.io/*
labels:
- dependencies
- package-ecosystem: "docker"
directory: "/deployments/container"
schedule:
interval: "daily"
- package-ecosystem: "gomod"
# This defines a specific dependabot rule for the latest release-* branch.
target-branch: release-1.14
directory: "/"
schedule:
interval: "weekly"
day: "sunday"
ignore:
- dependency-name: k8s.io/*
labels:
- dependencies
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
- package-ecosystem: "github-actions"
target-branch: gh-pages
directory: "/"
schedule:
interval: "weekly"
day: "monday"
# Allow dependabot to update the libnvidia-container submodule.
- package-ecosystem: "gitsubmodule"
target-branch: main
directory: "/"
allow:
- dependency-name: "third_party/libnvidia-container"
schedule:
interval: "daily"
labels:
- dependencies
- libnvidia-container

113
.github/workflows/blossom-ci.yml vendored Normal file
View File

@@ -0,0 +1,113 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A workflow to trigger ci on hybrid infra (github + self hosted runner)
name: Blossom-CI
on:
issue_comment:
types: [created]
workflow_dispatch:
inputs:
platform:
description: 'runs-on argument'
required: false
args:
description: 'argument'
required: false
jobs:
Authorization:
name: Authorization
runs-on: blossom
outputs:
args: ${{ env.args }}
# This job only runs for pull request comments
if: |
contains( '\
anstockatnv,\
rorajani,\
cdesiniotis,\
shivamerla,\
ArangoGutierrez,\
elezar,\
klueska,\
zvonkok,\
', format('{0},', github.actor)) &&
github.event.comment.body == '/blossom-ci'
steps:
- name: Check if comment is issued by authorized person
run: blossom-ci
env:
OPERATION: 'AUTH'
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
Vulnerability-scan:
name: Vulnerability scan
needs: [Authorization]
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
repository: ${{ fromJson(needs.Authorization.outputs.args).repo }}
ref: ${{ fromJson(needs.Authorization.outputs.args).ref }}
lfs: 'true'
# repo specific steps
#- name: Setup java
# uses: actions/setup-java@v1
# with:
# java-version: 1.8
# add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file
#- name: Setup blackduck properties
# run: |
# PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g')
# echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties
# echo detect.maven.included.scopes=compile >> application.properties
- name: Run blossom action
uses: NVIDIA/blossom-action@main
env:
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
with:
args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }}
args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }}
args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }}
Job-trigger:
name: Start ci job
needs: [Vulnerability-scan]
runs-on: blossom
steps:
- name: Start ci job
run: blossom-ci
env:
OPERATION: 'START-CI-JOB'
CI_SERVER: ${{ secrets.CI_SERVER }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Upload-Log:
name: Upload log
runs-on: blossom
if : github.event_name == 'workflow_dispatch'
steps:
- name: Jenkins log for pull request ${{ fromJson(github.event.inputs.args).pr }} (click here)
run: blossom-ci
env:
OPERATION: 'POST-PROCESSING'
CI_SERVER: ${{ secrets.CI_SERVER }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -1,76 +0,0 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Golang
on:
pull_request:
types:
- opened
- synchronize
branches:
- main
- release-*
push:
branches:
- main
- release-*
jobs:
check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
name: Checkout code
- name: Get Golang version
id: vars
run: |
GOLANG_VERSION=$( grep "GOLANG_VERSION :=" versions.mk )
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GOLANG_VERSION }}
- name: Lint
uses: golangci/golangci-lint-action@v4
with:
version: latest
args: -v --timeout 5m
skip-cache: true
- name: Check golang modules
run: make check-vendor
test:
name: Unit test
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Get Golang version
id: vars
run: |
GOLANG_VERSION=$( grep "GOLANG_VERSION :=" versions.mk )
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GOLANG_VERSION }}
- run: make test
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
name: Checkout code
- name: Build
run: make docker-build

View File

@@ -1,138 +0,0 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run this workflow on pull requests
name: image
on:
pull_request:
types:
- opened
- synchronize
branches:
- main
- release-*
push:
branches:
- main
- release-*
jobs:
packages:
runs-on: ubuntu-latest
strategy:
matrix:
target:
- ubuntu18.04-arm64
- ubuntu18.04-amd64
- ubuntu18.04-ppc64le
- centos7-aarch64
- centos7-x86_64
- centos8-ppc64le
ispr:
- ${{github.event_name == 'pull_request'}}
exclude:
- ispr: true
target: ubuntu18.04-arm64
- ispr: true
target: ubuntu18.04-ppc64le
- ispr: true
target: centos7-aarch64
- ispr: true
target: centos8-ppc64le
fail-fast: false
steps:
- uses: actions/checkout@v4
name: Check out code
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: build ${{ matrix.target }} packages
run: |
sudo apt-get install -y coreutils build-essential sed git bash make
echo "Building packages"
./scripts/build-packages.sh ${{ matrix.target }}
- name: 'Upload Artifacts'
uses: actions/upload-artifact@v4
with:
compression-level: 0
name: toolkit-container-${{ matrix.target }}-${{ github.run_id }}
path: ${{ github.workspace }}/dist/*
image:
runs-on: ubuntu-latest
strategy:
matrix:
dist:
- ubuntu20.04
- ubi8
- packaging
ispr:
- ${{github.event_name == 'pull_request'}}
exclude:
- ispr: true
dist: ubi8
needs: packages
steps:
- uses: actions/checkout@v4
name: Check out code
- name: Calculate build vars
id: vars
run: |
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
REPO_FULL_NAME="${{ github.event.pull_request.head.repo.full_name }}"
echo "${REPO_FULL_NAME}"
echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> $GITHUB_ENV
PUSH_ON_BUILD="false"
BUILD_MULTI_ARCH_IMAGES="false"
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
if [[ "${{ github.actor }}" != "dependabot[bot]" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
# For non-fork PRs that are not created by dependabot we do push images
PUSH_ON_BUILD="true"
fi
elif [[ "${{ github.event_name }}" == "push" ]]; then
# On push events we do generate images and enable multi-arch builds
PUSH_ON_BUILD="true"
BUILD_MULTI_ARCH_IMAGES="true"
fi
echo "PUSH_ON_BUILD=${PUSH_ON_BUILD}" >> $GITHUB_ENV
echo "BUILD_MULTI_ARCH_IMAGES=${BUILD_MULTI_ARCH_IMAGES}" >> $GITHUB_ENV
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Get built packages
uses: actions/download-artifact@v4
with:
path: ${{ github.workspace }}/dist/
pattern: toolkit-container-*-${{ github.run_id }}
merge-multiple: true
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build image
env:
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/container-toolkit
VERSION: ${COMMIT_SHORT_SHA}
run: |
echo "${VERSION}"
make -f deployments/container/Makefile build-${{ matrix.dist }}

22
.github/workflows/pre-sanity.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: Run pre sanity
# run this workflow for each commit
on: [pull_request]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Build dev image
run: make .build-image
- name: Build
run: make docker-build
- name: Tests
run: make docker-coverage
- name: Checks
run: make docker-check

View File

@@ -15,6 +15,68 @@
include:
- .common-ci.yml
build-dev-image:
stage: image
script:
- apk --no-cache add make bash
- make .build-image
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- make .push-build-image
.requires-build-image:
image: "${BUILDIMAGE}"
.go-check:
extends:
- .requires-build-image
stage: go-checks
fmt:
extends:
- .go-check
script:
- make assert-fmt
vet:
extends:
- .go-check
script:
- make vet
lint:
extends:
- .go-check
script:
- make lint
allow_failure: true
ineffassign:
extends:
- .go-check
script:
- make ineffassign
allow_failure: true
misspell:
extends:
- .go-check
script:
- make misspell
go-build:
extends:
- .requires-build-image
stage: go-build
script:
- make build
unit-tests:
extends:
- .requires-build-image
stage: unit-tests
script:
- make coverage
# Define the package build helpers
.multi-arch-build:
before_script:
@@ -76,12 +138,24 @@ package-centos7-x86_64:
- .dist-centos7
- .arch-x86_64
package-centos8-aarch64:
extends:
- .package-build
- .dist-centos8
- .arch-aarch64
package-centos8-ppc64le:
extends:
- .package-build
- .dist-centos8
- .arch-ppc64le
package-centos8-x86_64:
extends:
- .package-build
- .dist-centos8
- .arch-x86_64
package-ubuntu18.04-amd64:
extends:
- .package-build
@@ -126,7 +200,15 @@ package-ubuntu18.04-ppc64le:
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
script:
- make -f deployments/container/Makefile build-${DIST}
- make -f build/container/Makefile build-${DIST}
image-centos7:
extends:
- .image-build
- .package-artifacts
- .dist-centos7
needs:
- package-centos7-x86_64
image-ubi8:
extends:
@@ -156,6 +238,8 @@ image-packaging:
- .package-artifacts
- .dist-packaging
needs:
- job: package-centos8-aarch64
- job: package-centos8-x86_64
- job: package-ubuntu18.04-amd64
- job: package-ubuntu18.04-arm64
- job: package-amazonlinux2-aarch64
@@ -232,3 +316,4 @@ test-docker-ubuntu20.04:
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04

4
.gitmodules vendored
View File

@@ -1,4 +1,4 @@
[submodule "third_party/libnvidia-container"]
path = third_party/libnvidia-container
url = https://github.com/NVIDIA/libnvidia-container.git
branch = main
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
branch = release-1.14

View File

@@ -1,36 +0,0 @@
run:
deadline: 10m
linters:
enable:
- contextcheck
- gocritic
- gofmt
- goimports
- gosec
- gosimple
- govet
- ineffassign
- misspell
- staticcheck
- unconvert
linters-settings:
goimports:
local-prefixes: github.com/NVIDIA/nvidia-container-toolkit
issues:
exclude:
# The legacy hook relies on spec.Hooks.Prestart, which is deprecated as of the v1.2.0 OCI runtime spec.
- "SA1019:(.+).Prestart is deprecated(.+)"
exclude-rules:
# Exclude the gocritic dupSubExpr issue for cgo files.
- path: internal/dxcore/dxcore.go
linters:
- gocritic
text: dupSubExpr
# Exclude the checks for usage of returns to config.Delete(Path) in the crio and containerd config packages.
- path: pkg/config/engine/
linters:
- errcheck
text: config.Delete

View File

@@ -67,7 +67,12 @@ variables:
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
script:
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
- make -f deployments/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
- make -f build/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
image-centos7:
extends:
- .dist-centos7
- .image-pull
image-ubi8:
extends:
@@ -127,6 +132,14 @@ image-packaging:
- policy_evaluation.json
# Define the scan targets
scan-centos7-amd64:
extends:
- .dist-centos7
- .platform-amd64
- .scan
needs:
- image-centos7
scan-ubuntu20.04-amd64:
extends:
- .dist-ubuntu20.04
@@ -230,6 +243,11 @@ release:staging-ubuntu20.04:
# Define the external release targets
# Release to NGC
release:ngc-centos7:
extends:
- .dist-centos7
- .release:ngc
release:ngc-ubuntu20.04:
extends:
- .dist-ubuntu20.04

View File

@@ -1,62 +1,19 @@
# NVIDIA Container Toolkit Changelog
## v1.15.0
* Remove `nvidia-container-runtime` and `nvidia-docker2` packages.
* Use `XDG_DATA_DIRS` environment variable when locating config files such as graphics config files.
* Add support for v0.7.0 Container Device Interface (CDI) specification.
* Add `--config-search-path` option to `nvidia-ctk cdi generate` command. These paths are used when locating driver files such as graphics config files.
* Use D3DKMTEnumAdapters3 to enumerate adapters on WSL2 if available.
* Add support for v1.2.0 OCI Runtime specification.
* Explicitly set `NVIDIA_VISIBLE_DEVICES=void` in generated CDI specifications. This prevents the NVIDIA Container Runtime from making additional modifications.
* [libnvidia-container] Use D3DKMTEnumAdapters3 to enumerate adapters on WSL2 if available.
* [toolkit-container] Bump CUDA base image version to 12.4.1
## v1.15.0-rc.4
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
* Allow multiple device naming strategies for `nvidia-ctk cdi generate` command. This allows a single
CDI spec to be generated that includes GPUs by index and UUID.
* Set the default `--device-name-strategy` for the `nvidia-ctk cdi generate` command to `[index, uuid]`.
* Remove `libnvidia-container0` jetpack dependency included for legacy Tegra-based systems.
* Add `NVIDIA_VISIBLE_DEVICES=void` to generated CDI specifications.
* [toolkit-container] Remove centos7 image. The ubi8 image can be used on all RPM-based platforms.
* [toolkit-container] Bump CUDA base image version to 12.3.2
## v1.15.0-rc.3
* Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied.
## v1.15.0-rc.2
* Extend the `runtime.nvidia.com/gpu` CDI kind to support full-GPUs and MIG devices specified by index or UUID.
* Fix bug when specifying `--dev-root` for Tegra-based systems.
* Log explicitly requested runtime mode.
* Remove package dependency on libseccomp.
* Added detection of libnvdxgdmal.so.1 on WSL2
* Use devRoot to resolve MIG device nodes.
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
* Add `crun` to the list of configured low-level runtimes.
* Added support for `--ldconfig-path` to `nvidia-ctk cdi generate` command.
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker.
* Add discovery of the GDRCopy device (`gdrdrv`) if the `NVIDIA_GDRCOPY` environment variable of the container is set to `enabled`
* [toolkit-container] Bump CUDA base image version to 12.3.1.
## v1.15.0-rc.1
* Skip update of ldcache in containers without ldconfig. The .so.SONAME symlinks are still created.
* Normalize ldconfig path on use. This automatically adjusts the ldconfig setting applied to ldconfig.real on systems where this exists.
## v1.14.4
* Include `nvidia/nvoptix.bin` in list of graphics mounts.
* Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts.
* Add support for `--library-search-paths` to `nvidia-ctk cdi generate` command.
* Add support for injecting /dev/nvidia-nvswitch* devices if the NVIDIA_NVSWITCH=enabled envvar is specified.
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
* Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly.
* Add `--relative-to` option to `nvidia-ctk transform root` command. This controls whether the root transformation is applied to host or container paths.
* Added automatic CDI spec generation when the `runtime.nvidia.com/gpu=all` device is requested by a container.
* Log explicitly requested runtime mode.
* Remove package dependency on libseccomp.
* Added detection of libnvdxgdmal.so.1 on WSL2.
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
* Add `crun` to the list of configured low-level runtimes.
* Add `--cdi.enabled` option to `nvidia-ctk runtime configure` command to enable CDI in containerd.
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
* [libnvidia-container] Fix device permission check when using cgroupv2 (fixes #227)
* [toolkit-container] Bump CUDA base image version to 12.3.1.
* [libnvidia-container] Added detection of libnvdxgdmal.so.1 on WSL2.
## v1.14.3
* [toolkit-container] Bump CUDA base image version to 12.2.2.

View File

@@ -19,7 +19,7 @@ where `TARGET` is a make target that is valid for each of the sub-components.
These include:
* `ubuntu18.04-amd64`
* `centos7-x86_64`
* `centos8-x86_64`
If no `TARGET` is specified, all valid release targets are built.

142
Jenkinsfile vendored Normal file
View File

@@ -0,0 +1,142 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
// Pipeline entry point. Runs on a pod from the 'sw-gpu-cloudnative' cloud with two
// containers: 'docker' (privileged docker:dind) and 'golang' (golang:1.16.3).
// Stages, in order: checkout, dependencies, check, test, version, build-one, release.
podTemplate (cloud:'sw-gpu-cloudnative',
containers: [
containerTemplate(name: 'docker', image: 'docker:dind', ttyEnabled: true, privileged: true),
containerTemplate(name: 'golang', image: 'golang:1.16.3', ttyEnabled: true)
]) {
node(POD_LABEL) {
// Result of the SCM checkout; passed to getVersionInfo in the 'version' stage.
def scmInfo
stage('checkout') {
scmInfo = checkout(scm)
}
stage('dependencies') {
// Install misspell, ineffassign and golint in the golang container
// (presumably for the make targets of the same names run in the 'check' stage).
container('golang') {
sh 'GO111MODULE=off go get -u github.com/client9/misspell/cmd/misspell'
sh 'GO111MODULE=off go get -u github.com/gordonklaus/ineffassign'
sh 'GO111MODULE=off go get -u golang.org/x/lint/golint'
}
// Install make, bash and git in the docker container.
container('docker') {
sh 'apk add --no-cache make bash git'
}
}
// Run each check target as its own parallel branch (see getGolangStages).
stage('check') {
parallel (
getGolangStages(["assert-fmt", "lint", "vet", "ineffassign", "misspell"])
)
}
stage('test') {
parallel (
getGolangStages(["test"])
)
}
// Map of SCM values plus an isTag flag; read by the 'release' stage.
def versionInfo
stage('version') {
container('docker') {
versionInfo = getVersionInfo(scmInfo)
println "versionInfo=${versionInfo}"
}
}
// Only a single distribution/architecture combination is built and released here.
def dist = 'ubuntu20.04'
def arch = 'amd64'
def stageLabel = "${dist}-${arch}"
stage('build-one') {
container('docker') {
stage (stageLabel) {
sh "make ${dist}-${arch}"
}
}
}
stage('release') {
container('docker') {
stage (stageLabel) {
def component = 'main'
def repository = 'sw-gpu-cloudnative-debian-local/pool/main/'
// Upload spec: every .deb under ./dist/<dist>/<arch>/ goes to the repository,
// tagged with the deb.distribution, deb.component and deb.architecture properties.
def uploadSpec = """{
"files":
[ {
"pattern": "./dist/${dist}/${arch}/*.deb",
"target": "${repository}",
"props": "deb.distribution=${dist};deb.component=${component};deb.architecture=${arch}"
}
]
}"""
sh "echo starting release with versionInfo=${versionInfo}"
// Packages are only uploaded when versionInfo.isTag is set.
if (versionInfo.isTag) {
// upload to artifactory repository
def server = Artifactory.server 'sw-gpu-artifactory'
server.upload spec: uploadSpec
} else {
sh "echo skipping release for non-tagged build"
}
}
}
}
}
}
// getGolangStages returns a map from each make target name in targets to the
// closure produced by getLintClosure for that target. The map is suitable for
// passing to the pipeline `parallel` step.
def getGolangStages(def targets) {
    // Declare the map locally: assigning to an undeclared name would store it in
    // the script binding, where it is shared (and overwritten) across calls.
    def stages = [:]
    for (t in targets) {
        stages[t] = getLintClosure(t)
    }
    return stages
}
// getLintClosure builds the closure for a single make target. When invoked, the
// closure runs `make <target>` in the 'golang' container, inside a stage named
// after the target.
def getLintClosure(def target) {
    def runTarget = {
        container('golang') {
            stage(target) {
                sh "make ${target}"
            }
        }
    }
    return runTarget
}
// getVersionInfo returns a map containing an isTag entry computed from
// scmInfo.GIT_BRANCH, followed by every key/value pair of scmInfo.
def getVersionInfo(def scmInfo) {
    def versionInfo = [:]
    versionInfo.isTag = isTag(scmInfo.GIT_BRANCH)
    scmInfo.each { key, value ->
        versionInfo[key] = value
    }
    return versionInfo
}
// isTag reports whether `git describe --all --exact-match --always` yields
// exactly "tags/<branch>". Names that do not start with 'v' return false
// without invoking git.
def isTag(def branch) {
    def looksLikeVersion = branch.startsWith('v')
    if (looksLikeVersion) {
        def described = shOutput('git describe --all --exact-match --always')
        return described == "tags/${branch}"
    }
    return false
}
// shOutput runs the given shell script with the pipeline `sh` step and returns
// its standard output with surrounding whitespace trimmed.
def shOutput(def script) {
    return sh(script: script, returnStdout: true).trim()
}

// shOuptut is the original, misspelled name of shOutput. It is kept as an alias
// so that any caller of the old name keeps working.
def shOuptut(def script) {
    return shOutput(script)
}

View File

@@ -38,8 +38,8 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
CHECK_TARGETS := lint
MAKE_TARGETS := binaries build check fmt test examples cmds coverage generate licenses vendor check-vendor $(CHECK_TARGETS)
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
@@ -53,26 +53,22 @@ CLI_VERSION = $(VERSION)
endif
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
GOOS ?= linux
binaries: cmds
ifneq ($(PREFIX),)
cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
endif
cmds: $(CMD_TARGETS)
ifneq ($(shell uname),Darwin)
EXTLDFLAGS = -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
else
EXTLDFLAGS = -Wl,-undefined,dynamic_lookup
endif
$(CMD_TARGETS): cmd-%:
go build -ldflags "-s -w '-extldflags=$(EXTLDFLAGS)' -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
build:
go build ./...
GOOS=$(GOOS) go build ./...
examples: $(EXAMPLE_TARGETS)
$(EXAMPLE_TARGETS): example-%:
go build ./examples/$(*)
GOOS=$(GOOS) go build ./examples/$(*)
all: check test build binary
check: $(CHECK_TARGETS)
@@ -82,28 +78,37 @@ fmt:
go list -f '{{.Dir}}' $(MODULE)/... \
| xargs gofmt -s -l -w
# Apply goimports -local github.com/NVIDIA/container-toolkit to the codebase
goimports:
go list -f {{.Dir}} $(MODULE)/... \
| xargs goimports -local $(MODULE) -w
assert-fmt:
go list -f '{{.Dir}}' $(MODULE)/... \
| xargs gofmt -s -l > fmt.out
@if [ -s fmt.out ]; then \
echo "\nERROR: The following files are not formatted:\n"; \
cat fmt.out; \
rm fmt.out; \
exit 1; \
else \
rm fmt.out; \
fi
ineffassign:
ineffassign $(MODULE)/...
lint:
golangci-lint run ./...
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status
vendor:
go mod tidy
go mod vendor
go mod verify
misspell:
misspell $(MODULE)/...
check-vendor: vendor
git diff --quiet HEAD -- go.mod go.sum vendor
vet:
go vet $(MODULE)/...
licenses:
go-licenses csv $(MODULE)/...
COVERAGE_FILE := coverage.out
test: build cmds
go test -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
coverage: test
cat $(COVERAGE_FILE) | grep -v "_mock.go" > $(COVERAGE_FILE).no-mocks
@@ -112,15 +117,32 @@ coverage: test
generate:
go generate $(MODULE)/...
$(DOCKER_TARGETS): docker-%:
@echo "Running 'make $(*)' in container image $(BUILDIMAGE)"
# Generate an image for containerized builds
# Note: This image is local only
.PHONY: .build-image .pull-build-image .push-build-image
.build-image: docker/Dockerfile.devel
if [ x"$(SKIP_IMAGE_BUILD)" = x"" ]; then \
$(DOCKER) build \
--progress=plain \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--tag $(BUILDIMAGE) \
-f $(^) \
docker; \
fi
.pull-build-image:
$(DOCKER) pull $(BUILDIMAGE)
.push-build-image:
$(DOCKER) push $(BUILDIMAGE)
$(DOCKER_TARGETS): docker-%: .build-image
@echo "Running 'make $(*)' in docker container $(BUILDIMAGE)"
$(DOCKER) run \
--rm \
-e GOCACHE=/tmp/.cache/go \
-e GOMODCACHE=/tmp/.cache/gomod \
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
-v $(PWD):/work \
-w /work \
-e GOCACHE=/tmp/.cache \
-v $(PWD):$(PWD) \
-w $(PWD) \
--user $$(id -u):$$(id -g) \
$(BUILDIMAGE) \
make $(*)
@@ -131,10 +153,8 @@ PHONY: .shell
$(DOCKER) run \
--rm \
-ti \
-e GOCACHE=/tmp/.cache/go \
-e GOMODCACHE=/tmp/.cache/gomod \
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
-v $(PWD):/work \
-w /work \
-e GOCACHE=/tmp/.cache \
-v $(PWD):$(PWD) \
-w $(PWD) \
--user $$(id -u):$$(id -g) \
$(BUILDIMAGE)

View File

@@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvidia/cuda:12.4.1-base-ubi8 as build
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
RUN yum install -y \
wget make git gcc \
@@ -29,7 +31,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
@@ -48,7 +50,17 @@ COPY . .
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
FROM nvidia/cuda:12.4.1-base-ubi8
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG BASE_DIST
# See https://www.centos.org/centos-linux-eol/
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
RUN [[ "${BASE_DIST}" != "centos8" ]] || \
( \
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
)
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=void

View File

@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
FROM nvidia/cuda:12.4.1-base-ubuntu20.04
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG ARTIFACTS_ROOT
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
@@ -22,6 +24,7 @@ COPY ${ARTIFACTS_ROOT} /artifacts/packages/
WORKDIR /artifacts/packages
# build-args are added to the manifest.txt file below.
ARG BASE_DIST
ARG PACKAGE_DIST
ARG PACKAGE_VERSION
ARG GIT_BRANCH

View File

@@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvidia/cuda:12.4.1-base-ubuntu20.04 as build
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
RUN apt-get update && \
apt-get install -y wget make git gcc \
@@ -29,7 +31,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
@@ -47,7 +49,7 @@ COPY . .
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
FROM nvcr.io/nvidia/cuda:12.4.1-base-ubuntu20.04
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
RUN rm -f /etc/apt/sources.list.d/cuda.list
@@ -73,6 +75,14 @@ ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH ${TARGETARCH}
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container/stable"
ARG LIBNVIDIA_CONTAINER0_VERSION
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
--output ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb && \
dpkg -i ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb; \
fi
RUN dpkg -i \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \

View File

@@ -45,7 +45,7 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules #####
DEFAULT_PUSH_TARGET := ubuntu20.04
DISTRIBUTIONS := ubuntu20.04 ubi8
DISTRIBUTIONS := ubuntu20.04 ubi8 centos7
META_TARGETS := packaging
@@ -56,9 +56,9 @@ TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
ifneq ($(BUILD_MULTI_ARCH_IMAGES),true)
include $(CURDIR)/deployments/container/native-only.mk
include $(CURDIR)/build/container/native-only.mk
else
include $(CURDIR)/deployments/container/multi-arch.mk
include $(CURDIR)/build/container/multi-arch.mk
endif
# For the default push target we also push a short tag equal to the version.
@@ -84,7 +84,7 @@ push-short:
build-%: DIST = $(*)
build-%: DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile.$(DOCKERFILE_SUFFIX)
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
@@ -96,7 +96,10 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
--tag $(IMAGE) \
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
--build-arg BASE_DIST="$(BASE_DIST)" \
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg LIBNVIDIA_CONTAINER0_VERSION="$(LIBNVIDIA_CONTAINER0_DEPENDENCY)" \
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
--build-arg VERSION="$(VERSION)" \
@@ -108,12 +111,20 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
$(CURDIR)
build-ubuntu%: BASE_DIST = $(*)
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
build-ubi8: DOCKERFILE_SUFFIX := ubi8
build-ubi8: BASE_DIST := ubi8
build-ubi8: DOCKERFILE_SUFFIX := centos
build-ubi8: PACKAGE_DIST = centos7
build-centos7: BASE_DIST = $(*)
build-centos7: DOCKERFILE_SUFFIX := centos
build-centos7: PACKAGE_DIST = $(BASE_DIST)
build-packaging: BASE_DIST := ubuntu20.04
build-packaging: DOCKERFILE_SUFFIX := packaging
build-packaging: PACKAGE_ARCH := amd64
build-packaging: PACKAGE_DIST = all
@@ -134,7 +145,9 @@ test-packaging:
@echo "Testing package image contents"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/aarch64" || echo "Missing centos7/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/x86_64" || echo "Missing centos7/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/aarch64" || echo "Missing centos8/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/ppc64le" || echo "Missing centos8/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/x86_64" || echo "Missing centos8/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/amd64" || echo "Missing ubuntu18.04/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/arm64" || echo "Missing ubuntu18.04/arm64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/ppc64le" || echo "Missing ubuntu18.04/ppc64le"

View File

@@ -16,6 +16,9 @@ PUSH_ON_BUILD ?= false
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
# We only have x86_64 packages for centos7
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
# We only generate amd64 image for ubuntu18.04
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64

View File

@@ -9,10 +9,9 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
)
const (
@@ -23,7 +22,6 @@ const (
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)
@@ -39,7 +37,6 @@ type nvidiaConfig struct {
Devices string
MigConfigDevices string
MigMonitorDevices string
ImexChannels string
DriverCapabilities string
// Requirements defines the requirements DSL for the container to run.
// This is empty if no specific requirements are needed, or if requirements are
@@ -177,7 +174,7 @@ func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *stri
// if specified.
var hasSwarmEnvvar bool
for _, envvar := range swarmResourceEnvvars {
if image.HasEnvvar(envvar) {
if _, exists := image[envvar]; exists {
hasSwarmEnvvar = true
break
}
@@ -260,39 +257,28 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
return nil
}
func getMigConfigDevices(image image.CUDA) *string {
return getMigDevices(image, envNVMigConfigDevices)
}
func getMigMonitorDevices(image image.CUDA) *string {
return getMigDevices(image, envNVMigMonitorDevices)
}
func getMigDevices(image image.CUDA, envvar string) *string {
if !image.HasEnvvar(envvar) {
return nil
func getMigConfigDevices(env map[string]string) *string {
if devices, ok := env[envNVMigConfigDevices]; ok {
return &devices
}
devices := image.Getenv(envvar)
return &devices
return nil
}
func getImexChannels(image image.CUDA) *string {
if !image.HasEnvvar(envNVImexChannels) {
return nil
func getMigMonitorDevices(env map[string]string) *string {
if devices, ok := env[envNVMigMonitorDevices]; ok {
return &devices
}
chans := image.Getenv(envNVImexChannels)
return &chans
return nil
}
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities {
// We use the default driver capabilities by default. This is filtered to only include the
// supported capabilities
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities)
capsEnv := cudaImage.Getenv(envNVDriverCapabilities)
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
if !capsEnvSpecified && legacyImage {
// Environment variable unset with legacy image: set all capabilities.
@@ -338,11 +324,6 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
var imexChannels string
if c := getImexChannels(image); c != nil {
imexChannels = *c
}
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
requirements, err := image.GetRequirements()
@@ -354,7 +335,6 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
Devices: devices,
MigConfigDevices: migConfigDevices,
MigMonitorDevices: migMonitorDevices,
ImexChannels: imexChannels,
DriverCapabilities: driverCapabilities,
Requirements: requirements,
}

View File

@@ -5,9 +5,8 @@ import (
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/stretchr/testify/require"
)
func TestGetNvidiaConfig(t *testing.T) {
@@ -466,9 +465,6 @@ func TestGetNvidiaConfig(t *testing.T) {
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
image, _ := image.New(
image.WithEnvMap(tc.env),
)
// Wrap the call to getNvidiaConfig() in a closure.
var config *nvidiaConfig
getConfig := func() {
@@ -477,7 +473,7 @@ func TestGetNvidiaConfig(t *testing.T) {
defaultConfig, _ := getDefaultHookConfig()
hookConfig = &defaultConfig
}
config = getNvidiaConfig(hookConfig, image, nil, tc.privileged)
config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged)
}
// For any tests that are expected to panic, make sure they do.
@@ -682,17 +678,13 @@ func TestDeviceListSourcePriority(t *testing.T) {
// Wrap the call to getDevices() in a closure.
var devices *string
getDevices := func() {
image, _ := image.New(
image.WithEnvMap(
map[string]string{
envNVVisibleDevices: tc.envvarDevices,
},
),
)
env := map[string]string{
envNVVisibleDevices: tc.envvarDevices,
}
hookConfig, _ := getDefaultHookConfig()
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
devices = getDevices(&hookConfig, image, tc.mountDevices, tc.privileged)
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
}
// For all other tests, just grab the devices and check the results
@@ -913,10 +905,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
for i, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
image, _ := image.New(
image.WithEnvMap(tc.env),
)
devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars)
if tc.expectedDevices == nil {
require.Nil(t, devices, "%d: %v", i, tc)
return
@@ -1026,17 +1015,14 @@ func TestGetDriverCapabilities(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
var capabilities string
var capabilites string
c := HookConfig{
SupportedDriverCapabilities: tc.supportedCapabilities,
}
image, _ := image.New(
image.WithEnvMap(tc.env),
)
getDriverCapabilities := func() {
capabilities = c.getDriverCapabilities(image, tc.legacyImage).String()
capabilites = c.getDriverCapabilities(tc.env, tc.legacyImage).String()
}
if tc.expectedPanic {
@@ -1045,7 +1031,7 @@ func TestGetDriverCapabilities(t *testing.T) {
}
getDriverCapabilities()
require.EqualValues(t, tc.expectedCapabilities, capabilities)
require.EqualValues(t, tc.expectedCapabilities, capabilites)
})
}
}

View File

@@ -17,6 +17,8 @@ const (
driverPath = "/run/nvidia/driver"
)
var defaultPaths = [...]string{}
// HookConfig : options for the nvidia-container-runtime-hook.
type HookConfig config.Config

View File

@@ -21,9 +21,8 @@ import (
"os"
"testing"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/stretchr/testify/require"
)
func TestGetHookConfig(t *testing.T) {

View File

@@ -111,8 +111,8 @@ func doPrestart() {
}
args = append(args, "configure")
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
if cli.Ldconfig != "" {
args = append(args, fmt.Sprintf("--ldconfig=%s", cli.Ldconfig))
}
if cli.NoCgroups {
args = append(args, "--no-cgroups")
@@ -126,9 +126,6 @@ func doPrestart() {
if len(nvidia.MigMonitorDevices) > 0 {
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
}
if len(nvidia.ImexChannels) > 0 {
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
}
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
if len(cap) == 0 {
@@ -145,7 +142,6 @@ func doPrestart() {
args = append(args, rootfs)
env := append(os.Environ(), cli.Environment...)
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection?
err = syscall.Exec(args[0], args, env)
log.Panicln("exec failed:", err)
}

View File

@@ -85,126 +85,3 @@ Alternatively the NVIDIA Container Runtime can be set as the default runtime for
}
}
```
## Environment variables (OCI spec)
Each environment variable maps to a command-line argument for `nvidia-container-cli` from [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
These variables are already set in our [official CUDA images](https://hub.docker.com/r/nvidia/cuda/).
### `NVIDIA_VISIBLE_DEVICES`
This variable controls which GPUs will be made accessible inside the container.
#### Possible values
* `0,1,2`, `GPU-fef8089b` …: a comma-separated list of GPU UUID(s) or index(es).
* `all`: all GPUs will be accessible. This is the default value in our container images.
* `none`: no GPU will be accessible, but driver capabilities will be enabled.
* `void` or *empty* or *unset*: `nvidia-container-runtime` will have the same behavior as `runc`.
**Note**: When running on a MIG capable device, the following values will also be available:
* `0:0,0:1,1:0`, `MIG-GPU-fef8089b/0/1` …: a comma-separated list of MIG Device UUID(s) or index(es).
The MIG device indices have the form `<GPU Device Index>:<MIG Device Index>`, as seen in the example output:
```
$ nvidia-smi -L
GPU 0: Graphics Device (UUID: GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5)
MIG Device 0: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/0)
MIG Device 1: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/1)
MIG Device 2: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/11/0)
```
### `NVIDIA_MIG_CONFIG_DEVICES`
This variable controls which of the visible GPUs can have their MIG
configuration managed from within the container. This includes enabling and
disabling MIG mode, creating and destroying GPU Instances and Compute
Instances, etc.
#### Possible values
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
MIG configurations managed.
**Note**:
* This feature is only available on MIG capable devices (e.g. the A100).
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
* When not running as `root`, the container user must have read access to the
`/proc/driver/nvidia/capabilities/mig/config` file on the host.
### `NVIDIA_MIG_MONITOR_DEVICES`
This variable controls which of the visible GPUs can have aggregate information
about all of their MIG devices monitored from within the container. This
includes inspecting the aggregate memory usage, listing the aggregate running
processes, etc.
#### Possible values
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
MIG devices monitored.
**Note**:
* This feature is only available on MIG capable devices (e.g. the A100).
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
* When not running as `root`, the container user must have read access to the
`/proc/driver/nvidia/capabilities/mig/monitor` file on the host.
### `NVIDIA_DRIVER_CAPABILITIES`
This option controls which driver libraries/binaries will be mounted inside the container.
#### Possible values
* `compute,video`, `graphics,utility` …: a comma-separated list of driver features the container needs.
* `all`: enable all available driver capabilities.
* *empty* or *unset*: use the default driver capabilities: `utility,compute`.
#### Supported driver capabilities
* `compute`: required for CUDA and OpenCL applications.
* `compat32`: required for running 32-bit applications.
* `graphics`: required for running OpenGL and Vulkan applications.
* `utility`: required for using `nvidia-smi` and NVML.
* `video`: required for using the Video Codec SDK.
* `display`: required for leveraging X11 display.
### `NVIDIA_REQUIRE_*`
A logical expression to define constraints on the configurations supported by the container.
#### Supported constraints
* `cuda`: constraint on the CUDA driver version.
* `driver`: constraint on the driver version.
* `arch`: constraint on the compute architectures of the selected GPUs.
* `brand`: constraint on the brand of the selected GPUs (e.g. GeForce, Tesla, GRID).
#### Expressions
Multiple constraints can be expressed in a single environment variable: space-separated constraints are ORed, comma-separated constraints are ANDed.
Multiple environment variables of the form `NVIDIA_REQUIRE_*` are ANDed together.
### `NVIDIA_DISABLE_REQUIRE`
Single switch to disable all the constraints of the form `NVIDIA_REQUIRE_*`.
### `NVIDIA_REQUIRE_CUDA`
The version of the CUDA toolkit used by the container. It is an instance of the generic `NVIDIA_REQUIRE_*` case and it is set by official CUDA images.
If the version of the NVIDIA driver is insufficient to run this version of CUDA, the container will not be started.
#### Possible values
* `cuda>=7.5`, `cuda>=8.0`, `cuda>=9.0` …: any valid CUDA version in the form `major.minor`.
### `CUDA_VERSION`
Similar to `NVIDIA_REQUIRE_CUDA`, for legacy CUDA images.
In addition, if `NVIDIA_REQUIRE_CUDA` is not set, `NVIDIA_VISIBLE_DEVICES` and `NVIDIA_DRIVER_CAPABILITIES` will default to `all`.
## Usage example
**NOTE:** The use of the `nvidia-container-runtime` as CLI replacement for `runc` is uncommon and is only provided for completeness.
Although the `nvidia-container-runtime` is typically configured as a replacement for `runc` or `crun` in various container engines, it can also be
invoked from the command line as `runc` would. For example:
```sh
# Setup a rootfs based on Ubuntu 16.04
cd $(mktemp -d) && mkdir rootfs
curl -sS http://cdimage.ubuntu.com/ubuntu-base/releases/16.04/release/ubuntu-base-16.04.6-base-amd64.tar.gz | tar --exclude 'dev/*' -C rootfs -xz
# Create an OCI runtime spec
nvidia-container-runtime spec
sed -i 's;"sh";"nvidia-smi";' config.json
sed -i 's;\("TERM=xterm"\);\1, "NVIDIA_VISIBLE_DEVICES=0";' config.json
# Run the container
sudo nvidia-container-runtime run nvidia_smi
```

View File

@@ -3,7 +3,7 @@ package main
import (
"bytes"
"encoding/json"
"io"
"io/ioutil"
"log"
"os"
"os/exec"
@@ -11,12 +11,11 @@ import (
"strings"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
const (
@@ -87,7 +86,6 @@ func TestBadInput(t *testing.T) {
t.Fatal(err)
}
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
err = cmdCreate.Run()
@@ -105,7 +103,6 @@ func TestGoodInput(t *testing.T) {
t.Fatalf("error generating runtime spec: %v", err)
}
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle", cfg.bundlePath(), "testcontainer")
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
output, err := cmdRun.CombinedOutput()
@@ -116,7 +113,6 @@ func TestGoodInput(t *testing.T) {
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
require.Empty(t, spec.Hooks, "there should be no hooks in config.json")
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
err = cmdCreate.Run()
@@ -162,7 +158,6 @@ func TestDuplicateHook(t *testing.T) {
}
// Test how runtime handles already existing prestart hook in config.json
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
output, err := cmdCreate.CombinedOutput()
@@ -193,16 +188,15 @@ func (c testConfig) getRuntimeSpec() (specs.Spec, error) {
}
defer jsonFile.Close()
jsonContent, err := io.ReadAll(jsonFile)
switch {
case err != nil:
jsonContent, err := ioutil.ReadAll(jsonFile)
if err != nil {
return spec, err
case json.Valid(jsonContent):
} else if json.Valid(jsonContent) {
err = json.Unmarshal(jsonContent, &spec)
if err != nil {
return spec, err
}
default:
} else {
err = json.NewDecoder(bytes.NewReader(jsonContent)).Decode(&spec)
if err != nil {
return spec, err
@@ -232,7 +226,6 @@ func (c testConfig) generateNewRuntimeSpec() error {
return err
}
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmd := exec.Command("cp", c.unmodifiedSpecFile(), c.specFilePath())
err = cmd.Run()
if err != nil {

View File

@@ -43,7 +43,7 @@ By default, all commands output to `STDOUT`, but specifying the `--output` flag
### Generate CDI specifications
The [Container Device Interface (CDI)](https://tags.cncf.io/container-device-interface) provides
The [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface) provides
a vendor-agnostic mechanism to make arbitrary devices accessible in containerized environments. To allow NVIDIA devices to be
used in these environments, the NVIDIA Container Toolkit CLI includes functionality to generate a CDI specification for the
available NVIDIA GPUs in a system.

View File

@@ -17,12 +17,11 @@
package cdi
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/list"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {

View File

@@ -22,15 +22,14 @@ import (
"path/filepath"
"strings"
"github.com/urfave/cli/v2"
cdi "tags.cncf.io/container-device-interface/pkg/parser"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/urfave/cli/v2"
)
const (
@@ -42,18 +41,15 @@ type command struct {
}
type options struct {
output string
format string
deviceNameStrategies cli.StringSlice
driverRoot string
devRoot string
nvidiaCTKPath string
ldconfigPath string
mode string
vendor string
class string
output string
format string
deviceNameStrategy string
driverRoot string
nvidiaCTKPath string
mode string
vendor string
class string
configSearchPaths cli.StringSlice
librarySearchPaths cli.StringSlice
csv struct {
@@ -87,11 +83,6 @@ func (m command) build() *cli.Command {
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "config-search-path",
Usage: "Specify the path to search for config files when discovering the entities that should be included in the CDI specification.",
Destination: &opts.configSearchPaths,
},
&cli.StringFlag{
Name: "output",
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
@@ -111,15 +102,10 @@ func (m command) build() *cli.Command {
Destination: &opts.mode,
},
&cli.StringFlag{
Name: "dev-root",
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
Destination: &opts.devRoot,
},
&cli.StringSliceFlag{
Name: "device-name-strategy",
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID),
Destination: &opts.deviceNameStrategies,
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
Value: nvcdi.DeviceNameStrategyIndex,
Destination: &opts.deviceNameStrategy,
},
&cli.StringFlag{
Name: "driver-root",
@@ -136,11 +122,6 @@ func (m command) build() *cli.Command {
Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.",
Destination: &opts.nvidiaCTKPath,
},
&cli.StringFlag{
Name: "ldconfig-path",
Usage: "Specify the path to use for ldconfig in the generated CDI specification",
Destination: &opts.ldconfigPath,
},
&cli.StringFlag{
Name: "vendor",
Aliases: []string{"cdi-vendor"},
@@ -191,11 +172,9 @@ func (m command) validateFlags(c *cli.Context, opts *options) error {
return fmt.Errorf("invalid discovery mode: %v", opts.mode)
}
for _, strategy := range opts.deviceNameStrategies.Value() {
_, err := nvcdi.NewDeviceNamer(strategy)
if err != nil {
return err
}
_, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
if err != nil {
return err
}
opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath)
@@ -249,24 +228,17 @@ func formatFromFilename(filename string) string {
}
func (m command) generateSpec(opts *options) (spec.Interface, error) {
var deviceNamers []nvcdi.DeviceNamer
for _, strategy := range opts.deviceNameStrategies.Value() {
deviceNamer, err := nvcdi.NewDeviceNamer(strategy)
if err != nil {
return nil, fmt.Errorf("failed to create device namer: %v", err)
}
deviceNamers = append(deviceNamers, deviceNamer)
deviceNamer, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
if err != nil {
return nil, fmt.Errorf("failed to create device namer: %v", err)
}
cdilib, err := nvcdi.New(
nvcdi.WithLogger(m.logger),
nvcdi.WithDriverRoot(opts.driverRoot),
nvcdi.WithDevRoot(opts.devRoot),
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
nvcdi.WithLdconfigPath(opts.ldconfigPath),
nvcdi.WithDeviceNamers(deviceNamers...),
nvcdi.WithMode(opts.mode),
nvcdi.WithConfigSearchPaths(opts.configSearchPaths.Value()),
nvcdi.WithDeviceNamer(deviceNamer),
nvcdi.WithMode(string(opts.mode)),
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
nvcdi.WithCSVFiles(opts.csv.files.Value()),
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),

View File

@@ -17,22 +17,18 @@
package list
import (
"errors"
"fmt"
"github.com/urfave/cli/v2"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
type config struct {
cdiSpecDirs cli.StringSlice
}
type config struct{}
// NewCommand constructs a cdi list command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
@@ -58,44 +54,30 @@ func (m command) build() *cli.Command {
},
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "spec-dir",
Usage: "specify the directories to scan for CDI specifications",
Value: cli.NewStringSlice(cdi.DefaultSpecDirs...),
Destination: &cfg.cdiSpecDirs,
},
}
c.Flags = []cli.Flag{}
return &c
}
func (m command) validateFlags(c *cli.Context, cfg *config) error {
if len(cfg.cdiSpecDirs.Value()) == 0 {
return errors.New("at least one CDI specification directory must be specified")
}
return nil
}
func (m command) run(c *cli.Context, cfg *config) error {
registry, err := cdi.NewCache(
cdi.WithAutoRefresh(false),
cdi.WithSpecDirs(cfg.cdiSpecDirs.Value()...),
cdi.WithSpecDirs(cdi.DefaultSpecDirs...),
)
if err != nil {
return fmt.Errorf("failed to create CDI cache: %v", err)
}
_ = registry.Refresh()
if errors := registry.GetErrors(); len(errors) > 0 {
m.logger.Warningf("The following registry errors were reported:")
for k, err := range errors {
m.logger.Warningf("%v: %v", k, err)
}
}
refreshErr := registry.Refresh()
devices := registry.ListDevices()
m.logger.Infof("Found %d CDI devices", len(devices))
if refreshErr != nil {
m.logger.Warningf("Refreshing the CDI registry returned the following error(s): %v", refreshErr)
}
for _, device := range devices {
fmt.Printf("%s\n", device)
}

View File

@@ -21,14 +21,18 @@ import (
"io"
"os"
"github.com/urfave/cli/v2"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/urfave/cli/v2"
)
type loadSaver interface {
Load() (spec.Interface, error)
Save(spec.Interface) error
}
type command struct {
logger logger.Interface
}
@@ -40,9 +44,8 @@ type transformOptions struct {
type options struct {
transformOptions
from string
to string
relativeTo string
from string
to string
}
// NewCommand constructs a generate-cdi command with the specified logger
@@ -69,11 +72,6 @@ func (m command) build() *cli.Command {
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "from",
Usage: "specify the root to be transformed",
Destination: &opts.from,
},
&cli.StringFlag{
Name: "input",
Usage: "Specify the file to read the CDI specification from. If this is '-' the specification is read from STDIN",
@@ -86,10 +84,9 @@ func (m command) build() *cli.Command {
Destination: &opts.output,
},
&cli.StringFlag{
Name: "relative-to",
Usage: "specify whether the transform is relative to the host or to the container. One of [ host | container ]",
Value: "host",
Destination: &opts.relativeTo,
Name: "from",
Usage: "specify the root to be transformed",
Destination: &opts.from,
},
&cli.StringFlag{
Name: "to",
@@ -103,12 +100,6 @@ func (m command) build() *cli.Command {
}
// validateFlags checks the parsed options before the command runs.
// The relativeTo option must be either "host" or "container"; any other
// value is rejected with an error that echoes the offending value.
func (m command) validateFlags(c *cli.Context, opts *options) error {
	// Guard clause: both accepted values are valid as-is and need no
	// further processing.
	if opts.relativeTo == "host" || opts.relativeTo == "container" {
		return nil
	}
	return fmt.Errorf("invalid --relative-to value: %v", opts.relativeTo)
}
@@ -118,10 +109,9 @@ func (m command) run(c *cli.Context, opts *options) error {
return fmt.Errorf("failed to load CDI specification: %w", err)
}
err = transformroot.New(
transformroot.WithRoot(opts.from),
transformroot.WithTargetRoot(opts.to),
transformroot.WithRelativeTo(opts.relativeTo),
err = transform.NewRootTransformer(
opts.from,
opts.to,
).Transform(spec.Raw())
if err != nil {
return fmt.Errorf("failed to transform CDI specification: %w", err)

View File

@@ -17,10 +17,9 @@
package transform
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {

View File

@@ -109,11 +109,7 @@ func run(c *cli.Context, opts *options) error {
if err != nil {
return fmt.Errorf("invalid --set option %v: %w", set, err)
}
if value == nil {
_ = cfgToml.Delete(key)
} else {
cfgToml.Set(key, value)
}
cfgToml.Set(key, value)
}
if err := opts.EnsureOutputFolder(); err != nil {
@@ -125,10 +121,10 @@ func run(c *cli.Context, opts *options) error {
}
defer output.Close()
if _, err := cfgToml.Save(output); err != nil {
return fmt.Errorf("failed to save config: %v", err)
if err != nil {
return err
}
cfgToml.Save(output)
return nil
}
@@ -150,25 +146,20 @@ func setFlagToKeyValue(setFlag string) (string, interface{}, error) {
kind := field.Kind()
if len(setParts) != 2 {
if kind == reflect.Bool || (kind == reflect.Pointer && field.Elem().Kind() == reflect.Bool) {
if kind == reflect.Bool {
return key, true, nil
}
return key, nil, fmt.Errorf("%w: expected key=value; got %v", errInvalidFormat, setFlag)
}
value := setParts[1]
if kind == reflect.Pointer && value != "nil" {
kind = field.Elem().Kind()
}
switch kind {
case reflect.Pointer:
return key, nil, nil
case reflect.Bool:
b, err := strconv.ParseBool(value)
if err != nil {
return key, value, fmt.Errorf("%w: %w", errInvalidFormat, err)
}
return key, b, nil
return key, b, err
case reflect.String:
return key, value, nil
case reflect.Slice:
@@ -210,7 +201,7 @@ func getStruct(current reflect.Type, paths ...string) (reflect.StructField, erro
if !ok {
continue
}
if strings.SplitN(v, ",", 2)[0] != tomlField {
if v != tomlField {
continue
}
if len(paths) == 1 {

View File

@@ -19,11 +19,10 @@ package defaultsubcommand
import (
"fmt"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
@@ -86,7 +85,8 @@ func (m command) run(c *cli.Context, opts *flags.Options) error {
}
defer output.Close()
if _, err = cfgToml.Save(output); err != nil {
_, err = cfgToml.Save(output)
if err != nil {
return fmt.Errorf("failed to write output: %v", err)
}

View File

@@ -17,18 +17,16 @@
package chmod
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/urfave/cli/v2"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/urfave/cli/v2"
)
type command struct {
@@ -37,8 +35,7 @@ type command struct {
type config struct {
paths cli.StringSlice
modeStr string
mode fs.FileMode
mode string
containerSpec string
}
@@ -69,13 +66,13 @@ func (m command) build() *cli.Command {
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "path",
Usage: "Specify a path to apply the specified mode to",
Usage: "Specifiy a path to apply the specified mode to",
Destination: &cfg.paths,
},
&cli.StringFlag{
Name: "mode",
Usage: "Specify the file mode",
Destination: &cfg.modeStr,
Destination: &cfg.mode,
},
&cli.StringFlag{
Name: "container-spec",
@@ -88,16 +85,10 @@ func (m command) build() *cli.Command {
}
func validateFlags(c *cli.Context, cfg *config) error {
if strings.TrimSpace(cfg.modeStr) == "" {
if strings.TrimSpace(cfg.mode) == "" {
return fmt.Errorf("a non-empty mode must be specified")
}
modeInt, err := strconv.ParseUint(cfg.modeStr, 8, 32)
if err != nil {
return fmt.Errorf("failed to parse mode as octal: %v", err)
}
cfg.mode = fs.FileMode(modeInt)
for _, p := range cfg.paths.Value() {
if strings.TrimSpace(p) == "" {
return fmt.Errorf("paths must not be empty")
@@ -121,38 +112,33 @@ func (m command) run(c *cli.Context, cfg *config) error {
return fmt.Errorf("empty container root detected")
}
paths := m.getPaths(containerRoot, cfg.paths.Value(), cfg.mode)
paths := m.getPaths(containerRoot, cfg.paths.Value())
if len(paths) == 0 {
m.logger.Debugf("No paths specified; exiting")
return nil
}
for _, path := range paths {
err = os.Chmod(path, cfg.mode)
// in some cases this is not an issue (e.g. whole /dev mounted), see #143
if errors.Is(err, fs.ErrPermission) {
m.logger.Debugf("Ignoring permission error with chmod: %v", err)
err = nil
}
locator := lookup.NewExecutableLocator(m.logger, "")
targets, err := locator.Locate("chmod")
if err != nil {
return fmt.Errorf("failed to locate chmod: %v", err)
}
chmodPath := targets[0]
return err
args := append([]string{filepath.Base(chmodPath), cfg.mode}, paths...)
return syscall.Exec(chmodPath, args, nil)
}
// getPaths updates the specified paths relative to the root.
func (m command) getPaths(root string, paths []string, desiredMode fs.FileMode) []string {
func (m command) getPaths(root string, paths []string) []string {
var pathsInRoot []string
for _, f := range paths {
path := filepath.Join(root, f)
stat, err := os.Stat(path)
if err != nil {
if _, err := os.Stat(path); err != nil {
m.logger.Debugf("Skipping path %q: %v", path, err)
continue
}
if (stat.Mode()&(fs.ModePerm|fs.ModeSetuid|fs.ModeSetgid|fs.ModeSticky))^desiredMode == 0 {
m.logger.Debugf("Skipping path %q: already desired mode", path)
continue
}
pathsInRoot = append(pathsInRoot, path)
}

View File

@@ -22,13 +22,12 @@ import (
"path/filepath"
"strings"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/urfave/cli/v2"
)
type command struct {
@@ -57,7 +56,7 @@ func (m command) build() *cli.Command {
// Create the '' command
c := cli.Command{
Name: "create-symlinks",
Usage: "A hook to create symlinks in the container. This can be used to process CSV mount specs",
Usage: "A hook to create symlinks in the container. This can be used to proces CSV mount specs",
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},

View File

@@ -20,10 +20,9 @@ import (
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
"github.com/urfave/cli/v2"
)
type hookCommand struct {

View File

@@ -17,27 +17,22 @@
package ldcache
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/urfave/cli/v2"
)
// command is the receiver for this hook's methods; the only state it carries
// is the logger.
type command struct {
logger logger.Interface
}
type options struct {
type config struct {
folders cli.StringSlice
ldconfigPath string
containerSpec string
}
@@ -51,15 +46,12 @@ func NewCommand(logger logger.Interface) *cli.Command {
// build the update-ldcache command
func (m command) build() *cli.Command {
cfg := options{}
cfg := config{}
// Create the 'update-ldcache' command
c := cli.Command{
Name: "update-ldcache",
Usage: "Update ldcache in a container by running ldconfig",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &cfg)
},
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
@@ -68,15 +60,9 @@ func (m command) build() *cli.Command {
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "folder",
Usage: "Specify a folder to add to /etc/ld.so.conf before updating the ld cache",
Usage: "Specifiy a folder to add to /etc/ld.so.conf before updating the ld cache",
Destination: &cfg.folders,
},
&cli.StringFlag{
Name: "ldconfig-path",
Usage: "Specify the path to the ldconfig program",
Destination: &cfg.ldconfigPath,
Value: "/sbin/ldconfig",
},
&cli.StringFlag{
Name: "container-spec",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
@@ -87,14 +73,7 @@ func (m command) build() *cli.Command {
return &c
}
// validateFlags rejects an empty ldconfigPath option; any non-empty value is
// accepted without further inspection.
func (m command) validateFlags(c *cli.Context, cfg *options) error {
	if cfg.ldconfigPath != "" {
		return nil
	}
	return errors.New("ldconfig-path must be specified")
}
func (m command) run(c *cli.Context, cfg *options) error {
func (m command) run(c *cli.Context, cfg *config) error {
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %v", err)
@@ -105,52 +84,23 @@ func (m command) run(c *cli.Context, cfg *options) error {
return fmt.Errorf("failed to determined container root: %v", err)
}
ldconfigPath := m.resolveLDConfigPath(cfg.ldconfigPath)
args := []string{filepath.Base(ldconfigPath)}
_, err = os.Stat(filepath.Join(containerRoot, "/etc/ld.so.cache"))
if err != nil && os.IsNotExist(err) {
m.logger.Debugf("No ld.so.cache found, skipping update")
return nil
}
err = m.createConfig(containerRoot, cfg.folders.Value())
if err != nil {
return fmt.Errorf("failed to update ld.so.conf: %v", err)
}
args := []string{"/sbin/ldconfig"}
if containerRoot != "" {
args = append(args, "-r", containerRoot)
}
if root(containerRoot).hasPath("/etc/ld.so.cache") {
args = append(args, "-C", "/etc/ld.so.cache")
} else {
m.logger.Debugf("No ld.so.cache found, skipping update")
args = append(args, "-N")
}
folders := cfg.folders.Value()
if root(containerRoot).hasPath("/etc/ld.so.conf.d") {
err := m.createConfig(containerRoot, folders)
if err != nil {
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
}
} else {
args = append(args, folders...)
}
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
// be configured to use a different config file by default.
args = append(args, "-f", "/etc/ld.so.conf")
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
return syscall.Exec(ldconfigPath, args, nil)
}
// root is a directory path that other paths are resolved against.
type root string

// hasPath reports whether path, joined onto the root, can be considered
// present. Only a "does not exist" result from os.Stat yields false; a
// successful stat and any other stat error both yield true.
func (r root) hasPath(path string) bool {
	target := filepath.Join(string(r), path)
	_, statErr := os.Stat(target)
	// os.IsNotExist(nil) is false, so a successful stat needs no separate check.
	return !os.IsNotExist(statErr)
}
// resolveLDConfigPath determines the LDConfig path to use for the system.
// On systems such as Ubuntu where `/sbin/ldconfig` is a wrapper around
// /sbin/ldconfig.real, the latter is returned.
func (m command) resolveLDConfigPath(path string) string {
return strings.TrimPrefix(config.NormalizeLDConfigPath("@"+path), "@")
return syscall.Exec(args[0], args, nil)
}
// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf in the container

View File

@@ -17,9 +17,8 @@
package info
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {

View File

@@ -19,8 +19,6 @@ package main
import (
"os"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
@@ -28,6 +26,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
)

View File

@@ -149,7 +149,6 @@ func (m command) build() *cli.Command {
},
&cli.BoolFlag{
Name: "cdi.enabled",
Aliases: []string{"cdi.enable"},
Usage: "Enable CDI in the configured runtime",
Destination: &config.cdi.enabled,
},
@@ -309,11 +308,9 @@ func enableCDI(config *config, cfg engine.Interface) error {
}
switch config.runtime {
case "containerd":
cfg.Set("enable_cdi", true)
return cfg.Set("enable_cdi", true)
case "docker":
cfg.Set("features", map[string]bool{"cdi": true})
default:
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
return cfg.Set("experimental", true)
}
return nil
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
}

View File

@@ -17,10 +17,9 @@
package runtime
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type runtimeCommand struct {

View File

@@ -20,11 +20,10 @@ import (
"fmt"
"path/filepath"
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci"
)
type allPossible struct {

View File

@@ -24,12 +24,11 @@ import (
"strings"
"syscall"
"github.com/fsnotify/fsnotify"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/fsnotify/fsnotify"
"github.com/urfave/cli/v2"
)
const (
@@ -87,7 +86,7 @@ func (m command) build() *cli.Command {
Usage: "The path to the driver root. `DRIVER_ROOT`/dev is searched for NVIDIA device nodes.",
Value: "/",
Destination: &cfg.driverRoot,
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
EnvVars: []string{"DRIVER_ROOT"},
},
&cli.BoolFlag{
Name: "watch",

View File

@@ -20,10 +20,9 @@ import (
"path/filepath"
"strings"
"golang.org/x/sys/unix"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"golang.org/x/sys/unix"
)
type nodeLister interface {
@@ -64,13 +63,20 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
if m.nodeIsBlocked(d) {
continue
}
var stat unix.Stat_t
err := unix.Stat(d, &stat)
if err != nil {
m.logger.Warningf("Could not stat device: %v", err)
continue
}
deviceNodes = append(deviceNodes, newDeviceNode(d, stat))
deviceNode := deviceNode{
path: d,
major: unix.Major(uint64(stat.Rdev)),
minor: unix.Minor(uint64(stat.Rdev)),
}
deviceNodes = append(deviceNodes, deviceNode)
}
return deviceNodes, nil

View File

@@ -1,28 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package devchar
import "golang.org/x/sys/unix"
// newDeviceNode builds the deviceNode for the path d, deriving the major and
// minor numbers from stat.Rdev. In this variant stat.Rdev is passed to
// unix.Major and unix.Minor without conversion.
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
	return deviceNode{
		path:  d,
		major: unix.Major(stat.Rdev),
		minor: unix.Minor(stat.Rdev),
	}
}

View File

@@ -1,30 +0,0 @@
//go:build !linux
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package devchar
import "golang.org/x/sys/unix"
// newDeviceNode builds the deviceNode for the path d, deriving the major and
// minor numbers from stat.Rdev. In this variant stat.Rdev is converted to
// uint64 before being handed to unix.Major and unix.Minor.
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
	rdev := uint64(stat.Rdev)
	return deviceNode{
		path:  d,
		major: unix.Major(rdev),
		minor: unix.Minor(rdev),
	}
}

View File

@@ -19,11 +19,10 @@ package createdevicenodes
import (
"fmt"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/urfave/cli/v2"
)
type command struct {
@@ -69,7 +68,7 @@ func (m command) build() *cli.Command {
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
Value: "/",
Destination: &opts.driverRoot,
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
EnvVars: []string{"DRIVER_ROOT"},
},
&cli.BoolFlag{
Name: "control-devices",

View File

@@ -19,10 +19,9 @@ package createdevicenodes
import (
"fmt"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
@@ -62,7 +61,7 @@ func (m command) build() *cli.Command {
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
Value: "/",
Destination: &opts.driverRoot,
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
EnvVars: []string{"DRIVER_ROOT"},
},
}

View File

@@ -17,12 +17,11 @@
package system
import (
"github.com/urfave/cli/v2"
devchar "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks"
devicenodes "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-device-nodes"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/print-ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {

View File

@@ -22,7 +22,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \

View File

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG GOLANG_VERSION=x.x.x
ARG GOLANGCI_LINT_VERSION=v1.54.1
FROM golang:${GOLANG_VERSION}
RUN go install golang.org/x/lint/golint@6edffad5e6160f5949cdefc81710b2706fbcd4f6
@@ -20,8 +19,3 @@ RUN go install github.com/matryer/moq@latest
RUN go install github.com/gordonklaus/ineffassign@d2c82e48359b033cde9cf1307f6d5550b8d61321
RUN go install github.com/client9/misspell/cmd/misspell@latest
RUN go install github.com/google/go-licenses@latest
RUN curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin ${GOLANGCI_LINT_VERSION}
# We need to set the /work directory as a safe directory.
# This allows git commands to run in the container.
RUN git config --file=/.gitconfig --add safe.directory /work

View File

@@ -15,7 +15,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture"; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \

View File

@@ -33,7 +33,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture"; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \

View File

@@ -20,7 +20,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \

26
go.mod
View File

@@ -3,33 +3,31 @@ module github.com/NVIDIA/nvidia-container-toolkit
go 1.20
require (
github.com/NVIDIA/go-nvlib v0.2.0
github.com/NVIDIA/go-nvml v0.12.0-3
github.com/fsnotify/fsnotify v1.7.0
github.com/opencontainers/runtime-spec v1.2.0
github.com/pelletier/go-toml v1.9.5
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.9.0
github.com/urfave/cli/v2 v2.27.1
golang.org/x/mod v0.17.0
golang.org/x/sys v0.19.0
tags.cncf.io/container-device-interface v0.7.1
tags.cncf.io/container-device-interface/specs-go v0.7.0
github.com/NVIDIA/go-nvml v0.12.0-1
github.com/container-orchestrated-devices/container-device-interface v0.6.0
github.com/fsnotify/fsnotify v1.5.4
github.com/opencontainers/runtime-spec v1.1.0-rc.2
github.com/pelletier/go-toml v1.9.4
github.com/sirupsen/logrus v1.9.0
github.com/stretchr/testify v1.8.1
github.com/urfave/cli/v2 v2.3.0
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884
golang.org/x/mod v0.5.0
golang.org/x/sys v0.7.0
)
require (
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/opencontainers/runc v1.1.6 // indirect
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect

64
go.sum
View File

@@ -1,20 +1,20 @@
github.com/NVIDIA/go-nvlib v0.2.0 h1:roq+SDstbP1fcy2XVH7wB2Gz2/Ud7Q+NGQYOcVITVrA=
github.com/NVIDIA/go-nvlib v0.2.0/go.mod h1:kFuLNTyD1tF6FbRFlk+/EdUW5BrkE+v1Y3A3/9zKSjA=
github.com/NVIDIA/go-nvml v0.12.0-3 h1:QwfjYxEqIQVRhl8327g2Y3ZvKResPydpGSKtCIIK9jE=
github.com/NVIDIA/go-nvml v0.12.0-3/go.mod h1:SOufGc5Wql+cxrIZ8RyJwVKDYxfbs4WPkHXqadcbfvA=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/NVIDIA/go-nvml v0.12.0-1 h1:6mdjtlFo+17dWL7VFPfuRMtf0061TF4DKls9pkSw6uM=
github.com/NVIDIA/go-nvml v0.12.0-1/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/container-orchestrated-devices/container-device-interface v0.6.0 h1:aWwcz/Ep0Fd7ZuBjQGjU/jdPloM7ydhMW13h85jZNvk=
github.com/container-orchestrated-devices/container-device-interface v0.6.0/go.mod h1:OQlgtJtDrOxSQ1BWODC8OZK1tzi9W69wek+Jy17ndzo=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
@@ -29,37 +29,45 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runc v1.1.6 h1:XbhB8IfG/EsnhNvZtNdLB0GBw92GYEFvKlhaJk9jUgA=
github.com/opencontainers/runc v1.1.6/go.mod h1:CbUumNnWCuTGFukNXahoo/RFBZvDAgRh/smNYNOhA50=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.1.0-rc.2 h1:ucBtEms2tamYYW/SvGpvq9yUN0NEVL6oyLEwDcTSrk8=
github.com/opencontainers/runtime-spec v1.1.0-rc.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM=
github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho=
github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M=
github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -67,18 +75,22 @@ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHo
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a h1:lceJVurLqiWFdxK6KMDw+SIwrAsFW/af44XrNlbGw78=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a/go.mod h1:KYZksBgh18o+uzgnpDazzG4LVYtnfB96VXHMXypEtik=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884 h1:V0LUbfm4kVA1CPG8FgG9AGZqa3ykE5U12Gd3PZgoItA=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884/go.mod h1:/x5Ky1ZJNyCjDkgSL1atII0EFKQF5WaIHKeP5nkaQfk=
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
@@ -86,7 +98,3 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
tags.cncf.io/container-device-interface v0.7.1 h1:MATNCbAD1su9U6zwQe5BrQ2vGGp1GBayD70bYaxYCNE=
tags.cncf.io/container-device-interface v0.7.1/go.mod h1:h1JVuOqTQVORp8DziaWKUCDNzAmN+zeCbqbqD30D0ZQ=
tags.cncf.io/container-device-interface/specs-go v0.7.0 h1:w/maMGVeLP6TIQJVYT5pbqTi8SCw/iHZ+n4ignuGHqg=
tags.cncf.io/container-device-interface/specs-go v0.7.0/go.mod h1:hMAwAbMZyBLdmYqWgYcKH0F/yctNpV3P35f+/088A80=

View File

@@ -16,11 +16,6 @@
package config
import (
"os"
"strings"
)
// ContainerCLIConfig stores the options for the nvidia-container-cli
type ContainerCLIConfig struct {
Root string `toml:"root"`
@@ -36,27 +31,3 @@ type ContainerCLIConfig struct {
User string `toml:"user"`
Ldconfig string `toml:"ldconfig"`
}
// NormalizeLDConfigPath resolves the ldconfig path stored in this config.
// It hands the configured Ldconfig value to the package-level
// NormalizeLDConfigPath function and returns its result unchanged.
func (c *ContainerCLIConfig) NormalizeLDConfigPath() string {
	configured := c.Ldconfig
	return NormalizeLDConfigPath(configured)
}
// NormalizeLDConfigPath resolves an ldconfig path to its ".real" variant when
// that file exists.
//
// Paths that do not start with "@" are returned untouched. For "@"-prefixed
// paths any trailing ".real" is stripped first; if "<path>.real" can be
// stat'ed it is returned (with the "@" prefix restored), otherwise the
// suffix-less path is returned. This handles systems where /sbin/ldconfig is a
// wrapper around /sbin/ldconfig.real.
func NormalizeLDConfigPath(path string) string {
	const (
		hostPrefix = "@"
		realSuffix = ".real"
	)

	// Only "@"-prefixed paths are candidates for resolution.
	if len(path) == 0 || path[0] != '@' {
		return path
	}

	base := strings.TrimSuffix(path[1:], realSuffix)
	candidate := base + realSuffix

	// A missing (or unreadable) .real file means we fall back to the plain path.
	if _, err := os.Stat(candidate); err != nil {
		return hostPrefix + base
	}
	return hostPrefix + candidate
}

View File

@@ -1,83 +0,0 @@
/**
# Copyright 2023 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
)
// TestNormalizeLDConfigPath exercises ContainerCLIConfig.NormalizeLDConfigPath
// against a temporary directory containing a single "exists.real" file, as
// well as paths that are not present on the filesystem.
func TestNormalizeLDConfigPath(t *testing.T) {
	tmpRoot := t.TempDir()
	realPath := filepath.Join(tmpRoot, "exists.real")
	wrapperPath := filepath.Join(tmpRoot, "exists")

	// Create the empty .real file that the "host" cases resolve to.
	realFile, err := os.Create(realPath)
	require.NoError(t, err)
	_ = realFile.Close()

	cases := []struct {
		description string
		ldconfig    string
		expected    string
	}{
		{description: "empty input"},
		{
			description: "non-host with .real suffix returns as is",
			ldconfig:    "/some/path/ldconfig.real",
			expected:    "/some/path/ldconfig.real",
		},
		{
			description: "non-host without .real suffix returns as is",
			ldconfig:    "/some/path/ldconfig",
			expected:    "/some/path/ldconfig",
		},
		{
			description: "host .real file exists is returned",
			ldconfig:    "@" + realPath,
			expected:    "@" + realPath,
		},
		{
			description: "host resolves .real file",
			ldconfig:    "@" + wrapperPath,
			expected:    "@" + realPath,
		},
		{
			description: "host .real file not exists strips suffix",
			ldconfig:    "@/does/not/exist.real",
			expected:    "@/does/not/exist",
		},
		{
			description: "host file returned as is if no .real file exsits",
			ldconfig:    "@/does/not/exist",
			expected:    "@/does/not/exist",
		},
	}

	for _, tc := range cases {
		t.Run(tc.description, func(t *testing.T) {
			cfg := ContainerCLIConfig{Ldconfig: tc.ldconfig}
			require.Equal(t, tc.expected, cfg.NormalizeLDConfigPath())
		})
	}
}

View File

@@ -22,11 +22,10 @@ import (
"path/filepath"
"strings"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
)
const (
@@ -63,9 +62,6 @@ type Config struct {
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
NVIDIAContainerRuntimeHookConfig RuntimeHookConfig `toml:"nvidia-container-runtime-hook"`
// Features allows for finer control over optional features.
Features features `toml:"features,omitempty"`
}
// GetConfigFilePath returns the path to the config file for the configured system
@@ -127,7 +123,10 @@ func GetDefault() (*Config, error) {
}
func getLdConfigPath() string {
return NormalizeLDConfigPath("@/sbin/ldconfig")
if _, err := os.Stat("/sbin/ldconfig.real"); err == nil {
return "@/sbin/ldconfig.real"
}
return "@/sbin/ldconfig"
}
func getUserGroup() string {

View File

@@ -35,7 +35,7 @@ func TestGetConfigWithCustomConfig(t *testing.T) {
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
require.NoError(t, os.WriteFile(filename, contents, 0600))
require.NoError(t, os.WriteFile(filename, contents, 0766))
cfg, err := GetConfig()
require.NoError(t, err)

View File

@@ -1,85 +0,0 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
// featureName is the string key used to select a feature in features.IsEnabled.
type featureName string

// Names of the optional features that can be queried.
const (
	FeatureGDS      featureName = "gds"
	FeatureMOFED    featureName = "mofed"
	FeatureNVSWITCH featureName = "nvswitch"
	FeatureGDRCopy  featureName = "gdrcopy"
)

// getenver is anything that can look up an environment variable by name.
type getenver interface {
	Getenv(string) string
}

// feature is an explicit on/off setting for a single feature. A nil *feature
// means that no explicit value was configured.
type feature bool

// features specifies a set of named features.
type features struct {
	GDS      *feature `toml:"gds,omitempty"`
	MOFED    *feature `toml:"mofed,omitempty"`
	NVSWITCH *feature `toml:"nvswitch,omitempty"`
	GDRCopy  *feature `toml:"gdrcopy,omitempty"`
}

// IsEnabled reports whether the named feature is enabled.
// Each known feature is paired with its configured value and its
// feature-specific environment variable; the decision is then delegated to
// feature.isEnabled using the optional environments in in.
// Unknown feature names are always reported as disabled.
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
	var (
		configured *feature
		envvar     string
	)

	switch n {
	case FeatureGDS:
		configured, envvar = fs.GDS, "NVIDIA_GDS"
	case FeatureMOFED:
		configured, envvar = fs.MOFED, "NVIDIA_MOFED"
	case FeatureNVSWITCH:
		configured, envvar = fs.NVSWITCH, "NVIDIA_NVSWITCH"
	case FeatureGDRCopy:
		configured, envvar = fs.GDRCopy, "NVIDIA_GDRCOPY"
	default:
		return false
	}

	return configured.isEnabled(envvar, in...)
}

// isEnabled reports whether a single feature is enabled.
// An explicitly configured value (non-nil receiver) always wins. Otherwise the
// feature is enabled only if at least one of the supplied environments has
// envvar set to the string "enabled". An empty envvar name disables the lookup.
// A CUDA container / image can be passed here.
func (f *feature) isEnabled(envvar string, ins ...getenver) bool {
	switch {
	case f != nil:
		return bool(*f)
	case envvar == "":
		return false
	}

	for _, env := range ins {
		if env.Getenv(envvar) == "enabled" {
			return true
		}
	}
	return false
}

View File

@@ -19,13 +19,10 @@ package image
import (
"fmt"
"strings"
"github.com/opencontainers/runtime-spec/specs-go"
)
type builder struct {
env map[string]string
mounts []specs.Mount
env []string
disableRequire bool
}
@@ -33,12 +30,7 @@ type builder struct {
func New(opt ...Option) (CUDA, error) {
b := &builder{}
for _, o := range opt {
if err := o(b); err != nil {
return CUDA{}, err
}
}
if b.env == nil {
b.env = make(map[string]string)
o(b)
}
return b.build()
@@ -46,57 +38,36 @@ func New(opt ...Option) (CUDA, error) {
// build creates a CUDA image from the builder.
func (b builder) build() (CUDA, error) {
if b.disableRequire {
b.env[envNVDisableRequire] = "true"
c := make(CUDA)
for _, e := range b.env {
parts := strings.SplitN(e, "=", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("invalid environment variable: %v", e)
}
c[parts[0]] = parts[1]
}
c := CUDA{
env: b.env,
mounts: b.mounts,
if b.disableRequire {
c[envNVDisableRequire] = "true"
}
return c, nil
}
// Option is a functional option for creating a CUDA image.
type Option func(*builder) error
type Option func(*builder)
// WithDisableRequire sets the disable require option.
func WithDisableRequire(disableRequire bool) Option {
return func(b *builder) error {
return func(b *builder) {
b.disableRequire = disableRequire
return nil
}
}
// WithEnv sets the environment variables to use when creating the CUDA image.
// Note that this also overwrites the values set with WithEnvMap.
func WithEnv(env []string) Option {
return func(b *builder) error {
envmap := make(map[string]string)
for _, e := range env {
parts := strings.SplitN(e, "=", 2)
if len(parts) != 2 {
return fmt.Errorf("invalid environment variable: %v", e)
}
envmap[parts[0]] = parts[1]
}
return WithEnvMap(envmap)(b)
}
}
// WithEnvMap sets the environment variable map to use when creating the CUDA image.
// Note that this also overwrites the values set with WithEnv.
func WithEnvMap(env map[string]string) Option {
return func(b *builder) error {
return func(b *builder) {
b.env = env
return nil
}
}
// WithMounts returns an Option that stores the supplied mounts on the builder
// so that they are associated with the CUDA image being constructed.
// The returned option never fails.
func WithMounts(mounts []specs.Mount) Option {
	setMounts := func(target *builder) error {
		target.mounts = mounts
		return nil
	}
	return setMounts
}

View File

@@ -73,7 +73,7 @@ func (c DriverCapabilities) Has(capability DriverCapability) bool {
return c[capability]
}
// Any checks whether any of the specified capabilities are set
// Any checks whether any of the specified capabilites are set
func (c DriverCapabilities) Any(capabilities ...DriverCapability) bool {
if c.IsAll() {
return true

View File

@@ -18,13 +18,11 @@ package image
import (
"fmt"
"path/filepath"
"strconv"
"strings"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
"tags.cncf.io/container-device-interface/pkg/parser"
)
const (
@@ -39,10 +37,7 @@ const (
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
// a map of environment variable to values that can be used to perform lookups
// such as requirements.
type CUDA struct {
env map[string]string
mounts []specs.Mount
}
type CUDA map[string]string
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
// The process environment is read (if present) to construct the CUDA Image.
@@ -52,10 +47,7 @@ func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
env = spec.Process.Env
}
return New(
WithEnv(env),
WithMounts(spec.Mounts),
)
return New(WithEnv(env))
}
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
@@ -64,24 +56,12 @@ func NewCUDAImageFromEnv(env []string) (CUDA, error) {
return New(WithEnv(env))
}
// Getenv looks up key in the image environment and returns its value.
// A key that is not present yields the empty string.
func (i CUDA) Getenv(key string) string {
	value := i.env[key]
	return value
}
// HasEnvvar reports whether key is defined in the image environment,
// regardless of the value it is set to.
func (i CUDA) HasEnvvar(key string) bool {
	_, defined := i.env[key]
	return defined
}
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
// image is considered legacy if it has a CUDA_VERSION environment variable defined
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
func (i CUDA) IsLegacy() bool {
legacyCudaVersion := i.env[envCUDAVersion]
cudaRequire := i.env[envNVRequireCUDA]
legacyCudaVersion := i[envCUDAVersion]
cudaRequire := i[envNVRequireCUDA]
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
}
@@ -94,7 +74,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
var requirements []string
for name, value := range i.env {
for name, value := range i {
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
requirements = append(requirements, value)
}
@@ -113,7 +93,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
// to a valid (true) boolean value this can be used to disable the requirement checks
func (i CUDA) HasDisableRequire() bool {
if disable, exists := i.env[envNVDisableRequire]; exists {
if disable, exists := i[envNVDisableRequire]; exists {
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
d, _ := strconv.ParseBool(disable)
return d
@@ -124,12 +104,12 @@ func (i CUDA) HasDisableRequire() bool {
// DevicesFromEnvvars returns the devices requested by the image through environment variables
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
// We concantenate all the devices from the specified env.
// We concantenate all the devices from the specified envvars.
var isSet bool
var devices []string
requested := make(map[string]bool)
for _, envVar := range envVars {
if devs, ok := i.env[envVar]; ok {
if devs, ok := i[envVar]; ok {
isSet = true
for _, d := range strings.Split(devs, ",") {
trimmed := strings.TrimSpace(d)
@@ -157,18 +137,18 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
// GetDriverCapabilities returns the requested driver capabilities.
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
env := i.env[envNVDriverCapabilities]
env := i[envNVDriverCapabilities]
capabilities := make(DriverCapabilities)
capabilites := make(DriverCapabilities)
for _, c := range strings.Split(env, ",") {
capabilities[DriverCapability(c)] = true
capabilites[DriverCapability(c)] = true
}
return capabilities
return capabilites
}
func (i CUDA) legacyVersion() (string, error) {
cudaVersion := i.env[envCUDAVersion]
cudaVersion := i[envCUDAVersion]
majorMinor, err := parseMajorMinorVersion(cudaVersion)
if err != nil {
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
@@ -198,79 +178,3 @@ func parseMajorMinorVersion(version string) (string, error) {
}
return majorMinor, nil
}
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the
// image are requested as CDI devices.
//
// Devices from the NVIDIA_VISIBLE_DEVICES envvar must be qualified CDI names
// and devices requested through mounts must carry the "cdi/" prefix. If no
// devices are requested at all, false is returned.
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
	envDevices := i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List()
	for _, name := range envDevices {
		if !parser.IsQualifiedName(name) {
			return false
		}
	}

	mountDevices := i.DevicesFromMounts()
	for _, name := range mountDevices {
		if !strings.HasPrefix(name, "cdi/") {
			return false
		}
	}

	// At least one (CDI) device must have been requested.
	return len(envDevices)+len(mountDevices) > 0
}
const (
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
)
// DevicesFromMounts returns a list of device specified as mounts.
//
// A mount is treated as a device request when its (cleaned) source is
// /dev/null and its (cleaned) destination lies strictly below
// deviceListAsVolumeMountsRoot. The returned device name is the destination
// path relative to that root. Each destination is only considered once.
//
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
func (i CUDA) DevicesFromMounts() []string {
	root := filepath.Clean(deviceListAsVolumeMountsRoot)
	// Match on the root followed by a separator so that sibling paths which
	// merely share the textual prefix (e.g. "<root>-other/...") are not
	// misinterpreted as device requests.
	rootPrefix := root + "/"

	seen := make(map[string]bool)
	var devices []string
	for _, m := range i.mounts {
		// Only consider mounts whose host volume is /dev/null
		if filepath.Clean(m.Source) != "/dev/null" {
			continue
		}

		destination := filepath.Clean(m.Destination)
		if seen[destination] {
			continue
		}
		seen[destination] = true

		// Only consider container mount points that are located below 'root'
		if !strings.HasPrefix(destination, rootPrefix) {
			continue
		}

		// Grab the full path beyond 'root' and add it to the list of devices
		device := strings.Trim(strings.TrimPrefix(destination, rootPrefix), "/")
		if len(device) == 0 {
			continue
		}
		devices = append(devices, device)
	}
	return devices
}
// CDIDevicesFromMounts returns the CDI devices requested through mounts on the
// image. Only mount devices of the form "cdi/<vendor>/<class>/<device>" are
// considered; each is rewritten as "<vendor>/<class>=<device>". Entries that
// do not have all three components are ignored.
func (i CUDA) CDIDevicesFromMounts() []string {
	const cdiPrefix = "cdi/"

	var cdiDevices []string
	for _, mounted := range i.DevicesFromMounts() {
		if !strings.HasPrefix(mounted, cdiPrefix) {
			continue
		}
		// Everything after the class is kept as the device name, including
		// any further slashes.
		fields := strings.SplitN(strings.TrimPrefix(mounted, cdiPrefix), "/", 3)
		if len(fields) == 3 {
			cdiDevices = append(cdiDevices, fmt.Sprintf("%s/%s=%s", fields[0], fields[1], fields[2]))
		}
	}
	return cdiDevices
}

View File

@@ -126,6 +126,7 @@ func TestGetRequirements(t *testing.T) {
requirements, err := image.GetRequirements()
require.NoError(t, err)
require.ElementsMatch(t, tc.requirements, requirements)
})
}

View File

@@ -154,7 +154,10 @@ func (t Toml) contents() ([]byte, error) {
// format fixes the comments for the config to ensure that they start in column
// 1 and are not followed by a space.
func (t Toml) format(contents []byte) ([]byte, error) {
r := regexp.MustCompile(`(\n*)\s*?#\s*(\S.*)`)
r, err := regexp.Compile(`(\n*)\s*?#\s*(\S.*)`)
if err != nil {
return nil, fmt.Errorf("unable to compile regexp: %v", err)
}
replaced := r.ReplaceAll(contents, []byte("$1#$2"))
return replaced, nil

View File

@@ -23,8 +23,7 @@ import (
)
/*
#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#ifdef _WIN32
#define CUDAAPI __stdcall

View File

@@ -27,13 +27,20 @@ type charDevices mounts
var _ Discover = (*charDevices)(nil)
// NewCharDeviceDiscoverer creates a discoverer which locates the specified set of device nodes.
func NewCharDeviceDiscoverer(logger logger.Interface, devRoot string, devices []string) Discover {
func NewCharDeviceDiscoverer(logger logger.Interface, devices []string, root string) Discover {
locator := lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(devRoot),
lookup.WithRoot(root),
)
return (*charDevices)(newMounts(logger, locator, devRoot, devices))
return NewDeviceDiscoverer(logger, locator, root, devices)
}
// NewDeviceDiscoverer builds a device-node discoverer on top of the supplied
// locator. The mounts discoverer returned by NewMounts is reinterpreted as
// charDevices; this relies on NewMounts returning a *mounts value.
func NewDeviceDiscoverer(logger logger.Interface, locator lookup.Locator, root string, devices []string) Discover {
	return (*charDevices)(NewMounts(logger, locator, root, devices).(*mounts))
}
// Mounts returns the discovered mounts for the charDevices.

View File

@@ -20,10 +20,9 @@ import (
"fmt"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
func TestCharDevices(t *testing.T) {

View File

@@ -30,8 +30,8 @@ type filtered struct {
filter Filter
}
// newFilteredDiscoverer creates a discoverer that applies the specified filter to the returned entities of the discoverer
func newFilteredDiscoverer(logger logger.Interface, applyTo Discover, filter Filter) Discover {
// newFilteredDisoverer creates a discoverer that applies the specified filter to the returned entities of the discoverer
func newFilteredDisoverer(logger logger.Interface, applyTo Discover, filter Filter) Discover {
return filtered{
Discover: applyTo,
logger: logger,

View File

@@ -1,27 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
// NewGDRCopyDiscoverer creates a discoverer for the /dev/gdrdrv character
// device, looked up relative to devRoot. The returned error is always nil.
func NewGDRCopyDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
	gdrDevices := []string{"/dev/gdrdrv"}
	d := NewCharDeviceDiscoverer(logger, devRoot, gdrDevices)
	return d, nil
}

View File

@@ -29,17 +29,17 @@ type gdsDeviceDiscoverer struct {
}
// NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
func NewGDSDiscoverer(logger logger.Interface, driverRoot string, devRoot string) (Discover, error) {
func NewGDSDiscoverer(logger logger.Interface, root string) (Discover, error) {
devices := NewCharDeviceDiscoverer(
logger,
devRoot,
[]string{"/dev/nvidia-fs*"},
root,
)
udev := NewMounts(
logger,
lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(driverRoot)),
driverRoot,
lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(root)),
root,
[]string{"/run/udev"},
)
@@ -47,9 +47,9 @@ func NewGDSDiscoverer(logger logger.Interface, driverRoot string, devRoot string
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithRoot(root),
),
driverRoot,
root,
[]string{"/etc/cufile.json"},
)

View File

@@ -28,41 +28,53 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
)
// NewDRMNodesDiscoverer returns a discoverer for the DRM device nodes associated with the specified visible devices.
//
// TODO: The logic for creating DRM devices should be consolidated between this
// and the logic for generating CDI specs for a single device. This is only used
// when applying OCI spec modifications to an incoming spec in "legacy" mode.
func NewDRMNodesDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string, nvidiaCTKPath string) (Discover, error) {
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, devRoot)
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
func NewGraphicsDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string, nvidiaCTKPath string) (Discover, error) {
mounts, err := NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath)
if err != nil {
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
}
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
}
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCTKPath)
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, driverRoot, nvidiaCTKPath)
discover := Merge(
Merge(drmDeviceNodes, drmByPathSymlinks),
mounts,
)
discover := Merge(drmDeviceNodes, drmByPathSymlinks)
return discover, nil
}
// NewGraphicsMountsDiscoverer creates a discoverer for the mounts required by graphics tools such as vulkan.
func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
func NewGraphicsMountsDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) (Discover, error) {
locator, err := lookup.NewLibraryLocator(logger, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct library locator: %v", err)
}
libraries := NewMounts(
logger,
driver.Libraries(),
driver.Root,
locator,
driverRoot,
[]string{
"libnvidia-egl-gbm.so.*",
"libnvidia-egl-gbm.so",
},
)
jsonMounts := NewMounts(
logger,
driver.Configs(),
driver.Root,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths("/etc", "/usr/share"),
),
driverRoot,
[]string{
"glvnd/egl_vendor.d/10_nvidia.json",
"vulkan/icd.d/nvidia_icd.json",
@@ -74,7 +86,7 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, n
},
)
xorg := optionalXorgDiscoverer(logger, driver, nvidiaCTKPath)
xorg := optionalXorgDiscoverer(logger, driverRoot, nvidiaCTKPath)
discover := Merge(
libraries,
@@ -89,16 +101,16 @@ type drmDevicesByPath struct {
None
logger logger.Interface
nvidiaCTKPath string
devRoot string
driverRoot string
devicesFrom Discover
}
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCTKPath string) Discover {
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, driverRoot string, nvidiaCTKPath string) Discover {
d := drmDevicesByPath{
logger: logger,
nvidiaCTKPath: nvidiaCTKPath,
devRoot: devRoot,
driverRoot: driverRoot,
devicesFrom: devices,
}
@@ -136,7 +148,7 @@ func (d drmDevicesByPath) Hooks() ([]Hook, error) {
return []Hook{hook}, nil
}
// getSpecificLinkArgs returns the required specific links that need to be created
// getSpecificLinkArgs returns the required specic links that need to be created
func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error) {
selectedDevices := make(map[string]bool)
for _, d := range devices {
@@ -145,7 +157,7 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
linkLocator := lookup.NewFileLocator(
lookup.WithLogger(d.logger),
lookup.WithRoot(d.devRoot),
lookup.WithRoot(d.driverRoot),
)
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
if err != nil {
@@ -171,23 +183,27 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
}
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Discover, error) {
allDevices := NewCharDeviceDiscoverer(
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Discover, error) {
allDevices := NewDeviceDiscoverer(
logger,
devRoot,
lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
),
driverRoot,
[]string{
"/dev/dri/card*",
"/dev/dri/renderD*",
},
)
filter, err := newDRMDeviceFilter(devices, devRoot)
filter, err := newDRMDeviceFilter(logger, devices, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
}
// We return a discoverer that applies the DRM device filter created above to all discovered DRM device nodes.
d := newFilteredDiscoverer(
d := newFilteredDisoverer(
logger,
allDevices,
filter,
@@ -197,8 +213,8 @@ func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevice
}
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
func newDRMDeviceFilter(devices image.VisibleDevices, devRoot string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(devRoot)
func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to read GPU information: %v", err)
}
@@ -225,7 +241,7 @@ func newDRMDeviceFilter(devices image.VisibleDevices, devRoot string) (Filter, e
return nil, fmt.Errorf("failed to determine DRM devices for %v: %v", busID, err)
}
for _, drmDeviceNode := range drmDeviceNodes {
filter[drmDeviceNode] = true
filter[filepath.Join(drmDeviceNode)] = true
}
}
@@ -242,8 +258,8 @@ var _ Discover = (*xorgHooks)(nil)
// optionalXorgDiscoverer creates a discoverer for Xorg libraries.
// If the creation of the discoverer fails, a None discoverer is returned.
func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) Discover {
xorg, err := newXorgDiscoverer(logger, driver, nvidiaCTKPath)
func optionalXorgDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) Discover {
xorg, err := newXorgDiscoverer(logger, driverRoot, nvidiaCTKPath)
if err != nil {
logger.Warningf("Failed to create Xorg discoverer: %v; skipping xorg libraries", err)
return None{}
@@ -251,9 +267,10 @@ func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidia
return xorg
}
func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
func newXorgDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) (Discover, error) {
libCudaPaths, err := cuda.New(
driver.Libraries(),
cuda.WithLogger(logger),
cuda.WithDriverRoot(driverRoot),
).Locate(".*.*")
if err != nil {
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
@@ -270,11 +287,11 @@ func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPa
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driver.Root),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths(libRoot, "/usr/lib/x86_64-linux-gnu"),
lookup.WithCount(1),
),
driver.Root,
driverRoot,
[]string{
"nvidia/xorg/nvidia_drv.so",
fmt.Sprintf("nvidia/xorg/libglxserver_nvidia.so.%s", version),
@@ -286,16 +303,20 @@ func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPa
nvidiaCTKPath: nvidiaCTKPath,
}
xorgConfig := NewMounts(
xorgConfg := NewMounts(
logger,
driver.Configs(),
driver.Root,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths("/usr/share"),
),
driverRoot,
[]string{"X11/xorg.conf.d/10-nvidia.conf"},
)
d := Merge(
xorgLibs,
xorgConfig,
xorgConfg,
xorgHooks,
)

View File

@@ -19,7 +19,7 @@ package discover
import (
"path/filepath"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
)
var _ Discover = (*Hook)(nil)

View File

@@ -19,10 +19,9 @@ package discover
import (
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
func TestIPCMounts(t *testing.T) {

View File

@@ -70,7 +70,7 @@ func (d *ipcMounts) Mounts() ([]Mount, error) {
var modifiedMounts []Mount
for _, m := range mounts {
mount := m
mount.Options = append(mount.Options, "noexec")
mount.Options = append(m.Options, "noexec")
modifiedMounts = append(modifiedMounts, mount)
}

View File

@@ -25,11 +25,10 @@ import (
)
// NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. A logger can also be specified
func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath, ldconfigPath string) (Discover, error) {
func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath string) (Discover, error) {
d := ldconfig{
logger: logger,
nvidiaCTKPath: nvidiaCTKPath,
ldconfigPath: ldconfigPath,
mountsFrom: mounts,
}
@@ -40,7 +39,6 @@ type ldconfig struct {
None
logger logger.Interface
nvidiaCTKPath string
ldconfigPath string
mountsFrom Discover
}
@@ -52,20 +50,14 @@ func (d ldconfig) Hooks() ([]Hook, error) {
}
h := CreateLDCacheUpdateHook(
d.nvidiaCTKPath,
d.ldconfigPath,
getLibraryPaths(mounts),
)
return []Hook{h}, nil
}
// CreateLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache
func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []string) Hook {
func CreateLDCacheUpdateHook(executable string, libraries []string) Hook {
var args []string
if ldconfig != "" {
args = append(args, "--ldconfig-path", ldconfig)
}
for _, f := range uniqueFolders(libraries) {
args = append(args, "--folder", f)
}
@@ -77,6 +69,7 @@ func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []str
)
return hook
}
// getLibraryPaths extracts the library dirs from the specified mounts
@@ -93,6 +86,7 @@ func getLibraryPaths(mounts []Mount) []string {
// isLibName checks if the specified filename is a library (i.e. ends in `.so*`)
func isLibName(filename string) bool {
base := filepath.Base(filename)
isLib, err := filepath.Match("lib?*.so*", base)

View File

@@ -26,7 +26,6 @@ import (
const (
testNvidiaCTKPath = "/foo/bar/nvidia-ctk"
testLdconfigPath = "/bar/baz/ldconfig"
)
func TestLDCacheUpdateHook(t *testing.T) {
@@ -34,7 +33,6 @@ func TestLDCacheUpdateHook(t *testing.T) {
testCases := []struct {
description string
ldconfigPath string
mounts []Mount
mountError error
expectedError error
@@ -77,11 +75,6 @@ func TestLDCacheUpdateHook(t *testing.T) {
},
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib"},
},
{
description: "explicit ldconfig path is passed",
ldconfigPath: testLdconfigPath,
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--ldconfig-path", testLdconfigPath},
},
}
for _, tc := range testCases {
@@ -97,7 +90,7 @@ func TestLDCacheUpdateHook(t *testing.T) {
Lifecycle: "createContainer",
}
d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath, tc.ldconfigPath)
d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath)
require.NoError(t, err)
hooks, err := d.Hooks()
@@ -121,8 +114,10 @@ func TestLDCacheUpdateHook(t *testing.T) {
mounts, err := d.Mounts()
require.NoError(t, err)
require.Empty(t, mounts)
})
}
}
func TestIsLibName(t *testing.T) {

View File

@@ -27,7 +27,7 @@ type list struct {
var _ Discover = (*list)(nil)
// Merge creates a discoverer that is the composite of a list of discoverers.
// Merge creates a discoverer that is the composite of a list of discoveres.
func Merge(d ...Discover) Discover {
l := list{
discoverers: d,

View File

@@ -19,14 +19,14 @@ package discover
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
// NewMOFEDDiscoverer creates a discoverer for MOFED devices.
func NewMOFEDDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
func NewMOFEDDiscoverer(logger logger.Interface, root string) (Discover, error) {
devices := NewCharDeviceDiscoverer(
logger,
devRoot,
[]string{
"/dev/infiniband/uverbs*",
"/dev/infiniband/rdma_cm",
},
root,
)
return devices, nil

View File

@@ -20,9 +20,8 @@ import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/stretchr/testify/require"
testlog "github.com/sirupsen/logrus/hooks/test"
)

View File

@@ -1,33 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
// NewNvSwitchDiscoverer returns a discoverer for the NVSWITCH character devices.
// The device paths are handed to NewCharDeviceDiscoverer together with devRoot;
// the returned error is always nil.
func NewNvSwitchDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
	// Control node first, then the glob covering the individual switch nodes.
	nvswitchDevicePaths := []string{
		"/dev/nvidia-nvswitchctl",
		"/dev/nvidia-nvswitch*",
	}

	return NewCharDeviceDiscoverer(logger, devRoot, nvswitchDevicePaths), nil
}

View File

@@ -16,6 +16,15 @@
package dxcore
import (
"github.com/NVIDIA/go-nvml/pkg/dl"
)
const (
libraryName = "libdxcore.so"
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
)
// dxcore stores a reference to the dxcore dynamic library
var dxcore *context

View File

@@ -41,17 +41,14 @@ static const char * const dxcore_nvidia_driver_store_components[] = {
*/
struct dxcore_enumAdapters2;
struct dxcore_enumAdapters3;
struct dxcore_queryAdapterInfo;
typedef int(*pfnDxcoreEnumAdapters2)(struct dxcore_enumAdapters2* pParams);
typedef int(*pfnDxcoreEnumAdapters3)(struct dxcore_enumAdapters3* pParams);
typedef int(*pfnDxcoreQueryAdapterInfo)(struct dxcore_queryAdapterInfo* pParams);
struct dxcore_lib {
void* hDxcoreLib;
pfnDxcoreEnumAdapters2 pDxcoreEnumAdapters2;
pfnDxcoreEnumAdapters3 pDxcoreEnumAdapters3;
pfnDxcoreQueryAdapterInfo pDxcoreQueryAdapterInfo;
};
@@ -69,15 +66,6 @@ struct dxcore_enumAdapters2
struct dxcore_adapterInfo *pAdapters;
};
#define ENUMADAPTER3_FILTER_COMPUTE_ONLY (0x0000000000000001)
struct dxcore_enumAdapters3
{
unsigned long long Filter;
unsigned int NumAdapters;
struct dxcore_adapterInfo *pAdapters;
};
enum dxcore_kmtqueryAdapterInfoType
{
DXCORE_QUERYDRIVERVERSION = 13,
@@ -251,37 +239,7 @@ static void dxcore_add_adapter(struct dxcore_context* pCtx, struct dxcore_lib* p
log_infof("Adding new adapter via dxcore hAdapter:%x luid:%llx wddm version:%d", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid), wddmVersion);
}
/*
 * dxcore_enum_adapters3 enumerates adapters through the EnumAdapters3 entry
 * point stored in pLib and registers each one with pCtx via dxcore_add_adapter.
 *
 * The entry point is invoked twice: the first call is made without an adapter
 * array, and the NumAdapters value it leaves in params is used to size the
 * array passed to the second call.
 *
 * Returns 0 on success and 1 if either enumeration call or the allocation
 * of the adapter array fails.
 */
static int dxcore_enum_adapters3(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
{
    struct dxcore_enumAdapters3 params = {0};
    unsigned int adapterIndex = 0;

    // Include compute-only in addition to display+compute adapters
    params.Filter = ENUMADAPTER3_FILTER_COMPUTE_ONLY;
    params.NumAdapters = 0;
    params.pAdapters = NULL;

    if (pLib->pDxcoreEnumAdapters3(&params)) {
        log_err("Failed to enumerate adapters via enumAdapters3");
        return 1;
    }

    params.pAdapters = malloc(sizeof(struct dxcore_adapterInfo) * params.NumAdapters);
    // malloc(0) may legitimately return NULL, so a NULL result is only an
    // allocation failure when at least one adapter was reported.
    if (params.pAdapters == NULL && params.NumAdapters > 0) {
        log_err("Failed to allocate adapter list for enumAdapters3");
        return 1;
    }

    if (pLib->pDxcoreEnumAdapters3(&params)) {
        free(params.pAdapters);
        log_err("Failed to enumerate adapters via enumAdapters3");
        return 1;
    }

    for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) {
        dxcore_add_adapter(pCtx, pLib, &params.pAdapters[adapterIndex]);
    }

    free(params.pAdapters);
    return 0;
}
static int dxcore_enum_adapters2(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
{
struct dxcore_enumAdapters2 params = {0};
unsigned int adapterIndex = 0;
@@ -290,15 +248,15 @@ static int dxcore_enum_adapters2(struct dxcore_context* pCtx, struct dxcore_lib*
params.pAdapters = NULL;
if (pLib->pDxcoreEnumAdapters2(&params)) {
log_err("Failed to enumerate adapters via enumAdapters2");
return 1;
log_err("Failed to enumerate adapters via dxcore");
return;
}
params.pAdapters = malloc(sizeof(struct dxcore_adapterInfo) * params.NumAdapters);
if (pLib->pDxcoreEnumAdapters2(&params)) {
free(params.pAdapters);
log_err("Failed to enumerate adapters via enumAdapters2");
return 1;
log_err("Failed to enumerate adapters via dxcore");
return;
}
for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) {
@@ -306,27 +264,6 @@ static int dxcore_enum_adapters2(struct dxcore_context* pCtx, struct dxcore_lib*
}
free(params.pAdapters);
return 0;
}
/*
 * dxcore_enum_adapters populates pCtx with the adapters reported by dxcore.
 * EnumAdapters3 is tried first when its symbol is available; if it is missing
 * or reports failure, EnumAdapters2 is tried. An error is logged when neither
 * path succeeds.
 */
static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
{
    if (pLib->pDxcoreEnumAdapters3 && dxcore_enum_adapters3(pCtx, pLib) == 0) {
        return;
    }

    // Fall back to EnumAdapters2 if the OS doesn't support EnumAdapters3
    if (pLib->pDxcoreEnumAdapters2 && dxcore_enum_adapters2(pCtx, pLib) == 0) {
        return;
    }

    log_err("Failed to enumerate adapters via dxcore");
}
int dxcore_init_context(struct dxcore_context* pCtx)
@@ -343,9 +280,8 @@ int dxcore_init_context(struct dxcore_context* pCtx)
}
lib.pDxcoreEnumAdapters2 = (pfnDxcoreEnumAdapters2)dlsym(lib.hDxcoreLib, "D3DKMTEnumAdapters2");
lib.pDxcoreEnumAdapters3 = (pfnDxcoreEnumAdapters3)dlsym(lib.hDxcoreLib, "D3DKMTEnumAdapters3");
if (!lib.pDxcoreEnumAdapters2 && !lib.pDxcoreEnumAdapters3) {
log_err("dxcore library is present but the symbols D3DKMTEnumAdapters2 and D3DKMTEnumAdapters3 are missing");
if (!lib.pDxcoreEnumAdapters2) {
log_err("dxcore library is present but the symbol D3DKMTEnumAdapters2 is missing");
goto error;
}

View File

@@ -17,9 +17,7 @@
package dxcore
/*
#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#include <dxcore.h>
*/
import "C"

View File

@@ -17,10 +17,9 @@
package edits
import (
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type device discover.Device

View File

@@ -20,10 +20,9 @@ import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/stretchr/testify/require"
)
func TestDeviceToSpec(t *testing.T) {

View File

@@ -19,13 +19,12 @@ package edits
import (
"fmt"
ociSpecs "github.com/opencontainers/runtime-spec/specs-go"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
ociSpecs "github.com/opencontainers/runtime-spec/specs-go"
)
type edits struct {

View File

@@ -19,9 +19,8 @@ package edits
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/stretchr/testify/require"
)
func TestFromDiscovererAllowsMountsToIterate(t *testing.T) {

View File

@@ -17,10 +17,9 @@
package edits
import (
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type hook discover.Hook

View File

@@ -17,10 +17,9 @@
package edits
import (
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type mount discover.Mount

View File

@@ -20,9 +20,9 @@ import (
"fmt"
"strings"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// additionalInfo allows for the info.Interface to be extended to implement the infoInterface.
@@ -52,9 +52,7 @@ func (i additionalInfo) UsesNVGPUModule() (uses bool, reason string) {
if ret != nvml.SUCCESS {
return false, fmt.Sprintf("failed to initialize nvml: %v", ret)
}
defer func() {
_ = i.nvmllib.Shutdown()
}()
defer i.nvmllib.Shutdown()
var names []string

View File

@@ -19,9 +19,9 @@ package info
import (
"testing"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/stretchr/testify/require"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
func TestUsesNVGPUModule(t *testing.T) {

View File

@@ -17,12 +17,12 @@
package info
import (
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/parser"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// infoInterface provides an alias for mocking.
@@ -70,7 +70,7 @@ func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
r.logger.Infof("Auto-detected mode as '%v'", rmode)
}()
if image.OnlyFullyQualifiedCDIDevices() {
if onlyFullyQualifiedCDIDevices(image) {
return "cdi"
}
@@ -89,3 +89,14 @@ func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
return "legacy"
}
// onlyFullyQualifiedCDIDevices reports whether the devices requested through the
// NVIDIA_VISIBLE_DEVICES environment variable of the image are all fully-qualified
// CDI device names. It returns false when no devices are requested at all.
func onlyFullyQualifiedCDIDevices(image image.CUDA) bool {
	requested := image.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List()
	if len(requested) == 0 {
		return false
	}

	for _, name := range requested {
		if !cdi.IsQualifiedName(name) {
			return false
		}
	}
	return true
}

View File

@@ -19,11 +19,9 @@ package info
import (
"testing"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
)
func TestResolveAutoMode(t *testing.T) {
@@ -34,8 +32,7 @@ func TestResolveAutoMode(t *testing.T) {
mode string
expectedMode string
info map[string]bool
envmap map[string]string
mounts []string
image image.CUDA
}{
{
description: "non-auto resolves to input",
@@ -122,7 +119,7 @@ func TestResolveAutoMode(t *testing.T) {
description: "cdi devices resolves to cdi",
mode: "auto",
expectedMode: "cdi",
envmap: map[string]string{
image: image.CUDA{
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=all",
},
},
@@ -130,14 +127,14 @@ func TestResolveAutoMode(t *testing.T) {
description: "multiple cdi devices resolves to cdi",
mode: "auto",
expectedMode: "cdi",
envmap: map[string]string{
image: image.CUDA{
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,nvidia.com/gpu=1",
},
},
{
description: "at least one non-cdi device resolves to legacy",
mode: "auto",
envmap: map[string]string{
image: image.CUDA{
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
},
info: map[string]bool{
@@ -150,7 +147,7 @@ func TestResolveAutoMode(t *testing.T) {
{
description: "at least one non-cdi device resolves to csv",
mode: "auto",
envmap: map[string]string{
image: image.CUDA{
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
},
info: map[string]bool{
@@ -160,44 +157,6 @@ func TestResolveAutoMode(t *testing.T) {
},
expectedMode: "csv",
},
{
description: "cdi mount devices resolves to CDI",
mode: "auto",
mounts: []string{
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
},
expectedMode: "cdi",
},
{
description: "cdi mount and non-CDI devices resolves to legacy",
mode: "auto",
mounts: []string{
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
"/var/run/nvidia-container-devices/all",
},
info: map[string]bool{
"nvml": true,
"tegra": false,
"nvgpu": false,
},
expectedMode: "legacy",
},
{
description: "cdi mount and non-CDI envvar resolves to legacy",
mode: "auto",
envmap: map[string]string{
"NVIDIA_VISIBLE_DEVICES": "0",
},
mounts: []string{
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
},
info: map[string]bool{
"nvml": true,
"tegra": false,
"nvgpu": false,
},
expectedMode: "legacy",
},
}
for _, tc := range testCases {
@@ -218,20 +177,7 @@ func TestResolveAutoMode(t *testing.T) {
logger: logger,
info: info,
}
var mounts []specs.Mount
for _, d := range tc.mounts {
mount := specs.Mount{
Source: "/dev/null",
Destination: d,
}
mounts = append(mounts, mount)
}
image, _ := image.New(
image.WithEnvMap(tc.envmap),
image.WithMounts(mounts),
)
mode := r.resolveMode(tc.mode, image)
mode := r.resolveMode(tc.mode, tc.image)
require.EqualValues(t, tc.expectedMode, mode)
})
}

View File

@@ -1,62 +0,0 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package devices
// builder accumulates the settings supplied through Option values before New
// constructs the resulting devices map.
type builder struct {
// asMap holds the candidate device name to major number entries.
asMap devices
// filter is called with each device name; entries for which it returns true
// are left out of the map built by New.
filter func(string) bool
}
// New builds a Devices collection from the supplied options.
// Every entry of the configured device map is kept unless the configured
// filter returns true for its name; with no filter configured nothing is excluded.
func New(opts ...Option) Devices {
	cfg := &builder{}
	for _, apply := range opts {
		apply(cfg)
	}

	exclude := cfg.filter
	if exclude == nil {
		// Default: keep everything.
		exclude = func(string) bool { return false }
	}

	result := make(devices)
	for name, major := range cfg.asMap {
		if !exclude(string(name)) {
			result[name] = major
		}
	}
	return result
}
// Option is a functional option that is applied to the builder used by New.
type Option func(*builder)
// WithDeviceToMajor returns an Option that replaces the builder's device map
// with one constructed from the given device name to major number map.
func WithDeviceToMajor(deviceToMajor map[string]int) Option {
	return func(b *builder) {
		converted := make(devices)
		for rawName, rawMajor := range deviceToMajor {
			converted[Name(rawName)] = Major(rawMajor)
		}
		b.asMap = converted
	}
}
// WithFilter returns an Option that installs filter on the builder.
// Devices whose name makes filter return true are excluded by New.
func WithFilter(filter func(string) bool) Option {
	setFilter := func(cfg *builder) {
		cfg.filter = filter
	}
	return setFilter
}

View File

@@ -33,7 +33,7 @@ const (
NVIDIAModesetMinor = 254
NVIDIAFrontend = Name("nvidia-frontend")
NVIDIAGPU = Name("nvidia")
NVIDIAGPU = NVIDIAFrontend
NVIDIACaps = Name("nvidia-caps")
NVIDIAUVM = Name("nvidia-uvm")
@@ -53,43 +53,22 @@ type Major int
type Devices interface {
Exists(Name) bool
Get(Name) (Major, bool)
Count() int
}
type devices map[Name]Major
var _ Devices = devices(nil)
// Count returns the number of devices defined, i.e. the number of entries in
// the underlying map.
func (d devices) Count() int {
return len(d)
}
// Exists checks if a Device with a given name exists or not
func (d devices) Exists(name Name) bool {
_, exists := d.Get(name)
_, exists := d[name]
return exists
}
// Get a Device from Devices. It also has fallback logic to ensure device name changes in /proc/devices are handled.
// For example, for GPU drivers 550.40.x or greater, the GPU device has been renamed from "nvidia-frontend" to "nvidia".
// Get a Device from Devices
func (d devices) Get(name Name) (Major, bool) {
	// Try each candidate name in the order given by getWithFallback and
	// return the first one present in the map.
	for _, candidate := range name.getWithFallback() {
		if major, found := d[candidate]; found {
			return major, true
		}
	}
	return 0, false
}
// getWithFallback returns a prioritised list of device names for a specific name.
// This allows multiple names to be associated with a single name to support various driver versions.
func (n Name) getWithFallback() []Name {
if n == NVIDIAGPU || n == NVIDIAFrontend {
return []Name{NVIDIAGPU, NVIDIAFrontend}
}
return []Name{n}
device, exists := d[name]
return device, exists
}
// GetNVIDIADevices returns the set of NVIDIA Devices on the machine
@@ -115,23 +94,27 @@ func nvidiaDevices(devicesPath string) (Devices, error) {
var errNoNvidiaDevices = errors.New("no NVIDIA devices found")
func nvidiaDeviceFrom(reader io.Reader) (Devices, error) {
func nvidiaDeviceFrom(reader io.Reader) (devices, error) {
allDevices := devicesFrom(reader)
nvidiaDevices := make(devices)
nvidiaDevices := New(
WithDeviceToMajor(allDevices),
WithFilter(func(n string) bool {
return !strings.HasPrefix(n, nvidiaDevicePrefix)
}),
)
if nvidiaDevices.Count() == 0 {
var hasNvidiaDevices bool
for n, d := range allDevices {
if !strings.HasPrefix(string(n), nvidiaDevicePrefix) {
continue
}
nvidiaDevices[n] = d
hasNvidiaDevices = true
}
if !hasNvidiaDevices {
return nil, errNoNvidiaDevices
}
return nvidiaDevices, nil
}
func devicesFrom(reader io.Reader) map[string]int {
allDevices := make(map[string]int)
func devicesFrom(reader io.Reader) devices {
allDevices := make(devices)
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
device, major, err := processProcDeviceLine(scanner.Text())
@@ -143,11 +126,11 @@ func devicesFrom(reader io.Reader) map[string]int {
return allDevices
}
func processProcDeviceLine(line string) (string, int, error) {
func processProcDeviceLine(line string) (Name, Major, error) {
trimmed := strings.TrimSpace(line)
var name string
var major int
var name Name
var major Major
n, _ := fmt.Sscanf(trimmed, "%d %s", &major, &name)
if n == 2 {

Some files were not shown because too many files have changed in this diff Show More