mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
133 Commits
pull-reque
...
v1.17.7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bae3e7842e | ||
|
|
e78999b08c | ||
|
|
462ca9f93f | ||
|
|
ac9146832b | ||
|
|
a734438ce2 | ||
|
|
61d94f7856 | ||
|
|
e2ff6830f5 | ||
|
|
ab050837ce | ||
|
|
becddb70e6 | ||
|
|
8069346746 | ||
|
|
34526b19c0 | ||
|
|
f8b0b43a3f | ||
|
|
ce6928ccca | ||
|
|
63e8ecbc8e | ||
|
|
d4739cb17f | ||
|
|
e8ac80146f | ||
|
|
dc0dee1f33 | ||
|
|
21827ad367 | ||
|
|
651e9f541a | ||
|
|
56b80c94b0 | ||
|
|
e096251183 | ||
|
|
cf35409004 | ||
|
|
8012e4f1be | ||
|
|
570e223276 | ||
|
|
e627eb2e21 | ||
|
|
24859f56d2 | ||
|
|
8676b5625a | ||
|
|
6bb4a5c7de | ||
|
|
a8e7ffcc95 | ||
|
|
58f54b937a | ||
|
|
8176ac40ee | ||
|
|
01e55461e8 | ||
|
|
32fe41a3d5 | ||
|
|
3436b5b032 | ||
|
|
c4f46e7354 | ||
|
|
753b5d1595 | ||
|
|
e0b651668d | ||
|
|
6e59255149 | ||
|
|
a152a2fd7e | ||
|
|
b43c8c424e | ||
|
|
f785e908a7 | ||
|
|
ef941f423c | ||
|
|
90d30740a4 | ||
|
|
35e0dea1d3 | ||
|
|
ac9eee956b | ||
|
|
13bccdda73 | ||
|
|
bdca5b83a1 | ||
|
|
997d9a774f | ||
|
|
a26ba7b2a7 | ||
|
|
1c13a9647c | ||
|
|
4e84c0dc50 | ||
|
|
997f23cf11 | ||
|
|
e4f8406139 | ||
|
|
aa0d4af51a | ||
|
|
7b3ec6f42d | ||
|
|
936827d09f | ||
|
|
267fb5987f | ||
|
|
eb48d2d5fd | ||
|
|
b71bb87d91 | ||
|
|
cc88c554ed | ||
|
|
ce7cea3a0d | ||
|
|
1bc9548a2f | ||
|
|
7c758c97b8 | ||
|
|
48d538eef9 | ||
|
|
9848c3e985 | ||
|
|
868f385a01 | ||
|
|
069926e4b6 | ||
|
|
91a983a341 | ||
|
|
f5680dd0cd | ||
|
|
5bdf14b1e7 | ||
|
|
b598826ff2 | ||
|
|
c1bac2873b | ||
|
|
9f611a5a23 | ||
|
|
e330a938fd | ||
|
|
f445d4b614 | ||
|
|
e1ae57eef9 | ||
|
|
76040ff2ad | ||
|
|
fd865bb9e7 | ||
|
|
6b037a0dde | ||
|
|
9eccc1659d | ||
|
|
6da7af8dfa | ||
|
|
b170a35328 | ||
|
|
2b11b7eaf2 | ||
|
|
82090b547e | ||
|
|
f452ef4747 | ||
|
|
c3622abeac | ||
|
|
c599c6cc62 | ||
|
|
9b69590c74 | ||
|
|
9f6970944f | ||
|
|
5ac593eac2 | ||
|
|
07f45ea74f | ||
|
|
2310ed76d8 | ||
|
|
f2b3e8d381 | ||
|
|
65ef5e38dd | ||
|
|
12367de49c | ||
|
|
aeb82cb9a2 | ||
|
|
628516a5eb | ||
|
|
487d07d07a | ||
|
|
aaac4ec23e | ||
|
|
bed57fdba0 | ||
|
|
4237556078 | ||
|
|
23b2970406 | ||
|
|
3da3e5135c | ||
|
|
7deb5eb512 | ||
|
|
62517e68c6 | ||
|
|
2918059302 | ||
|
|
374a72c953 | ||
|
|
f91791b4d1 | ||
|
|
1f4e2a25c7 | ||
|
|
a390964e9c | ||
|
|
56faf71991 | ||
|
|
b1c02f4b05 | ||
|
|
4e4a129382 | ||
|
|
cb82e29c75 | ||
|
|
b008af4141 | ||
|
|
8c4b338986 | ||
|
|
cab9ed4803 | ||
|
|
6f0dce9b0a | ||
|
|
4f09568fa1 | ||
|
|
2abe1268b4 | ||
|
|
c90338dd86 | ||
|
|
0322f85690 | ||
|
|
fa66e4cd56 | ||
|
|
aac7258b6f | ||
|
|
70ac1e2d28 | ||
|
|
f774ceeedd | ||
|
|
1467f3f339 | ||
|
|
ca9612a9ff | ||
|
|
11e4af3e8a | ||
|
|
edf5d970f4 | ||
|
|
b03e942424 | ||
|
|
a9185918ab | ||
|
|
3cb613a12b |
128
.github/dependabot.yml
vendored
128
.github/dependabot.yml
vendored
@@ -3,63 +3,43 @@
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
# main branch
|
||||
- package-ecosystem: "gomod"
|
||||
target-branch: main
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
ignore:
|
||||
- dependency-name: k8s.io/*
|
||||
labels:
|
||||
- dependencies
|
||||
|
||||
- package-ecosystem: "docker"
|
||||
target-branch: main
|
||||
directory: "/deployments/container"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
|
||||
- package-ecosystem: "gomod"
|
||||
# This defines a specific dependabot rule for the latest release-* branch.
|
||||
target-branch: release-1.16
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
ignore:
|
||||
- dependency-name: k8s.io/*
|
||||
labels:
|
||||
- dependencies
|
||||
- maintenance
|
||||
|
||||
- package-ecosystem: "docker"
|
||||
target-branch: release-1.16
|
||||
directory: "/deployments/container"
|
||||
directories:
|
||||
- "/"
|
||||
- "deployments/devel"
|
||||
- "tests"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
labels:
|
||||
- dependencies
|
||||
- maintenance
|
||||
groups:
|
||||
k8sio:
|
||||
patterns:
|
||||
- k8s.io/*
|
||||
exclude-patterns:
|
||||
- k8s.io/klog/*
|
||||
|
||||
- package-ecosystem: "gomod"
|
||||
target-branch: main
|
||||
directory: "deployments/devel"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
|
||||
# A dependabot rule to bump the golang version.
|
||||
- package-ecosystem: "docker"
|
||||
target-branch: main
|
||||
directory: "/deployments/devel"
|
||||
directories:
|
||||
# CUDA image
|
||||
- "/deployments/container"
|
||||
# Golang version
|
||||
- "/deployments/devel"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
labels:
|
||||
- dependencies
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
target-branch: main
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
labels:
|
||||
- dependencies
|
||||
|
||||
# Allow dependabot to update the libnvidia-container submodule.
|
||||
- package-ecosystem: "gitsubmodule"
|
||||
@@ -72,3 +52,69 @@ updates:
|
||||
labels:
|
||||
- dependencies
|
||||
- libnvidia-container
|
||||
|
||||
# The release branch(es):
|
||||
- package-ecosystem: "gomod"
|
||||
target-branch: release-1.17
|
||||
directories:
|
||||
- "/"
|
||||
# We don't update development or test dependencies on release branches
|
||||
# - "deployments/devel"
|
||||
# - "tests"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
labels:
|
||||
- dependencies
|
||||
- maintenance
|
||||
ignore:
|
||||
# For release branches we only consider patch updates.
|
||||
- dependency-name: "*"
|
||||
update-types:
|
||||
- version-update:semver-major
|
||||
- version-update:semver-minor
|
||||
groups:
|
||||
k8sio:
|
||||
patterns:
|
||||
- k8s.io/*
|
||||
exclude-patterns:
|
||||
- k8s.io/klog/*
|
||||
|
||||
- package-ecosystem: "docker"
|
||||
target-branch: release-1.17
|
||||
directories:
|
||||
# CUDA image
|
||||
- "/deployments/container"
|
||||
# Golang version
|
||||
- "/deployments/devel"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
ignore:
|
||||
# For release branches we only apply patch updates to the golang version.
|
||||
- dependency-name: "*golang*"
|
||||
update-types:
|
||||
- version-update:semver-major
|
||||
- version-update:semver-minor
|
||||
labels:
|
||||
- dependencies
|
||||
- maintenance
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
target-branch: release-1.17
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
labels:
|
||||
- dependencies
|
||||
- maintenance
|
||||
|
||||
# GitHub Actions used on the gh-pages branch also need to be kept up to date.
|
||||
- package-ecosystem: "github-actions"
|
||||
target-branch: gh-pages
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
labels:
|
||||
- dependencies
|
||||
|
||||
53
.github/workflows/ci.yaml
vendored
Normal file
53
.github/workflows/ci.yaml
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
# Copyright 2025 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: CI Pipeline
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- "pull-request/[0-9]+"
|
||||
- main
|
||||
- release-*
|
||||
|
||||
jobs:
|
||||
code-scanning:
|
||||
uses: ./.github/workflows/code_scanning.yaml
|
||||
|
||||
variables:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.version.outputs.version }}
|
||||
steps:
|
||||
- name: Generate Commit Short SHA
|
||||
id: version
|
||||
run: echo "version=$(echo $GITHUB_SHA | cut -c1-8)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
golang:
|
||||
uses: ./.github/workflows/golang.yaml
|
||||
|
||||
image:
|
||||
uses: ./.github/workflows/image.yaml
|
||||
needs: [variables, golang, code-scanning]
|
||||
secrets: inherit
|
||||
with:
|
||||
version: ${{ needs.variables.outputs.version }}
|
||||
build_multi_arch_images: ${{ github.ref_name == 'main' || startsWith(github.ref_name, 'release-') }}
|
||||
|
||||
e2e-test:
|
||||
needs: [image, variables]
|
||||
secrets: inherit
|
||||
uses: ./.github/workflows/e2e.yaml
|
||||
with:
|
||||
version: ${{ needs.variables.outputs.version }}
|
||||
5
.github/workflows/code_scanning.yaml
vendored
5
.github/workflows/code_scanning.yaml
vendored
@@ -15,6 +15,7 @@
|
||||
name: "CodeQL"
|
||||
|
||||
on:
|
||||
workflow_call: {}
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
@@ -22,10 +23,6 @@ on:
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
|
||||
98
.github/workflows/e2e.yaml
vendored
Normal file
98
.github/workflows/e2e.yaml
vendored
Normal file
@@ -0,0 +1,98 @@
|
||||
# Copyright 2025 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: End-to-end Tests
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
required: true
|
||||
type: string
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID:
|
||||
required: true
|
||||
AWS_SECRET_ACCESS_KEY:
|
||||
required: true
|
||||
AWS_SSH_KEY:
|
||||
required: true
|
||||
E2E_SSH_USER:
|
||||
required: true
|
||||
SLACK_BOT_TOKEN:
|
||||
required: true
|
||||
SLACK_CHANNEL_ID:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
e2e-tests:
|
||||
runs-on: linux-amd64-cpu4
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Calculate build vars
|
||||
id: vars
|
||||
run: |
|
||||
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
|
||||
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
|
||||
GOLANG_VERSION=$(./hack/golang-version.sh)
|
||||
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
|
||||
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ env.GOLANG_VERSION }}
|
||||
|
||||
- name: Set up Holodeck
|
||||
uses: NVIDIA/holodeck@v0.2.7
|
||||
with:
|
||||
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
|
||||
holodeck_config: "tests/e2e/infra/aws.yaml"
|
||||
|
||||
- name: Get public dns name
|
||||
id: holodeck_public_dns_name
|
||||
uses: mikefarah/yq@master
|
||||
with:
|
||||
cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml
|
||||
|
||||
- name: Run e2e tests
|
||||
env:
|
||||
IMAGE_NAME: ghcr.io/nvidia/container-toolkit
|
||||
VERSION: ${{ inputs.version }}
|
||||
SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
|
||||
E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
|
||||
E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
|
||||
E2E_INSTALL_CTK: "true"
|
||||
run: |
|
||||
e2e_ssh_key=$(mktemp)
|
||||
echo "$SSH_KEY" > "$e2e_ssh_key"
|
||||
chmod 600 "$e2e_ssh_key"
|
||||
export E2E_SSH_KEY="$e2e_ssh_key"
|
||||
|
||||
make -f tests/e2e/Makefile test
|
||||
|
||||
- name: Send Slack alert notification
|
||||
if: ${{ failure() }}
|
||||
uses: slackapi/slack-github-action@v2.1.0
|
||||
with:
|
||||
method: chat.postMessage
|
||||
token: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
payload: |
|
||||
channel: ${{ secrets.SLACK_CHANNEL_ID }}
|
||||
text: |
|
||||
:x: On repository ${{ github.repository }}, the Workflow *${{ github.workflow }}* has failed.
|
||||
|
||||
Details: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
5
.github/workflows/golang.yaml
vendored
5
.github/workflows/golang.yaml
vendored
@@ -15,6 +15,7 @@
|
||||
name: Golang
|
||||
|
||||
on:
|
||||
workflow_call: {}
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
@@ -22,10 +23,6 @@ on:
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
|
||||
jobs:
|
||||
check:
|
||||
|
||||
68
.github/workflows/image.yaml
vendored
68
.github/workflows/image.yaml
vendored
@@ -16,21 +16,18 @@
|
||||
name: image
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
required: true
|
||||
type: string
|
||||
build_multi_arch_images:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
packages:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: linux-amd64-cpu4
|
||||
strategy:
|
||||
matrix:
|
||||
target:
|
||||
@@ -41,7 +38,7 @@ jobs:
|
||||
- centos7-x86_64
|
||||
- centos8-ppc64le
|
||||
ispr:
|
||||
- ${{github.event_name == 'pull_request'}}
|
||||
- ${{ github.ref_name != 'main' && !startsWith( github.ref_name, 'release-' ) }}
|
||||
exclude:
|
||||
- ispr: true
|
||||
target: ubuntu18.04-arm64
|
||||
@@ -52,18 +49,25 @@ jobs:
|
||||
- ispr: true
|
||||
target: centos8-ppc64le
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Check out code
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:master
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: build ${{ matrix.target }} packages
|
||||
run: |
|
||||
sudo apt-get install -y coreutils build-essential sed git bash make
|
||||
echo "Building packages"
|
||||
./scripts/build-packages.sh ${{ matrix.target }}
|
||||
|
||||
- name: 'Upload Artifacts'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
@@ -72,7 +76,7 @@ jobs:
|
||||
path: ${{ github.workspace }}/dist/*
|
||||
|
||||
image:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: linux-amd64-cpu4
|
||||
strategy:
|
||||
matrix:
|
||||
dist:
|
||||
@@ -80,7 +84,7 @@ jobs:
|
||||
- ubi8
|
||||
- packaging
|
||||
ispr:
|
||||
- ${{github.event_name == 'pull_request'}}
|
||||
- ${{ github.ref_name != 'main' && !startsWith( github.ref_name, 'release-' ) }}
|
||||
exclude:
|
||||
- ispr: true
|
||||
dist: ubi8
|
||||
@@ -88,34 +92,15 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Check out code
|
||||
- name: Calculate build vars
|
||||
id: vars
|
||||
run: |
|
||||
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
|
||||
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
|
||||
REPO_FULL_NAME="${{ github.event.pull_request.head.repo.full_name }}"
|
||||
echo "${REPO_FULL_NAME}"
|
||||
echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> $GITHUB_ENV
|
||||
|
||||
PUSH_ON_BUILD="false"
|
||||
BUILD_MULTI_ARCH_IMAGES="false"
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
if [[ "${{ github.actor }}" != "dependabot[bot]" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
|
||||
# For non-fork PRs that are not created by dependabot we do push images
|
||||
PUSH_ON_BUILD="true"
|
||||
fi
|
||||
elif [[ "${{ github.event_name }}" == "push" ]]; then
|
||||
# On push events we do generate images and enable multi-arch builds
|
||||
PUSH_ON_BUILD="true"
|
||||
BUILD_MULTI_ARCH_IMAGES="true"
|
||||
fi
|
||||
echo "PUSH_ON_BUILD=${PUSH_ON_BUILD}" >> $GITHUB_ENV
|
||||
echo "BUILD_MULTI_ARCH_IMAGES=${BUILD_MULTI_ARCH_IMAGES}" >> $GITHUB_ENV
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:master
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Get built packages
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
@@ -129,10 +114,13 @@ jobs:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build image
|
||||
env:
|
||||
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/container-toolkit
|
||||
VERSION: ${COMMIT_SHORT_SHA}
|
||||
IMAGE_NAME: ghcr.io/nvidia/container-toolkit
|
||||
VERSION: ${{ inputs.version }}
|
||||
PUSH_ON_BUILD: "true"
|
||||
BUILD_MULTI_ARCH_IMAGES: ${{ inputs.build_multi_arch_images }}
|
||||
run: |
|
||||
echo "${VERSION}"
|
||||
make -f deployments/container/Makefile build-${{ matrix.dist }}
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -3,7 +3,7 @@ artifacts
|
||||
*.swp
|
||||
*.swo
|
||||
/coverage.out*
|
||||
/test/output/
|
||||
/tests/output/
|
||||
/nvidia-container-runtime
|
||||
/nvidia-container-runtime.*
|
||||
/nvidia-container-runtime-hook
|
||||
|
||||
75
CHANGELOG.md
75
CHANGELOG.md
@@ -1,5 +1,80 @@
|
||||
# NVIDIA Container Toolkit Changelog
|
||||
|
||||
## v1.17.7
|
||||
|
||||
- Fix mode detection on Thor-based systems. This correctly resolves `auto` mode to `csv`.
|
||||
- Fix resolution of libs in LDCache on ARM. This fixes CDI spec generation on ARM-based systems using NVML.
|
||||
- Run update-ldcache hook in isolated namespaces.
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
|
||||
- Bump CUDA base image version to 12.9.0
|
||||
|
||||
### Changes in libnvidia-container
|
||||
|
||||
- Add `--cuda-compat-mode` flag to the `nvidia-container-cli configure` command.
|
||||
|
||||
## v1.17.6
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
|
||||
- Allow container runtime executable path to be specified when configuring containerd.
|
||||
- Bump CUDA base image version to 12.8.1
|
||||
|
||||
### Changes in libnvidia-container
|
||||
|
||||
- Skip files when user has insufficient permissions. This prevents errors when discovering IPC sockets when the `nvidia-container-cli` is run as a non-root user.
|
||||
- Fix building with Go 1.24
|
||||
- Fix some typos in text.
|
||||
|
||||
## v1.17.5
|
||||
|
||||
- Allow the `enable-cuda-compat` hook to be skipped when generating CDI specifications. This improves compatibility with older NVIDIA Container Toolkit installations. The hook is explicitly ignored for management CDI specifications.
|
||||
- Add IMEX binaries to CDI discovery. This includes the IMEX Daemon and IMEX Control binaries in containers.
|
||||
- Fix bug that may overwrite docker feature flags when configuring CDI from the `nvidia-ctk runtime configure` command.
|
||||
- Remove the unused `Set()` function from engine config API.
|
||||
- Add an `EnableCDI()` method to engine config API.
|
||||
- Add an `ignore-imex-channel-requests` feature flag. This ensures that the NVIDIA Container Runtime can be configured to ignore IMEX channel requests when these should be managed by another component.
|
||||
- Update the `update-ldcache` hook to run the host `ldconfig` from a MEMFD.
|
||||
- Add support for CUDA Forward Compatibility (removed by default in v1.17.4) using a dedicated `enable-cuda-compat` hook. This can be disabled using a `disable-cuda-compat-lib-hook` feature flag.
|
||||
- Disable nvsandboxutils in the `nvcdi` API. This prevents a segmentation violation with NVIDIA GPU Drivers from the 565 branch.
|
||||
- Fix a bug where `cdi` mode would not work with the `--gpus` flag even if the NVIDIA Container Runtime was used.
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
|
||||
- Enable CDI in container engine (Containerd, Cri-o, Docker) if CDI_ENABLED is set.
|
||||
- Bump CUDA base image version to 12.8.0
|
||||
|
||||
## v1.17.4
|
||||
- Disable mounting of compat libs from container by default
|
||||
- Add allow-cuda-compat-libs-from-container feature flag
|
||||
- Skip graphics modifier in CSV mode
|
||||
- Properly pass configSearchPaths to a Driver constructor
|
||||
- Add support for containerd version 3 config
|
||||
- Add string TOML source
|
||||
|
||||
### Changes in libnvidia-container
|
||||
- Add no-cntlibs CLI option to nvidia-container-cli
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
- Bump CUDA base image version to 12.6.3
|
||||
|
||||
## v1.17.3
|
||||
- Only allow host-relative LDConfig paths by default.
|
||||
### Changes in libnvidia-container
|
||||
- Create virtual copy of host ldconfig binary before calling fexecve()
|
||||
|
||||
## v1.17.2
|
||||
- Fixed a bug where legacy images would set imex channels as `all`.
|
||||
|
||||
## v1.17.1
|
||||
- Fixed a bug where specific symlinks existing in a container image could cause a container to fail to start.
|
||||
- Fixed a bug on Tegra-based systems where a container would fail to start.
|
||||
- Fixed a bug where the default container runtime config path was not properly set.
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
- Fall back to using a config file if the current runtime config cannot be determined from the command line.
|
||||
|
||||
## v1.17.0
|
||||
- Promote v1.17.0-rc.2 to v1.17.0
|
||||
- Fix bug when using just-in-time CDI spec generation
|
||||
|
||||
@@ -34,7 +34,7 @@ environment variables.
|
||||
|
||||
## Testing packages locally
|
||||
|
||||
The [test/release](./test/release/) folder contains documentation on how the installation of local or staged packages can be tested.
|
||||
The [tests/release](./tests/release/) folder contains documentation on how the installation of local or staged packages can be tested.
|
||||
|
||||
|
||||
## Releasing
|
||||
|
||||
@@ -21,6 +21,7 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod"
|
||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
)
|
||||
@@ -32,5 +33,6 @@ func New(logger logger.Interface) []*cli.Command {
|
||||
ldcache.NewCommand(logger),
|
||||
symlinks.NewCommand(logger),
|
||||
chmod.NewCommand(logger),
|
||||
cudacompat.NewCommand(logger),
|
||||
}
|
||||
}
|
||||
|
||||
76
cmd/nvidia-cdi-hook/cudacompat/container-root.go
Normal file
76
cmd/nvidia-cdi-hook/cudacompat/container-root.go
Normal file
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cudacompat
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/moby/sys/symlink"
|
||||
)
|
||||
|
||||
// containerRoot is the path of a container's root filesystem.
// The methods on this type interpret their path arguments relative to it.
type containerRoot string
|
||||
|
||||
// hasPath checks whether the specified path exists in the root.
|
||||
func (r containerRoot) hasPath(path string) bool {
|
||||
resolved, err := r.resolve(path)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// globFiles matches the specified pattern in the root.
|
||||
// The files that match must be regular files.
|
||||
func (r containerRoot) globFiles(pattern string) ([]string, error) {
|
||||
patternPath, err := r.resolve(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
matches, err := filepath.Glob(patternPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var files []string
|
||||
for _, match := range matches {
|
||||
info, err := os.Lstat(match)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Ignore symlinks.
|
||||
if info.Mode()&os.ModeSymlink != 0 {
|
||||
continue
|
||||
}
|
||||
// Ignore directories.
|
||||
if info.IsDir() {
|
||||
continue
|
||||
}
|
||||
files = append(files, match)
|
||||
}
|
||||
return files, nil
|
||||
}
|
||||
|
||||
// resolve returns the absolute path including root path.
|
||||
// Symlinks are resolved, but are guaranteed to resolve in the root.
|
||||
func (r containerRoot) resolve(path string) (string, error) {
|
||||
absolute := filepath.Clean(filepath.Join(string(r), path))
|
||||
return symlink.FollowSymlinkInScope(absolute, string(r))
|
||||
}
|
||||
221
cmd/nvidia-cdi-hook/cudacompat/cudacompat.go
Normal file
221
cmd/nvidia-cdi-hook/cudacompat/cudacompat.go
Normal file
@@ -0,0 +1,221 @@
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cudacompat
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
// cudaCompatPath is the directory, relative to the container root, that is
// searched for the CUDA compat libraries.
const cudaCompatPath = "/usr/local/cuda/compat"

// cudaCompatLdsoconfdFilenamePattern is the filename pattern used for the
// file created in ld.so.conf.d that references the CUDA compat path.
// The 00-compat prefix is chosen so that these libraries take precedence
// over other libraries on the system.
const cudaCompatLdsoconfdFilenamePattern = "00-compat-*.conf"
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
type options struct {
|
||||
hostDriverVersion string
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs a cuda-compat command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build the enable-cuda-compat command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := options{}
|
||||
|
||||
// Create the 'enable-cuda-compat' command
|
||||
c := cli.Command{
|
||||
Name: "enable-cuda-compat",
|
||||
Usage: "This hook ensures that the folder containing the CUDA compat libraries is added to the ldconfig search path if required.",
|
||||
Before: func(c *cli.Context) error {
|
||||
return m.validateFlags(c, &cfg)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "host-driver-version",
|
||||
Usage: "Specify the host driver version. If the CUDA compat libraries detected in the container do not have a higher MAJOR version, the hook is a no-op.",
|
||||
Destination: &cfg.hostDriverVersion,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Hidden: true,
|
||||
Category: "testing-only",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(_ *cli.Context, cfg *options) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(_ *cli.Context, cfg *options) error {
|
||||
if cfg.hostDriverVersion == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %w", err)
|
||||
}
|
||||
|
||||
containerRootDir, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %w", err)
|
||||
}
|
||||
|
||||
containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDir), cfg.hostDriverVersion)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get container forward compat directory: %w", err)
|
||||
}
|
||||
if containerForwardCompatDir == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
return m.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir)
|
||||
}
|
||||
|
||||
func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostDriverVersion string) (string, error) {
|
||||
if hostDriverVersion == "" {
|
||||
m.logger.Debugf("Host driver version not specified")
|
||||
return "", nil
|
||||
}
|
||||
if !containerRoot.hasPath(cudaCompatPath) {
|
||||
m.logger.Debugf("No CUDA forward compatibility libraries directory in container")
|
||||
return "", nil
|
||||
}
|
||||
if !containerRoot.hasPath("/etc/ld.so.cache") {
|
||||
m.logger.Debugf("The container does not have an LDCache")
|
||||
return "", nil
|
||||
}
|
||||
|
||||
libs, err := containerRoot.globFiles(filepath.Join(cudaCompatPath, "libcuda.so.*.*"))
|
||||
if err != nil {
|
||||
m.logger.Warningf("Failed to find CUDA compat library: %w", err)
|
||||
return "", nil
|
||||
}
|
||||
|
||||
if len(libs) == 0 {
|
||||
m.logger.Debugf("No CUDA forward compatibility libraries container")
|
||||
return "", nil
|
||||
}
|
||||
|
||||
if len(libs) != 1 {
|
||||
m.logger.Warningf("Unexpected number of CUDA compat libraries in container: %v", libs)
|
||||
return "", nil
|
||||
}
|
||||
|
||||
compatDriverVersion := strings.TrimPrefix(filepath.Base(libs[0]), "libcuda.so.")
|
||||
compatMajor, err := extractMajorVersion(compatDriverVersion)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to extract major version from %q: %v", compatDriverVersion, err)
|
||||
}
|
||||
|
||||
driverMajor, err := extractMajorVersion(hostDriverVersion)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to extract major version from %q: %v", hostDriverVersion, err)
|
||||
}
|
||||
|
||||
if driverMajor >= compatMajor {
|
||||
m.logger.Debugf("Compat major version is not greater than the host driver major version (%v >= %v)", hostDriverVersion, compatDriverVersion)
|
||||
return "", nil
|
||||
}
|
||||
|
||||
resolvedCompatDir := strings.TrimPrefix(filepath.Dir(libs[0]), string(containerRoot))
|
||||
return resolvedCompatDir, nil
|
||||
}
|
||||
|
||||
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/ in the specified root.
|
||||
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and
|
||||
// contains the specified directories on each line.
|
||||
func (m command) createLdsoconfdFile(in containerRoot, pattern string, dirs ...string) error {
|
||||
if len(dirs) == 0 {
|
||||
m.logger.Debugf("No directories to add to /etc/ld.so.conf")
|
||||
return nil
|
||||
}
|
||||
|
||||
ldsoconfdDir, err := in.resolve("/etc/ld.so.conf.d")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
|
||||
return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
|
||||
}
|
||||
|
||||
configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create config file: %w", err)
|
||||
}
|
||||
defer configFile.Close()
|
||||
|
||||
m.logger.Debugf("Adding directories %v to %v", dirs, configFile.Name())
|
||||
|
||||
added := make(map[string]bool)
|
||||
for _, dir := range dirs {
|
||||
if added[dir] {
|
||||
continue
|
||||
}
|
||||
_, err = configFile.WriteString(fmt.Sprintf("%s\n", dir))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update config file: %w", err)
|
||||
}
|
||||
added[dir] = true
|
||||
}
|
||||
|
||||
// The created file needs to be world readable for the cases where the container is run as a non-root user.
|
||||
if err := configFile.Chmod(0644); err != nil {
|
||||
return fmt.Errorf("failed to chmod config file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// extractMajorVersion returns the integer value of the part of version that
// precedes the first "." (or of the whole string if it contains no ".").
// An error is returned if that part is not a valid integer.
func extractMajorVersion(version string) (int, error) {
	major := version
	if dot := strings.IndexByte(version, '.'); dot >= 0 {
		major = version[:dot]
	}
	return strconv.Atoi(major)
}
|
||||
182
cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go
Normal file
182
cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go
Normal file
@@ -0,0 +1,182 @@
|
||||
/*
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package cudacompat
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCompatLibs(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
contents map[string]string
|
||||
hostDriverVersion string
|
||||
expectedContainerForwardCompatDir string
|
||||
}{
|
||||
{
|
||||
description: "empty root",
|
||||
hostDriverVersion: "222.55.66",
|
||||
},
|
||||
{
|
||||
description: "compat lib is newer; no ldcache",
|
||||
contents: map[string]string{
|
||||
"/usr/local/cuda/compat/libcuda.so.333.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
},
|
||||
{
|
||||
description: "compat lib is newer; ldcache",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/usr/local/cuda/compat/libcuda.so.333.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
expectedContainerForwardCompatDir: "/usr/local/cuda/compat",
|
||||
},
|
||||
{
|
||||
description: "compat lib is older; ldcache",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/usr/local/cuda/compat/libcuda.so.111.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
expectedContainerForwardCompatDir: "",
|
||||
},
|
||||
{
|
||||
description: "compat lib has same major version; ldcache",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
expectedContainerForwardCompatDir: "",
|
||||
},
|
||||
{
|
||||
description: "numeric comparison is used; ldcache",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "99.55.66",
|
||||
expectedContainerForwardCompatDir: "/usr/local/cuda/compat",
|
||||
},
|
||||
{
|
||||
description: "driver version empty; ldcache",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "",
|
||||
},
|
||||
{
|
||||
description: "symlinks are followed",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/etc/alternatives/cuda/compat/libcuda.so.333.88.99": "",
|
||||
"/usr/local/cuda": "symlink=/etc/alternatives/cuda",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
expectedContainerForwardCompatDir: "/etc/alternatives/cuda/compat",
|
||||
},
|
||||
{
|
||||
description: "symlinks stay in container",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/compat/libcuda.so.333.88.99": "",
|
||||
"/usr/local/cuda": "symlink=../../../../../../",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
expectedContainerForwardCompatDir: "/compat",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
containerRootDir := t.TempDir()
|
||||
for name, contents := range tc.contents {
|
||||
target := filepath.Join(containerRootDir, name)
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(target), 0755))
|
||||
|
||||
if strings.HasPrefix(contents, "symlink=") {
|
||||
require.NoError(t, os.Symlink(strings.TrimPrefix(contents, "symlink="), target))
|
||||
continue
|
||||
}
|
||||
|
||||
require.NoError(t, os.WriteFile(target, []byte(contents), 0600))
|
||||
}
|
||||
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
containerForwardCompatDir, err := c.getContainerForwardCompatDir(containerRoot(containerRootDir), tc.hostDriverVersion)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, tc.expectedContainerForwardCompatDir, containerForwardCompatDir)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateLdconfig(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
testCases := []struct {
|
||||
description string
|
||||
folders []string
|
||||
expectedContents string
|
||||
}{
|
||||
{
|
||||
description: "no folders; have no contents",
|
||||
},
|
||||
{
|
||||
description: "single folder is added",
|
||||
folders: []string{"/usr/local/cuda/compat"},
|
||||
expectedContents: "/usr/local/cuda/compat\n",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
containerRootDir := t.TempDir()
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
err := c.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, tc.folders...)
|
||||
require.NoError(t, err)
|
||||
|
||||
matches, err := filepath.Glob(filepath.Join(containerRootDir, "/etc/ld.so.conf.d/00-compat-*.conf"))
|
||||
require.NoError(t, err)
|
||||
|
||||
if tc.expectedContents == "" {
|
||||
require.Empty(t, matches)
|
||||
return
|
||||
}
|
||||
|
||||
require.Len(t, matches, 1)
|
||||
contents, err := os.ReadFile(matches[0])
|
||||
require.NoError(t, err)
|
||||
|
||||
require.EqualValues(t, tc.expectedContents, string(contents))
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
46
cmd/nvidia-cdi-hook/update-ldcache/container-root.go
Normal file
46
cmd/nvidia-cdi-hook/update-ldcache/container-root.go
Normal file
@@ -0,0 +1,46 @@
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/moby/sys/symlink"
|
||||
)
|
||||
|
||||
// A containerRoot represents the root filesystem of a container.
// Its value is the path of that root; paths passed to its methods are joined
// onto this path before they are accessed.
|
||||
type containerRoot string
|
||||
|
||||
// hasPath checks whether the specified path exists in the root.
|
||||
func (r containerRoot) hasPath(path string) bool {
|
||||
resolved, err := r.resolve(path)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// resolve returns the absolute path including root path.
|
||||
// Symlinks are resolved, but are guaranteed to resolve in the root.
|
||||
func (r containerRoot) resolve(path string) (string, error) {
|
||||
absolute := filepath.Clean(filepath.Join(string(r), path))
|
||||
return symlink.FollowSymlinkInScope(absolute, string(r))
|
||||
}
|
||||
200
cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go
Normal file
200
cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go
Normal file
@@ -0,0 +1,200 @@
|
||||
//go:build linux
|
||||
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
|
||||
"github.com/moby/sys/reexec"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// pivotRoot will call pivot_root such that rootfs becomes the new root
|
||||
// filesystem, and everything else is cleaned up.
|
||||
// This is adapted from the implementation here:
|
||||
//
|
||||
// https://github.com/opencontainers/runc/blob/e89a29929c775025419ab0d218a43588b4c12b9a/libcontainer/rootfs_linux.go#L1056-L1113
|
||||
//
|
||||
// With the `mount` and `unmount` calls changed to direct unix.Mount and unix.Unmount calls.
|
||||
func pivotRoot(rootfs string) error {
|
||||
// While the documentation may claim otherwise, pivot_root(".", ".") is
|
||||
// actually valid. What this results in is / being the new root but
|
||||
// /proc/self/cwd being the old root. Since we can play around with the cwd
|
||||
// with pivot_root this allows us to pivot without creating directories in
|
||||
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
|
||||
|
||||
oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return &os.PathError{Op: "open", Path: "/", Err: err}
|
||||
}
|
||||
defer unix.Close(oldroot) //nolint: errcheck
|
||||
|
||||
newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return &os.PathError{Op: "open", Path: rootfs, Err: err}
|
||||
}
|
||||
defer unix.Close(newroot) //nolint: errcheck
|
||||
|
||||
// Change to the new root so that the pivot_root actually acts on it.
|
||||
if err := unix.Fchdir(newroot); err != nil {
|
||||
return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(newroot), Err: err}
|
||||
}
|
||||
|
||||
if err := unix.PivotRoot(".", "."); err != nil {
|
||||
return &os.PathError{Op: "pivot_root", Path: ".", Err: err}
|
||||
}
|
||||
|
||||
// Currently our "." is oldroot (according to the current kernel code).
|
||||
// However, purely for safety, we will fchdir(oldroot) since there isn't
|
||||
// really any guarantee from the kernel what /proc/self/cwd will be after a
|
||||
// pivot_root(2).
|
||||
|
||||
if err := unix.Fchdir(oldroot); err != nil {
|
||||
return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(oldroot), Err: err}
|
||||
}
|
||||
|
||||
// Make oldroot rslave to make sure our unmounts don't propagate to the
|
||||
// host (and thus bork the machine). We don't use rprivate because this is
|
||||
// known to cause issues due to races where we still have a reference to a
|
||||
// mount while a process in the host namespace are trying to operate on
|
||||
// something they think has no mounts (devicemapper in particular).
|
||||
if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
// Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
|
||||
if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Switch back to our shiny new root.
|
||||
if err := unix.Chdir("/"); err != nil {
|
||||
return &os.PathError{Op: "chdir", Path: "/", Err: err}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// mountLdConfig mounts the host ldconfig to the mount namespace of the hook.
|
||||
// We use WithProcfd to perform the mount operations to ensure that the changes
|
||||
// are persisted across the pivot root.
|
||||
func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) {
|
||||
hostLdconfigInfo, err := os.Stat(hostLdconfigPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error reading host ldconfig: %w", err)
|
||||
}
|
||||
|
||||
hookScratchDirPath := "/var/run/nvidia-ctk-hook"
|
||||
ldconfigPath := filepath.Join(hookScratchDirPath, "ldconfig")
|
||||
if err := utils.MkdirAllInRoot(containerRootDirPath, hookScratchDirPath, 0755); err != nil {
|
||||
return "", fmt.Errorf("error creating hook scratch folder: %w", err)
|
||||
}
|
||||
|
||||
err = utils.WithProcfd(containerRootDirPath, hookScratchDirPath, func(hookScratchDirFdPath string) error {
|
||||
return createTmpFs(hookScratchDirFdPath, int(hostLdconfigInfo.Size()))
|
||||
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error creating tmpfs: %w", err)
|
||||
}
|
||||
|
||||
if _, err := createFileInRoot(containerRootDirPath, ldconfigPath, hostLdconfigInfo.Mode()); err != nil {
|
||||
return "", fmt.Errorf("error creating ldconfig: %w", err)
|
||||
}
|
||||
|
||||
err = utils.WithProcfd(containerRootDirPath, ldconfigPath, func(ldconfigFdPath string) error {
|
||||
return unix.Mount(hostLdconfigPath, ldconfigFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "")
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error bind mounting host ldconfig: %w", err)
|
||||
}
|
||||
|
||||
return ldconfigPath, nil
|
||||
}
|
||||
|
||||
func createFileInRoot(containerRootDirPath string, destinationPath string, mode os.FileMode) (string, error) {
|
||||
dest, err := securejoin.SecureJoin(containerRootDirPath, destinationPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// Make the parent directory.
|
||||
destDir, destBase := filepath.Split(dest)
|
||||
destDirFd, err := utils.MkdirAllInRootOpen(containerRootDirPath, destDir, 0755)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error creating parent dir: %w", err)
|
||||
}
|
||||
defer destDirFd.Close()
|
||||
// Make the target file. We want to avoid opening any file that is
|
||||
// already there because it could be a "bad" file like an invalid
|
||||
// device or hung tty that might cause a DoS, so we use mknodat.
|
||||
// destBase does not contain any "/" components, and mknodat does
|
||||
// not follow trailing symlinks, so we can safely just call mknodat
|
||||
// here.
|
||||
if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|uint32(mode), 0); err != nil {
|
||||
// If we get EEXIST, there was already an inode there and
|
||||
// we can consider that a success.
|
||||
if !errors.Is(err, unix.EEXIST) {
|
||||
return "", fmt.Errorf("error creating empty file: %w", err)
|
||||
}
|
||||
}
|
||||
return dest, nil
|
||||
}
|
||||
|
||||
// mountProc mounts a clean proc filesystem in the new root.
|
||||
func mountProc(newroot string) error {
|
||||
target := filepath.Join(newroot, "/proc")
|
||||
|
||||
if err := os.MkdirAll(target, 0755); err != nil {
|
||||
return fmt.Errorf("error creating directory: %w", err)
|
||||
}
|
||||
return unix.Mount("proc", target, "proc", 0, "")
|
||||
}
|
||||
|
||||
// createTmpFs creates a tmpfs at the specified location with the specified size.
|
||||
func createTmpFs(target string, size int) error {
|
||||
return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size))
|
||||
}
|
||||
|
||||
// createReexecCommand creates a command that can be used to trigger the reexec
|
||||
// initializer.
|
||||
// On linux this command runs in new namespaces.
|
||||
func createReexecCommand(args []string) *exec.Cmd {
|
||||
cmd := reexec.Command(args...)
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Cloneflags: syscall.CLONE_NEWNS |
|
||||
syscall.CLONE_NEWUTS |
|
||||
syscall.CLONE_NEWIPC |
|
||||
syscall.CLONE_NEWPID |
|
||||
syscall.CLONE_NEWNET,
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
51
cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go
Normal file
51
cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go
Normal file
@@ -0,0 +1,51 @@
|
||||
//go:build !linux
|
||||
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
"github.com/moby/sys/reexec"
|
||||
)
|
||||
|
||||
// pivotRoot is not supported on non-linux platforms and always returns an
// error.
func pivotRoot(newroot string) error {
	err := fmt.Errorf("not supported")
	return err
}
|
||||
|
||||
// mountLdConfig is not supported on non-linux platforms and always returns an
// empty path and an error.
func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) {
	err := fmt.Errorf("not supported")
	return "", err
}
|
||||
|
||||
// mountProc is not supported on non-linux platforms and always returns an
// error.
func mountProc(newroot string) error {
	err := fmt.Errorf("not supported")
	return err
}
|
||||
|
||||
// createReexecCommand creates a command that can be used ot trigger the reexec
|
||||
// initializer.
|
||||
func createReexecCommand(args []string) *exec.Cmd {
|
||||
cmd := reexec.Command(args...)
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
return cmd
|
||||
}
|
||||
58
cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go
Normal file
58
cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go
Normal file
@@ -0,0 +1,58 @@
|
||||
//go:build linux
|
||||
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/dmz"
|
||||
)
|
||||
|
||||
// SafeExec attempts to clone the specified binary (as an memfd, for example) before executing it.
|
||||
func SafeExec(path string, args []string, envv []string) error {
|
||||
safeExe, err := cloneBinary(path)
|
||||
if err != nil {
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(path, args, envv)
|
||||
}
|
||||
defer safeExe.Close()
|
||||
|
||||
exePath := "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(exePath, args, envv)
|
||||
}
|
||||
|
||||
func cloneBinary(path string) (*os.File, error) {
|
||||
exe, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening current binary: %w", err)
|
||||
}
|
||||
defer exe.Close()
|
||||
|
||||
stat, err := exe.Stat()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("checking %v size: %w", path, err)
|
||||
}
|
||||
size := stat.Size()
|
||||
|
||||
return dmz.CloneBinary(exe, size, path, os.TempDir())
|
||||
}
|
||||
28
cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go
Normal file
28
cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go
Normal file
@@ -0,0 +1,28 @@
|
||||
//go:build !linux
|
||||
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import "syscall"
|
||||
|
||||
// SafeExec has no hardened implementation on non-linux systems; the arguments
// are passed unchanged to the Exec syscall.
func SafeExec(path string, args []string, envv []string) error {
	//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
	err := syscall.Exec(path, args, envv)
	return err
}
|
||||
@@ -19,11 +19,11 @@ package ldcache
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/moby/sys/reexec"
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
@@ -31,6 +31,17 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
const (
|
||||
// ldsoconfdFilenamePattern specifies the pattern for the filename
|
||||
// in ld.so.conf.d that includes references to the specified directories.
|
||||
// The 00-nvcr prefix is chosen to ensure that these libraries have a
|
||||
// higher precedence than other libraries on the system, but lower than
|
||||
// the 00-cuda-compat that is included in some containers.
|
||||
ldsoconfdFilenamePattern = "00-nvcr-*.conf"
|
||||
|
||||
reexecUpdateLdCacheCommandName = "reexec-update-ldcache"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
@@ -41,6 +52,13 @@ type options struct {
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
func init() {
|
||||
reexec.Register(reexecUpdateLdCacheCommandName, updateLdCacheHandler)
|
||||
if reexec.Init() {
|
||||
os.Exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
// NewCommand constructs an update-ldcache command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
c := command{
|
||||
@@ -100,97 +118,137 @@ func (m command) run(c *cli.Context, cfg *options) error {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
containerRootDir, err := s.GetContainerRoot()
|
||||
if err != nil || containerRootDir == "" || containerRootDir == "/" {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
ldconfigPath := m.resolveLDConfigPath(cfg.ldconfigPath)
|
||||
args := []string{filepath.Base(ldconfigPath)}
|
||||
if containerRoot != "" {
|
||||
args = append(args, "-r", containerRoot)
|
||||
args := []string{
|
||||
reexecUpdateLdCacheCommandName,
|
||||
strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"),
|
||||
containerRootDir,
|
||||
}
|
||||
args = append(args, cfg.folders.Value()...)
|
||||
|
||||
cmd := createReexecCommand(args)
|
||||
|
||||
return cmd.Run()
|
||||
}
|
||||
|
||||
// updateLdCacheHandler wraps updateLdCache with error handling.
|
||||
func updateLdCacheHandler() {
|
||||
if err := updateLdCache(os.Args); err != nil {
|
||||
log.Printf("Error updating ldcache: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// updateLdCache is invoked from a reexec'd handler and provides namespace
|
||||
// isolation for the operations performed by this hook.
|
||||
// At the point where this is invoked, we are in a new mount namespace that is
|
||||
// cloned from the parent.
|
||||
//
|
||||
// args[0] is the reexec initializer function name
|
||||
// args[1] is the path of the ldconfig binary on the host
|
||||
// args[2] is the container root directory
|
||||
// The remaining args are folders that need to be added to the ldcache.
|
||||
func updateLdCache(args []string) error {
|
||||
if len(args) < 3 {
|
||||
return fmt.Errorf("incorrect arguments: %v", args)
|
||||
}
|
||||
hostLdconfigPath := args[1]
|
||||
containerRootDirPath := args[2]
|
||||
|
||||
// To prevent leaking the parent proc filesystem, we create a new proc mount
|
||||
// in the container root.
|
||||
if err := mountProc(containerRootDirPath); err != nil {
|
||||
return fmt.Errorf("error mounting /proc: %w", err)
|
||||
}
|
||||
|
||||
if root(containerRoot).hasPath("/etc/ld.so.cache") {
|
||||
// We mount the host ldconfig before we pivot root since host paths are not
|
||||
// visible after the pivot root operation.
|
||||
ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error mounting host ldconfig: %w", err)
|
||||
}
|
||||
|
||||
// We pivot to the container root for the new process, this further limits
|
||||
// access to the host.
|
||||
if err := pivotRoot(containerRootDirPath); err != nil {
|
||||
return fmt.Errorf("error running pivot_root: %w", err)
|
||||
}
|
||||
|
||||
return runLdconfig(ldconfigPath, args[3:]...)
|
||||
}
|
||||
|
||||
// runLdconfig runs the ldconfig binary and ensures that the specified directories
|
||||
// are processed for the ldcache.
|
||||
func runLdconfig(ldconfigPath string, directories ...string) error {
|
||||
args := []string{
|
||||
"ldconfig",
|
||||
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
|
||||
// be configured to use a different config file by default.
|
||||
// Note that since ldconfig is executed after pivoting into the container root, /etc/ld.so.conf is
|
||||
// in the container.
|
||||
"-f", "/etc/ld.so.conf",
|
||||
}
|
||||
|
||||
containerRoot := containerRoot("/")
|
||||
|
||||
if containerRoot.hasPath("/etc/ld.so.cache") {
|
||||
args = append(args, "-C", "/etc/ld.so.cache")
|
||||
} else {
|
||||
m.logger.Debugf("No ld.so.cache found, skipping update")
|
||||
args = append(args, "-N")
|
||||
}
|
||||
|
||||
folders := cfg.folders.Value()
|
||||
if root(containerRoot).hasPath("/etc/ld.so.conf.d") {
|
||||
err := m.createConfig(containerRoot, folders)
|
||||
if containerRoot.hasPath("/etc/ld.so.conf.d") {
|
||||
err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
|
||||
return fmt.Errorf("failed to update ld.so.conf.d: %w", err)
|
||||
}
|
||||
} else {
|
||||
args = append(args, folders...)
|
||||
args = append(args, directories...)
|
||||
}
|
||||
|
||||
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
|
||||
// be configured to use a different config file by default.
|
||||
args = append(args, "-f", "/etc/ld.so.conf")
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(ldconfigPath, args, nil)
|
||||
return SafeExec(ldconfigPath, args, nil)
|
||||
}
|
||||
|
||||
type root string
|
||||
|
||||
func (r root) hasPath(path string) bool {
|
||||
_, err := os.Stat(filepath.Join(string(r), path))
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// resolveLDConfigPath determines the LDConfig path to use for the system.
|
||||
// On systems such as Ubuntu where `/sbin/ldconfig` is a wrapper around
|
||||
// /sbin/ldconfig.real, the latter is returned.
|
||||
func (m command) resolveLDConfigPath(path string) string {
|
||||
return strings.TrimPrefix(config.NormalizeLDConfigPath("@"+path), "@")
|
||||
}
|
||||
|
||||
// createConfig creates (or updates) /etc/ld.so.conf.d/00-nvcr-<RANDOM_STRING>.conf in the container
|
||||
// to include the required paths.
|
||||
// Note that the 00-nvcr prefix is chosen to ensure that these libraries have
|
||||
// a higher precedence than other libraries on the system but are applied AFTER
|
||||
// 00-cuda-compat.conf.
|
||||
func (m command) createConfig(root string, folders []string) error {
|
||||
if len(folders) == 0 {
|
||||
m.logger.Debugf("No folders to add to /etc/ld.so.conf")
|
||||
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/.
|
||||
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and
|
||||
// contains the specified directories on each line.
|
||||
func createLdsoconfdFile(pattern string, dirs ...string) error {
|
||||
if len(dirs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(filepath.Join(root, "/etc/ld.so.conf.d"), 0755); err != nil {
|
||||
return fmt.Errorf("failed to create ld.so.conf.d: %v", err)
|
||||
ldsoconfdDir := "/etc/ld.so.conf.d"
|
||||
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
|
||||
return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
|
||||
}
|
||||
|
||||
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "00-nvcr-*.conf")
|
||||
configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create config file: %v", err)
|
||||
return fmt.Errorf("failed to create config file: %w", err)
|
||||
}
|
||||
defer configFile.Close()
|
||||
defer func() {
|
||||
_ = configFile.Close()
|
||||
}()
|
||||
|
||||
m.logger.Debugf("Adding folders %v to %v", folders, configFile.Name())
|
||||
|
||||
configured := make(map[string]bool)
|
||||
for _, folder := range folders {
|
||||
if configured[folder] {
|
||||
added := make(map[string]bool)
|
||||
for _, dir := range dirs {
|
||||
if added[dir] {
|
||||
continue
|
||||
}
|
||||
_, err = configFile.WriteString(fmt.Sprintf("%s\n", folder))
|
||||
_, err = fmt.Fprintf(configFile, "%s\n", dir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
|
||||
return fmt.Errorf("failed to update config file: %w", err)
|
||||
}
|
||||
configured[folder] = true
|
||||
added[dir] = true
|
||||
}
|
||||
|
||||
// The created file needs to be world readable for the cases where the container is run as a non-root user.
|
||||
if err := os.Chmod(configFile.Name(), 0644); err != nil {
|
||||
return fmt.Errorf("failed to chmod config file: %v", err)
|
||||
if err := configFile.Chmod(0644); err != nil {
|
||||
return fmt.Errorf("failed to chmod config file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -157,7 +157,7 @@ func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []stri
|
||||
return containerImage.VisibleDevicesFromEnvVar()
|
||||
}
|
||||
|
||||
func getDevices(hookConfig *HookConfig, image image.CUDA, privileged bool) []string {
|
||||
func (hookConfig *hookConfig) getDevices(image image.CUDA, privileged bool) []string {
|
||||
// If enabled, try and get the device list from volume mounts first
|
||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||
devices := image.VisibleDevicesFromMounts()
|
||||
@@ -197,7 +197,11 @@ func getMigDevices(image image.CUDA, envvar string) *string {
|
||||
return &devices
|
||||
}
|
||||
|
||||
func getImexChannels(hookConfig *HookConfig, image image.CUDA, privileged bool) []string {
|
||||
func (hookConfig *hookConfig) getImexChannels(image image.CUDA, privileged bool) []string {
|
||||
if hookConfig.Features.IgnoreImexChannelRequests.IsEnabled() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If enabled, try and get the device list from volume mounts first
|
||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||
devices := image.ImexChannelsFromMounts()
|
||||
@@ -217,10 +221,10 @@ func getImexChannels(hookConfig *HookConfig, image image.CUDA, privileged bool)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
|
||||
func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
|
||||
// We use the default driver capabilities by default. This is filtered to only include the
|
||||
// supported capabilities
|
||||
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
|
||||
supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities)
|
||||
|
||||
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
||||
|
||||
@@ -244,10 +248,10 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo
|
||||
return capabilities
|
||||
}
|
||||
|
||||
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool) *nvidiaConfig {
|
||||
func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig {
|
||||
legacyImage := image.IsLegacy()
|
||||
|
||||
devices := getDevices(hookConfig, image, privileged)
|
||||
devices := hookConfig.getDevices(image, privileged)
|
||||
if len(devices) == 0 {
|
||||
// empty devices means this is not a GPU container.
|
||||
return nil
|
||||
@@ -269,7 +273,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool)
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
imexChannels := getImexChannels(hookConfig, image, privileged)
|
||||
imexChannels := hookConfig.getImexChannels(image, privileged)
|
||||
|
||||
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
|
||||
|
||||
@@ -288,7 +292,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool)
|
||||
}
|
||||
}
|
||||
|
||||
func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) {
|
||||
var h HookState
|
||||
d := json.NewDecoder(os.Stdin)
|
||||
if err := d.Decode(&h); err != nil {
|
||||
@@ -305,7 +309,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
image, err := image.New(
|
||||
image.WithEnv(s.Process.Env),
|
||||
image.WithMounts(s.Mounts),
|
||||
image.WithDisableRequire(hook.DisableRequire),
|
||||
image.WithDisableRequire(hookConfig.DisableRequire),
|
||||
)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
@@ -316,6 +320,6 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
Pid: h.Pid,
|
||||
Rootfs: s.Root.Path,
|
||||
Image: image,
|
||||
Nvidia: getNvidiaConfig(&hook, image, privileged),
|
||||
Nvidia: hookConfig.getNvidiaConfig(image, privileged),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
)
|
||||
|
||||
@@ -15,7 +16,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
description string
|
||||
env map[string]string
|
||||
privileged bool
|
||||
hookConfig *HookConfig
|
||||
hookConfig *hookConfig
|
||||
expectedConfig *nvidiaConfig
|
||||
expectedPanic bool
|
||||
}{
|
||||
@@ -394,8 +395,10 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
image.EnvVarNvidiaDriverCapabilities: "all",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SupportedDriverCapabilities: "video,display",
|
||||
hookConfig: &hookConfig{
|
||||
Config: &config.Config{
|
||||
SupportedDriverCapabilities: "video,display",
|
||||
},
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: []string{"all"},
|
||||
@@ -409,8 +412,10 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
image.EnvVarNvidiaDriverCapabilities: "video,display",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SupportedDriverCapabilities: "video,display,compute,utility",
|
||||
hookConfig: &hookConfig{
|
||||
Config: &config.Config{
|
||||
SupportedDriverCapabilities: "video,display,compute,utility",
|
||||
},
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: []string{"all"},
|
||||
@@ -423,8 +428,10 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
image.EnvVarNvidiaVisibleDevices: "all",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
hookConfig: &hookConfig{
|
||||
Config: &config.Config{
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: []string{"all"},
|
||||
@@ -438,9 +445,11 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SwarmResource: "DOCKER_SWARM_RESOURCE",
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
hookConfig: &hookConfig{
|
||||
Config: &config.Config{
|
||||
SwarmResource: "DOCKER_SWARM_RESOURCE",
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: []string{"GPU1", "GPU2"},
|
||||
@@ -454,9 +463,11 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SwarmResource: "NOT_DOCKER_SWARM_RESOURCE,DOCKER_SWARM_RESOURCE",
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
hookConfig: &hookConfig{
|
||||
Config: &config.Config{
|
||||
SwarmResource: "NOT_DOCKER_SWARM_RESOURCE,DOCKER_SWARM_RESOURCE",
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: []string{"GPU1", "GPU2"},
|
||||
@@ -470,14 +481,14 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
image.WithEnvMap(tc.env),
|
||||
)
|
||||
// Wrap the call to getNvidiaConfig() in a closure.
|
||||
var config *nvidiaConfig
|
||||
var cfg *nvidiaConfig
|
||||
getConfig := func() {
|
||||
hookConfig := tc.hookConfig
|
||||
if hookConfig == nil {
|
||||
defaultConfig, _ := getDefaultHookConfig()
|
||||
hookConfig = &defaultConfig
|
||||
hookCfg := tc.hookConfig
|
||||
if hookCfg == nil {
|
||||
defaultConfig, _ := config.GetDefault()
|
||||
hookCfg = &hookConfig{defaultConfig}
|
||||
}
|
||||
config = getNvidiaConfig(hookConfig, image, tc.privileged)
|
||||
cfg = hookCfg.getNvidiaConfig(image, tc.privileged)
|
||||
}
|
||||
|
||||
// For any tests that are expected to panic, make sure they do.
|
||||
@@ -491,18 +502,18 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
|
||||
// And start comparing the test results to the expected results.
|
||||
if tc.expectedConfig == nil {
|
||||
require.Nil(t, config, tc.description)
|
||||
require.Nil(t, cfg, tc.description)
|
||||
return
|
||||
}
|
||||
|
||||
require.NotNil(t, config, tc.description)
|
||||
require.NotNil(t, cfg, tc.description)
|
||||
|
||||
require.Equal(t, tc.expectedConfig.Devices, config.Devices)
|
||||
require.Equal(t, tc.expectedConfig.MigConfigDevices, config.MigConfigDevices)
|
||||
require.Equal(t, tc.expectedConfig.MigMonitorDevices, config.MigMonitorDevices)
|
||||
require.Equal(t, tc.expectedConfig.DriverCapabilities, config.DriverCapabilities)
|
||||
require.Equal(t, tc.expectedConfig.Devices, cfg.Devices)
|
||||
require.Equal(t, tc.expectedConfig.MigConfigDevices, cfg.MigConfigDevices)
|
||||
require.Equal(t, tc.expectedConfig.MigMonitorDevices, cfg.MigMonitorDevices)
|
||||
require.Equal(t, tc.expectedConfig.DriverCapabilities, cfg.DriverCapabilities)
|
||||
|
||||
require.ElementsMatch(t, tc.expectedConfig.Requirements, config.Requirements)
|
||||
require.ElementsMatch(t, tc.expectedConfig.Requirements, cfg.Requirements)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -612,10 +623,11 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
||||
),
|
||||
image.WithMounts(tc.mountDevices),
|
||||
)
|
||||
hookConfig, _ := getDefaultHookConfig()
|
||||
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||
devices = getDevices(&hookConfig, image, tc.privileged)
|
||||
defaultConfig, _ := config.GetDefault()
|
||||
cfg := &hookConfig{defaultConfig}
|
||||
cfg.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||
cfg.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||
devices = cfg.getDevices(image, tc.privileged)
|
||||
}
|
||||
|
||||
// For all other tests, just grab the devices and check the results
|
||||
@@ -940,8 +952,10 @@ func TestGetDriverCapabilities(t *testing.T) {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
var capabilities string
|
||||
|
||||
c := HookConfig{
|
||||
SupportedDriverCapabilities: tc.supportedCapabilities,
|
||||
c := hookConfig{
|
||||
Config: &config.Config{
|
||||
SupportedDriverCapabilities: tc.supportedCapabilities,
|
||||
},
|
||||
}
|
||||
|
||||
image, _ := image.New(
|
||||
|
||||
@@ -17,16 +17,10 @@ const (
|
||||
driverPath = "/run/nvidia/driver"
|
||||
)
|
||||
|
||||
// HookConfig : options for the nvidia-container-runtime-hook.
|
||||
type HookConfig config.Config
|
||||
|
||||
func getDefaultHookConfig() (HookConfig, error) {
|
||||
defaultCfg, err := config.GetDefault()
|
||||
if err != nil {
|
||||
return HookConfig{}, err
|
||||
}
|
||||
|
||||
return *(*HookConfig)(defaultCfg), nil
|
||||
// hookConfig wraps the toolkit config.
|
||||
// This allows for functions to be defined on the local type.
|
||||
type hookConfig struct {
|
||||
*config.Config
|
||||
}
|
||||
|
||||
// loadConfig loads the required paths for the hook config.
|
||||
@@ -56,12 +50,12 @@ func loadConfig() (*config.Config, error) {
|
||||
return config.GetDefault()
|
||||
}
|
||||
|
||||
func getHookConfig() (*HookConfig, error) {
|
||||
func getHookConfig() (*hookConfig, error) {
|
||||
cfg, err := loadConfig()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load config: %v", err)
|
||||
}
|
||||
config := (*HookConfig)(cfg)
|
||||
config := &hookConfig{cfg}
|
||||
|
||||
allSupportedDriverCapabilities := image.SupportedDriverCapabilities
|
||||
if config.SupportedDriverCapabilities == "all" {
|
||||
@@ -79,7 +73,7 @@ func getHookConfig() (*HookConfig, error) {
|
||||
|
||||
// getConfigOption returns the toml config option associated with the
|
||||
// specified struct field.
|
||||
func (c HookConfig) getConfigOption(fieldName string) string {
|
||||
func (c hookConfig) getConfigOption(fieldName string) string {
|
||||
t := reflect.TypeOf(c)
|
||||
f, ok := t.FieldByName(fieldName)
|
||||
if !ok {
|
||||
@@ -93,7 +87,7 @@ func (c HookConfig) getConfigOption(fieldName string) string {
|
||||
}
|
||||
|
||||
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
|
||||
func (c *HookConfig) getSwarmResourceEnvvars() []string {
|
||||
func (c *hookConfig) getSwarmResourceEnvvars() []string {
|
||||
if c.SwarmResource == "" {
|
||||
return nil
|
||||
}
|
||||
@@ -110,3 +104,26 @@ func (c *HookConfig) getSwarmResourceEnvvars() []string {
|
||||
|
||||
return envvars
|
||||
}
|
||||
|
||||
// nvidiaContainerCliCUDACompatModeFlags returns required --cuda-compat-mode
|
||||
// flag(s) depending on the hook and runtime configurations.
|
||||
func (c *hookConfig) nvidiaContainerCliCUDACompatModeFlags() []string {
|
||||
var flag string
|
||||
switch c.NVIDIAContainerRuntimeConfig.Modes.Legacy.CUDACompatMode {
|
||||
case config.CUDACompatModeLdconfig:
|
||||
flag = "--cuda-compat-mode=ldconfig"
|
||||
case config.CUDACompatModeMount:
|
||||
flag = "--cuda-compat-mode=mount"
|
||||
case config.CUDACompatModeDisabled, config.CUDACompatModeHook:
|
||||
flag = "--cuda-compat-mode=disabled"
|
||||
default:
|
||||
if !c.Features.AllowCUDACompatLibsFromContainer.IsEnabled() {
|
||||
flag = "--cuda-compat-mode=disabled"
|
||||
}
|
||||
}
|
||||
|
||||
if flag == "" {
|
||||
return nil
|
||||
}
|
||||
return []string{flag}
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
)
|
||||
|
||||
@@ -89,10 +90,10 @@ func TestGetHookConfig(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
var config HookConfig
|
||||
var cfg hookConfig
|
||||
getHookConfig := func() {
|
||||
c, _ := getHookConfig()
|
||||
config = *c
|
||||
cfg = *c
|
||||
}
|
||||
|
||||
if tc.expectedPanic {
|
||||
@@ -102,7 +103,7 @@ func TestGetHookConfig(t *testing.T) {
|
||||
|
||||
getHookConfig()
|
||||
|
||||
require.EqualValues(t, tc.expectedDriverCapabilities, config.SupportedDriverCapabilities)
|
||||
require.EqualValues(t, tc.expectedDriverCapabilities, cfg.SupportedDriverCapabilities)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -144,8 +145,10 @@ func TestGetSwarmResourceEnvvars(t *testing.T) {
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
c := &HookConfig{
|
||||
SwarmResource: tc.value,
|
||||
c := &hookConfig{
|
||||
Config: &config.Config{
|
||||
SwarmResource: tc.value,
|
||||
},
|
||||
}
|
||||
|
||||
envvars := c.getSwarmResourceEnvvars()
|
||||
|
||||
@@ -75,7 +75,7 @@ func doPrestart() {
|
||||
}
|
||||
cli := hook.NVIDIAContainerCLIConfig
|
||||
|
||||
container := getContainerConfig(*hook)
|
||||
container := hook.getContainerConfig()
|
||||
nvidia := container.Nvidia
|
||||
if nvidia == nil {
|
||||
// Not a GPU container, nothing to do.
|
||||
@@ -114,6 +114,8 @@ func doPrestart() {
|
||||
}
|
||||
args = append(args, "configure")
|
||||
|
||||
args = append(args, hook.nvidiaContainerCliCUDACompatModeFlags()...)
|
||||
|
||||
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
|
||||
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
|
||||
}
|
||||
|
||||
@@ -22,9 +22,9 @@ import (
|
||||
const (
|
||||
nvidiaRuntime = "nvidia-container-runtime"
|
||||
nvidiaHook = "nvidia-container-runtime-hook"
|
||||
bundlePathSuffix = "test/output/bundle/"
|
||||
bundlePathSuffix = "tests/output/bundle/"
|
||||
specFile = "config.json"
|
||||
unmodifiedSpecFileSuffix = "test/input/test_spec.json"
|
||||
unmodifiedSpecFileSuffix = "tests/input/test_spec.json"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -46,8 +46,8 @@ func TestMain(m *testing.M) {
|
||||
if err != nil {
|
||||
log.Fatalf("error in test setup: could not get module root: %v", err)
|
||||
}
|
||||
testBinPath := filepath.Join(moduleRoot, "test", "bin")
|
||||
testInputPath := filepath.Join(moduleRoot, "test", "input")
|
||||
testBinPath := filepath.Join(moduleRoot, "tests", "bin")
|
||||
testInputPath := filepath.Join(moduleRoot, "tests", "input")
|
||||
|
||||
// Set the environment variables for the test
|
||||
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
|
||||
|
||||
@@ -68,12 +68,11 @@ type config struct {
|
||||
dryRun bool
|
||||
runtime string
|
||||
configFilePath string
|
||||
executablePath string
|
||||
configSource string
|
||||
mode string
|
||||
hookFilePath string
|
||||
|
||||
runtimeConfigOverrideJSON string
|
||||
|
||||
nvidiaRuntime struct {
|
||||
name string
|
||||
path string
|
||||
@@ -120,6 +119,11 @@ func (m command) build() *cli.Command {
|
||||
Usage: "path to the config file for the target runtime",
|
||||
Destination: &config.configFilePath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "executable-path",
|
||||
Usage: "The path to the runtime executable. This is used to extract the current config",
|
||||
Destination: &config.executablePath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config-mode",
|
||||
Usage: "the config mode for runtimes that support multiple configuration mechanisms",
|
||||
@@ -163,7 +167,7 @@ func (m command) build() *cli.Command {
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "cdi.enabled",
|
||||
Aliases: []string{"cdi.enable"},
|
||||
Aliases: []string{"cdi.enable", "enable-cdi"},
|
||||
Usage: "Enable CDI in the configured runtime",
|
||||
Destination: &config.cdi.enabled,
|
||||
},
|
||||
@@ -208,9 +212,9 @@ func (m command) validateFlags(c *cli.Context, config *config) error {
|
||||
config.cdi.enabled = false
|
||||
}
|
||||
|
||||
if config.runtimeConfigOverrideJSON != "" && config.runtime != "containerd" {
|
||||
m.logger.Warningf("Ignoring runtime-config-override flag for %v", config.runtime)
|
||||
config.runtimeConfigOverrideJSON = ""
|
||||
if config.executablePath != "" && config.runtime == "docker" {
|
||||
m.logger.Warningf("Ignoring executable-path=%q flag for %v", config.executablePath, config.runtime)
|
||||
config.executablePath = ""
|
||||
}
|
||||
|
||||
switch config.configSource {
|
||||
@@ -225,6 +229,17 @@ func (m command) validateFlags(c *cli.Context, config *config) error {
|
||||
return fmt.Errorf("unrecognized Config Source: %v", config.configSource)
|
||||
}
|
||||
|
||||
if config.configFilePath == "" {
|
||||
switch config.runtime {
|
||||
case "containerd":
|
||||
config.configFilePath = defaultContainerdConfigFilePath
|
||||
case "crio":
|
||||
config.configFilePath = defaultCrioConfigFilePath
|
||||
case "docker":
|
||||
config.configFilePath = defaultDockerConfigFilePath
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -241,9 +256,6 @@ func (m command) configureWrapper(c *cli.Context, config *config) error {
|
||||
|
||||
// configureConfigFile updates the specified container engine config file to enable the NVIDIA runtime.
|
||||
func (m command) configureConfigFile(c *cli.Context, config *config) error {
|
||||
configFilePath := config.resolveConfigFilePath()
|
||||
|
||||
var err error
|
||||
configSource, err := config.resolveConfigSource()
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -254,19 +266,19 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
|
||||
case "containerd":
|
||||
cfg, err = containerd.New(
|
||||
containerd.WithLogger(m.logger),
|
||||
containerd.WithPath(configFilePath),
|
||||
containerd.WithPath(config.configFilePath),
|
||||
containerd.WithConfigSource(configSource),
|
||||
)
|
||||
case "crio":
|
||||
cfg, err = crio.New(
|
||||
crio.WithLogger(m.logger),
|
||||
crio.WithPath(configFilePath),
|
||||
crio.WithPath(config.configFilePath),
|
||||
crio.WithConfigSource(configSource),
|
||||
)
|
||||
case "docker":
|
||||
cfg, err = docker.New(
|
||||
docker.WithLogger(m.logger),
|
||||
docker.WithPath(configFilePath),
|
||||
docker.WithPath(config.configFilePath),
|
||||
)
|
||||
default:
|
||||
err = fmt.Errorf("unrecognized runtime '%v'", config.runtime)
|
||||
@@ -284,9 +296,8 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
err = enableCDI(config, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to enable CDI in %s: %w", config.runtime, err)
|
||||
if config.cdi.enabled {
|
||||
cfg.EnableCDI()
|
||||
}
|
||||
|
||||
outputPath := config.getOutputConfigPath()
|
||||
@@ -307,22 +318,6 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// resolveConfigFilePath returns the default config file path for the configured container engine
|
||||
func (c *config) resolveConfigFilePath() string {
|
||||
if c.configFilePath != "" {
|
||||
return c.configFilePath
|
||||
}
|
||||
switch c.runtime {
|
||||
case "containerd":
|
||||
return defaultContainerdConfigFilePath
|
||||
case "crio":
|
||||
return defaultCrioConfigFilePath
|
||||
case "docker":
|
||||
return defaultDockerConfigFilePath
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveConfigSource returns the default config source or the user provided config source
|
||||
func (c *config) resolveConfigSource() (toml.Loader, error) {
|
||||
switch c.configSource {
|
||||
@@ -339,9 +334,9 @@ func (c *config) resolveConfigSource() (toml.Loader, error) {
|
||||
func (c *config) getCommandConfigSource() toml.Loader {
|
||||
switch c.runtime {
|
||||
case "containerd":
|
||||
return containerd.CommandLineSource("")
|
||||
return containerd.CommandLineSource("", c.executablePath)
|
||||
case "crio":
|
||||
return crio.CommandLineSource("")
|
||||
return crio.CommandLineSource("", c.executablePath)
|
||||
}
|
||||
return toml.Empty
|
||||
}
|
||||
@@ -351,7 +346,7 @@ func (c *config) getOutputConfigPath() string {
|
||||
if c.dryRun {
|
||||
return ""
|
||||
}
|
||||
return c.resolveConfigFilePath()
|
||||
return c.configFilePath
|
||||
}
|
||||
|
||||
// configureOCIHook creates and configures the OCI hook for the NVIDIA runtime
|
||||
@@ -362,19 +357,3 @@ func (m *command) configureOCIHook(c *cli.Context, config *config) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// enableCDI enables the use of CDI in the corresponding container engine
|
||||
func enableCDI(config *config, cfg engine.Interface) error {
|
||||
if !config.cdi.enabled {
|
||||
return nil
|
||||
}
|
||||
switch config.runtime {
|
||||
case "containerd":
|
||||
cfg.Set("enable_cdi", true)
|
||||
case "docker":
|
||||
cfg.Set("features", map[string]bool{"cdi": true})
|
||||
default:
|
||||
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
|
||||
FROM nvidia/cuda:12.6.2-base-ubuntu20.04
|
||||
FROM nvidia/cuda:12.9.0-base-ubuntu20.04
|
||||
|
||||
ARG ARTIFACTS_ROOT
|
||||
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
ARG VERSION="N/A"
|
||||
|
||||
FROM nvidia/cuda:12.6.2-base-ubi8 as build
|
||||
FROM nvidia/cuda:12.9.0-base-ubi8 as build
|
||||
|
||||
RUN yum install -y \
|
||||
wget make git gcc \
|
||||
@@ -48,7 +48,7 @@ COPY . .
|
||||
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
|
||||
|
||||
|
||||
FROM nvidia/cuda:12.6.2-base-ubi8
|
||||
FROM nvidia/cuda:12.9.0-base-ubi8
|
||||
|
||||
ENV NVIDIA_DISABLE_REQUIRE="true"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=void
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
ARG VERSION="N/A"
|
||||
|
||||
FROM nvidia/cuda:12.6.2-base-ubuntu20.04 as build
|
||||
FROM nvidia/cuda:12.9.0-base-ubuntu20.04 as build
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y wget make git gcc \
|
||||
@@ -47,7 +47,7 @@ COPY . .
|
||||
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
|
||||
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu20.04
|
||||
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04
|
||||
|
||||
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
|
||||
RUN rm -f /etc/apt/sources.list.d/cuda.list
|
||||
|
||||
@@ -27,12 +27,6 @@ DIST_DIR ?= $(CURDIR)/dist
|
||||
##### Global variables #####
|
||||
include $(CURDIR)/versions.mk
|
||||
|
||||
ifeq ($(IMAGE_NAME),)
|
||||
REGISTRY ?= nvidia
|
||||
IMAGE_NAME := $(REGISTRY)/container-toolkit
|
||||
endif
|
||||
|
||||
VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
IMAGE_VERSION := $(VERSION)
|
||||
|
||||
IMAGE_TAG ?= $(VERSION)-$(DIST)
|
||||
@@ -49,6 +43,7 @@ DISTRIBUTIONS := ubuntu20.04 ubi8
|
||||
|
||||
META_TARGETS := packaging
|
||||
|
||||
IMAGE_TARGETS := $(patsubst %,image-%,$(DISTRIBUTIONS) $(META_TARGETS))
|
||||
BUILD_TARGETS := $(patsubst %,build-%,$(DISTRIBUTIONS) $(META_TARGETS))
|
||||
PUSH_TARGETS := $(patsubst %,push-%,$(DISTRIBUTIONS) $(META_TARGETS))
|
||||
TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
|
||||
@@ -89,7 +84,7 @@ build-%: DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile.$(DOCKERFILE_SU
|
||||
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
|
||||
|
||||
# Use a generic build target to build the relevant images
|
||||
$(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
$(IMAGE_TARGETS): image-%: $(ARTIFACTS_ROOT)
|
||||
DOCKER_BUILDKIT=1 \
|
||||
$(DOCKER) $(BUILDX) build --pull \
|
||||
--provenance=false --sbom=false \
|
||||
@@ -108,7 +103,6 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
-f $(DOCKERFILE) \
|
||||
$(CURDIR)
|
||||
|
||||
|
||||
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
|
||||
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
|
||||
|
||||
@@ -122,7 +116,13 @@ build-packaging: PACKAGE_DIST = all
|
||||
# Test targets
|
||||
test-%: DIST = $(*)
|
||||
|
||||
TEST_CASES ?= toolkit docker crio containerd
|
||||
# Handle the default build target.
|
||||
.PHONY: build
|
||||
build: $(DEFAULT_PUSH_TARGET)
|
||||
$(DEFAULT_PUSH_TARGET): build-$(DEFAULT_PUSH_TARGET)
|
||||
$(DEFAULT_PUSH_TARGET): DIST = $(DEFAULT_PUSH_TARGET)
|
||||
|
||||
TEST_CASES ?= docker crio containerd
|
||||
$(TEST_TARGETS): test-%:
|
||||
TEST_CASES="$(TEST_CASES)" bash -x $(CURDIR)/test/container/main.sh run \
|
||||
$(CURDIR)/shared-$(*) \
|
||||
|
||||
@@ -16,8 +16,7 @@ PUSH_ON_BUILD ?= false
|
||||
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
|
||||
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
|
||||
|
||||
# We only generate amd64 image for ubuntu18.04
|
||||
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
$(BUILD_TARGETS): build-%: image-%
|
||||
|
||||
# We only generate a single image for packaging targets
|
||||
build-packaging: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
@@ -12,4 +12,22 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
PUSH_ON_BUILD ?= false
|
||||
ARCH ?= $(shell uname -m)
|
||||
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/$(ARCH)
|
||||
|
||||
ifeq ($(PUSH_ON_BUILD),true)
|
||||
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
|
||||
$(BUILD_TARGETS): build-%: image-%
|
||||
$(DOCKER) push "$(IMAGE)"
|
||||
else
|
||||
$(BUILD_TARGETS): build-%: image-%
|
||||
endif
|
||||
|
||||
# For the default distribution we also retag the image.
|
||||
# Note: This needs to be updated for multi-arch images.
|
||||
ifeq ($(IMAGE_TAG),$(VERSION)-$(DIST))
|
||||
$(DEFAULT_PUSH_TARGET):
|
||||
$(DOCKER) image inspect $(IMAGE) > /dev/null || $(DOCKER) pull $(IMAGE)
|
||||
$(DOCKER) tag $(IMAGE) $(subst :$(IMAGE_TAG),:$(VERSION),$(IMAGE))
|
||||
endif
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# This Dockerfile is also used to define the golang version used in this project
|
||||
# This allows dependabot to manage this version in addition to other images.
|
||||
FROM golang:1.23.2
|
||||
FROM golang:1.23.9
|
||||
|
||||
WORKDIR /work
|
||||
COPY * .
|
||||
|
||||
23
go.mod
23
go.mod
@@ -1,32 +1,35 @@
|
||||
module github.com/NVIDIA/nvidia-container-toolkit
|
||||
|
||||
go 1.20
|
||||
go 1.22
|
||||
|
||||
require (
|
||||
github.com/NVIDIA/go-nvlib v0.6.1
|
||||
github.com/NVIDIA/go-nvml v0.12.4-0
|
||||
github.com/NVIDIA/go-nvlib v0.7.2
|
||||
github.com/NVIDIA/go-nvml v0.12.4-1
|
||||
github.com/cyphar/filepath-securejoin v0.4.1
|
||||
github.com/fsnotify/fsnotify v1.7.0
|
||||
github.com/moby/sys/reexec v0.1.0
|
||||
github.com/moby/sys/symlink v0.3.0
|
||||
github.com/opencontainers/runtime-spec v1.2.0
|
||||
github.com/opencontainers/runc v1.2.6
|
||||
github.com/opencontainers/runtime-spec v1.2.1
|
||||
github.com/pelletier/go-toml v1.9.5
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/stretchr/testify v1.9.0
|
||||
github.com/urfave/cli/v2 v2.27.4
|
||||
github.com/stretchr/testify v1.10.0
|
||||
github.com/urfave/cli/v2 v2.27.5
|
||||
golang.org/x/mod v0.20.0
|
||||
golang.org/x/sys v0.26.0
|
||||
tags.cncf.io/container-device-interface v0.8.0
|
||||
golang.org/x/sys v0.28.0
|
||||
tags.cncf.io/container-device-interface v0.8.1
|
||||
tags.cncf.io/container-device-interface/specs-go v0.8.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/kr/pretty v0.3.1 // indirect
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
|
||||
github.com/opencontainers/selinux v1.11.0 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/rogpeppe/go-internal v1.11.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
|
||||
|
||||
41
go.sum
41
go.sum
@@ -1,12 +1,14 @@
|
||||
github.com/NVIDIA/go-nvlib v0.6.1 h1:0/5FvaKvDJoJeJ+LFlh+NDQMxMlVw9wOXrOVrGXttfE=
|
||||
github.com/NVIDIA/go-nvlib v0.6.1/go.mod h1:9UrsLGx/q1OrENygXjOuM5Ey5KCtiZhbvBlbUIxtGWY=
|
||||
github.com/NVIDIA/go-nvml v0.12.4-0 h1:4tkbB3pT1O77JGr0gQ6uD8FrsUPqP1A/EOEm2wI1TUg=
|
||||
github.com/NVIDIA/go-nvml v0.12.4-0/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
|
||||
github.com/NVIDIA/go-nvlib v0.7.2 h1:7sy/NVUa4sM9FLKwH6CjBfHSWrJUmv8emVyxLTzjfOA=
|
||||
github.com/NVIDIA/go-nvlib v0.7.2/go.mod h1:2Kh2kYSP5IJ8EKf0/SYDzHiQKb9EJkwOf2LQzu6pXzY=
|
||||
github.com/NVIDIA/go-nvml v0.12.4-1 h1:WKUvqshhWSNTfm47ETRhv0A0zJyr1ncCuHiXwoTrBEc=
|
||||
github.com/NVIDIA/go-nvml v0.12.4-1/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
|
||||
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
|
||||
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
|
||||
github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
@@ -28,12 +30,16 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
|
||||
github.com/moby/sys/reexec v0.1.0 h1:RrBi8e0EBTLEgfruBOFcxtElzRGTEUkeIFaVXgU7wok=
|
||||
github.com/moby/sys/reexec v0.1.0/go.mod h1:EqjBg8F3X7iZe5pU6nRZnYCMUTXoxsjiIfHup5wYIN8=
|
||||
github.com/moby/sys/symlink v0.3.0 h1:GZX89mEZ9u53f97npBy4Rc3vJKj7JBDj/PN2I22GrNU=
|
||||
github.com/moby/sys/symlink v0.3.0/go.mod h1:3eNdhduHmYPcgsJtZXW1W4XUJdZGBIkttZ8xKqPUJq0=
|
||||
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
|
||||
github.com/opencontainers/runc v1.2.6 h1:P7Hqg40bsMvQGCS4S7DJYhUZOISMLJOB2iGX5COWiPk=
|
||||
github.com/opencontainers/runc v1.2.6/go.mod h1:dOQeFo29xZKBNeRBI0B19mJtfHv68YgCTh1X+YphA+4=
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
|
||||
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/runtime-spec v1.2.1 h1:S4k4ryNgEpxW1dzyqffOmhI1BHYcjzU8lpJfSlR0xww=
|
||||
github.com/opencontainers/runtime-spec v1.2.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
|
||||
github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
|
||||
@@ -44,8 +50,9 @@ github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCko
|
||||
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
|
||||
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
|
||||
@@ -55,13 +62,13 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
|
||||
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
|
||||
github.com/urfave/cli/v2 v2.27.4 h1:o1owoI+02Eb+K107p27wEX9Bb8eqIoZCfLXloLUSWJ8=
|
||||
github.com/urfave/cli/v2 v2.27.4/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ=
|
||||
github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w=
|
||||
github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ=
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
|
||||
@@ -76,8 +83,8 @@ golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
|
||||
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
|
||||
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
@@ -88,7 +95,7 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
|
||||
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
|
||||
tags.cncf.io/container-device-interface v0.8.0 h1:8bCFo/g9WODjWx3m6EYl3GfUG31eKJbaggyBDxEldRc=
|
||||
tags.cncf.io/container-device-interface v0.8.0/go.mod h1:Apb7N4VdILW0EVdEMRYXIDVRZfNJZ+kmEUss2kRRQ6Y=
|
||||
tags.cncf.io/container-device-interface v0.8.1 h1:c0jN4Mt6781jD67NdPajmZlD1qrqQyov/Xfoab37lj0=
|
||||
tags.cncf.io/container-device-interface v0.8.1/go.mod h1:Apb7N4VdILW0EVdEMRYXIDVRZfNJZ+kmEUss2kRRQ6Y=
|
||||
tags.cncf.io/container-device-interface/specs-go v0.8.0 h1:QYGFzGxvYK/ZLMrjhvY0RjpUavIn4KcmRmVP/JjdBTA=
|
||||
tags.cncf.io/container-device-interface/specs-go v0.8.0/go.mod h1:BhJIkjjPh4qpys+qm4DAYtUyryaTDg9zris+AczXyws=
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
@@ -34,29 +35,62 @@ type ContainerCLIConfig struct {
|
||||
NoPivot bool `toml:"no-pivot,omitempty"`
|
||||
NoCgroups bool `toml:"no-cgroups"`
|
||||
User string `toml:"user"`
|
||||
Ldconfig string `toml:"ldconfig"`
|
||||
// Ldconfig represents the path to the ldconfig binary to be used to update
|
||||
// the ldcache in a container as it is being created.
|
||||
// If this path starts with a '@' the path is relative to the host and if
|
||||
// not it is treated as a container path.
|
||||
//
|
||||
// Note that the use of container paths is disabled by default and if this
|
||||
// is required, the features.allow-ldconfig-from-container feature gate must
|
||||
// be enabled explicitly.
|
||||
Ldconfig ldconfigPath `toml:"ldconfig"`
|
||||
}
|
||||
|
||||
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
|
||||
// This is only done for host LDConfigs and is required to handle systems where
|
||||
// /sbin/ldconfig is a wrapper around /sbin/ldconfig.real.
|
||||
func (c *ContainerCLIConfig) NormalizeLDConfigPath() string {
|
||||
return NormalizeLDConfigPath(c.Ldconfig)
|
||||
return string(c.Ldconfig.normalize())
|
||||
}
|
||||
|
||||
// An ldconfigPath is used to represent the path to ldconfig.
|
||||
type ldconfigPath string
|
||||
|
||||
func (p ldconfigPath) assertValid(allowContainerRelativePath bool) error {
|
||||
if p.isHostRelative() {
|
||||
return nil
|
||||
}
|
||||
if allowContainerRelativePath {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("nvidia-container-cli.ldconfig value %q is not host-relative (does not start with a '@')", p)
|
||||
}
|
||||
|
||||
func (p ldconfigPath) isHostRelative() bool {
|
||||
return strings.HasPrefix(string(p), "@")
|
||||
}
|
||||
|
||||
// normalize returns the resolved path of the configured LDConfig binary.
|
||||
// This is only done for host LDConfigs and is required to handle systems where
|
||||
// /sbin/ldconfig is a wrapper around /sbin/ldconfig.real.
|
||||
func (p ldconfigPath) normalize() ldconfigPath {
|
||||
if !p.isHostRelative() {
|
||||
return p
|
||||
}
|
||||
|
||||
path := string(p)
|
||||
trimmedPath := strings.TrimSuffix(strings.TrimPrefix(path, "@"), ".real")
|
||||
// If the .real path exists, we return that.
|
||||
if _, err := os.Stat(trimmedPath + ".real"); err == nil {
|
||||
return ldconfigPath("@" + trimmedPath + ".real")
|
||||
}
|
||||
// If the .real path does not exist (or cannot be read) we return the non-.real path.
|
||||
return ldconfigPath("@" + trimmedPath)
|
||||
}
|
||||
|
||||
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
|
||||
// This is only done for host LDConfigs and is required to handle systems where
|
||||
// /sbin/ldconfig is a wrapper around /sbin/ldconfig.real.
|
||||
func NormalizeLDConfigPath(path string) string {
|
||||
if !strings.HasPrefix(path, "@") {
|
||||
return path
|
||||
}
|
||||
|
||||
trimmedPath := strings.TrimSuffix(strings.TrimPrefix(path, "@"), ".real")
|
||||
// If the .real path exists, we return that.
|
||||
if _, err := os.Stat(trimmedPath + ".real"); err == nil {
|
||||
return "@" + trimmedPath + ".real"
|
||||
}
|
||||
// If the .real path does not exist (or cannot be read) we return the non-.real path.
|
||||
return "@" + trimmedPath
|
||||
return string(ldconfigPath(path).normalize())
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ func TestNormalizeLDConfigPath(t *testing.T) {
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
ldconfig string
|
||||
ldconfig ldconfigPath
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
@@ -51,12 +51,12 @@ func TestNormalizeLDConfigPath(t *testing.T) {
|
||||
},
|
||||
{
|
||||
description: "host .real file exists is returned",
|
||||
ldconfig: "@" + filepath.Join(testDir, "exists.real"),
|
||||
ldconfig: ldconfigPath("@" + filepath.Join(testDir, "exists.real")),
|
||||
expected: "@" + filepath.Join(testDir, "exists.real"),
|
||||
},
|
||||
{
|
||||
description: "host resolves .real file",
|
||||
ldconfig: "@" + filepath.Join(testDir, "exists"),
|
||||
ldconfig: ldconfigPath("@" + filepath.Join(testDir, "exists")),
|
||||
expected: "@" + filepath.Join(testDir, "exists.real"),
|
||||
},
|
||||
{
|
||||
|
||||
@@ -18,6 +18,7 @@ package config
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -51,6 +52,8 @@ var (
|
||||
NVIDIAContainerToolkitExecutable = "nvidia-container-toolkit"
|
||||
)
|
||||
|
||||
var errInvalidConfig = errors.New("invalid config value")
|
||||
|
||||
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
|
||||
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
|
||||
type Config struct {
|
||||
@@ -118,6 +121,9 @@ func GetDefault() (*Config, error) {
|
||||
AnnotationPrefixes: []string{cdi.AnnotationPrefix},
|
||||
SpecDirs: cdi.DefaultSpecDirs,
|
||||
},
|
||||
Legacy: legacyModeConfig{
|
||||
CUDACompatMode: defaultCUDACompatMode,
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
|
||||
@@ -127,8 +133,20 @@ func GetDefault() (*Config, error) {
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
func getLdConfigPath() string {
|
||||
return NormalizeLDConfigPath("@/sbin/ldconfig")
|
||||
// assertValid checks for a valid config.
|
||||
func (c *Config) assertValid() error {
|
||||
err := c.NVIDIAContainerCLIConfig.Ldconfig.assertValid(c.Features.AllowLDConfigFromContainer.IsEnabled())
|
||||
if err != nil {
|
||||
return errors.Join(err, errInvalidConfig)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getLdConfigPath allows us to override this function for testing.
|
||||
var getLdConfigPath = getLdConfigPathStub
|
||||
|
||||
func getLdConfigPathStub() ldconfigPath {
|
||||
return ldconfigPath("@/sbin/ldconfig").normalize()
|
||||
}
|
||||
|
||||
func getUserGroup() string {
|
||||
|
||||
@@ -44,23 +44,21 @@ func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||
|
||||
func TestGetConfig(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
contents []string
|
||||
expectedError error
|
||||
inspectLdconfig bool
|
||||
distIdsLike []string
|
||||
expectedConfig *Config
|
||||
description string
|
||||
contents []string
|
||||
expectedError error
|
||||
distIdsLike []string
|
||||
expectedConfig *Config
|
||||
}{
|
||||
{
|
||||
description: "empty config is default",
|
||||
inspectLdconfig: true,
|
||||
description: "empty config is default",
|
||||
expectedConfig: &Config{
|
||||
AcceptEnvvarUnprivileged: true,
|
||||
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "",
|
||||
LoadKmods: true,
|
||||
Ldconfig: "WAS_CHECKED",
|
||||
Ldconfig: "@/test/ld/config/path",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
@@ -76,6 +74,9 @@ func TestGetConfig(t *testing.T) {
|
||||
AnnotationPrefixes: []string{"cdi.k8s.io/"},
|
||||
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
|
||||
},
|
||||
Legacy: legacyModeConfig{
|
||||
CUDACompatMode: "ldconfig",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
|
||||
@@ -93,8 +94,9 @@ func TestGetConfig(t *testing.T) {
|
||||
"supported-driver-capabilities = \"compute,utility\"",
|
||||
"nvidia-container-cli.root = \"/bar/baz\"",
|
||||
"nvidia-container-cli.load-kmods = false",
|
||||
"nvidia-container-cli.ldconfig = \"/foo/bar/ldconfig\"",
|
||||
"nvidia-container-cli.ldconfig = \"@/foo/bar/ldconfig\"",
|
||||
"nvidia-container-cli.user = \"foo:bar\"",
|
||||
"nvidia-container-cli.cuda-compat-mode = \"mount\"",
|
||||
"nvidia-container-runtime.debug = \"/foo/bar\"",
|
||||
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
|
||||
"nvidia-container-runtime.log-level = \"debug\"",
|
||||
@@ -104,6 +106,7 @@ func TestGetConfig(t *testing.T) {
|
||||
"nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
|
||||
"nvidia-container-runtime.modes.cdi.spec-dirs = [\"/except/etc/cdi\", \"/not/var/run/cdi\",]",
|
||||
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"nvidia-container-runtime.modes.legacy.cuda-compat-mode = \"mount\"",
|
||||
"nvidia-container-runtime-hook.path = \"/foo/bar/nvidia-container-runtime-hook\"",
|
||||
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
@@ -113,7 +116,7 @@ func TestGetConfig(t *testing.T) {
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "/bar/baz",
|
||||
LoadKmods: false,
|
||||
Ldconfig: "/foo/bar/ldconfig",
|
||||
Ldconfig: "@/foo/bar/ldconfig",
|
||||
User: "foo:bar",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
@@ -136,6 +139,9 @@ func TestGetConfig(t *testing.T) {
|
||||
"/not/var/run/cdi",
|
||||
},
|
||||
},
|
||||
Legacy: legacyModeConfig{
|
||||
CUDACompatMode: "mount",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
|
||||
@@ -146,6 +152,56 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "feature allows ldconfig to be overridden",
|
||||
contents: []string{
|
||||
"[nvidia-container-cli]",
|
||||
"ldconfig = \"/foo/bar/ldconfig\"",
|
||||
"[features]",
|
||||
"allow-ldconfig-from-container = true",
|
||||
},
|
||||
expectedConfig: &Config{
|
||||
AcceptEnvvarUnprivileged: true,
|
||||
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Ldconfig: "/foo/bar/ldconfig",
|
||||
LoadKmods: true,
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "info",
|
||||
Runtimes: []string{"docker-runc", "runc", "crun"},
|
||||
Mode: "auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
CDI: cdiModeConfig{
|
||||
DefaultKind: "nvidia.com/gpu",
|
||||
AnnotationPrefixes: []string{
|
||||
"cdi.k8s.io/",
|
||||
},
|
||||
SpecDirs: []string{
|
||||
"/etc/cdi",
|
||||
"/var/run/cdi",
|
||||
},
|
||||
},
|
||||
Legacy: legacyModeConfig{
|
||||
CUDACompatMode: "ldconfig",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
|
||||
Path: "nvidia-container-runtime-hook",
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "nvidia-ctk",
|
||||
},
|
||||
Features: features{
|
||||
AllowLDConfigFromContainer: ptr(feature(true)),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "config options set in section",
|
||||
contents: []string{
|
||||
@@ -154,7 +210,8 @@ func TestGetConfig(t *testing.T) {
|
||||
"[nvidia-container-cli]",
|
||||
"root = \"/bar/baz\"",
|
||||
"load-kmods = false",
|
||||
"ldconfig = \"/foo/bar/ldconfig\"",
|
||||
"ldconfig = \"@/foo/bar/ldconfig\"",
|
||||
"cuda-compat-mode = \"mount\"",
|
||||
"user = \"foo:bar\"",
|
||||
"[nvidia-container-runtime]",
|
||||
"debug = \"/foo/bar\"",
|
||||
@@ -168,6 +225,8 @@ func TestGetConfig(t *testing.T) {
|
||||
"spec-dirs = [\"/except/etc/cdi\", \"/not/var/run/cdi\",]",
|
||||
"[nvidia-container-runtime.modes.csv]",
|
||||
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"[nvidia-container-runtime.modes.legacy]",
|
||||
"cuda-compat-mode = \"mount\"",
|
||||
"[nvidia-container-runtime-hook]",
|
||||
"path = \"/foo/bar/nvidia-container-runtime-hook\"",
|
||||
"[nvidia-ctk]",
|
||||
@@ -179,7 +238,7 @@ func TestGetConfig(t *testing.T) {
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "/bar/baz",
|
||||
LoadKmods: false,
|
||||
Ldconfig: "/foo/bar/ldconfig",
|
||||
Ldconfig: "@/foo/bar/ldconfig",
|
||||
User: "foo:bar",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
@@ -202,6 +261,9 @@ func TestGetConfig(t *testing.T) {
|
||||
"/not/var/run/cdi",
|
||||
},
|
||||
},
|
||||
Legacy: legacyModeConfig{
|
||||
CUDACompatMode: "mount",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
|
||||
@@ -213,16 +275,15 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "suse config",
|
||||
distIdsLike: []string{"suse", "opensuse"},
|
||||
inspectLdconfig: true,
|
||||
description: "suse config",
|
||||
distIdsLike: []string{"suse", "opensuse"},
|
||||
expectedConfig: &Config{
|
||||
AcceptEnvvarUnprivileged: true,
|
||||
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "",
|
||||
LoadKmods: true,
|
||||
Ldconfig: "WAS_CHECKED",
|
||||
Ldconfig: "@/test/ld/config/path",
|
||||
User: "root:video",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
@@ -239,6 +300,9 @@ func TestGetConfig(t *testing.T) {
|
||||
AnnotationPrefixes: []string{"cdi.k8s.io/"},
|
||||
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
|
||||
},
|
||||
Legacy: legacyModeConfig{
|
||||
CUDACompatMode: "ldconfig",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
|
||||
@@ -250,9 +314,8 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "suse config overrides user",
|
||||
distIdsLike: []string{"suse", "opensuse"},
|
||||
inspectLdconfig: true,
|
||||
description: "suse config overrides user",
|
||||
distIdsLike: []string{"suse", "opensuse"},
|
||||
contents: []string{
|
||||
"nvidia-container-cli.user = \"foo:bar\"",
|
||||
},
|
||||
@@ -262,7 +325,7 @@ func TestGetConfig(t *testing.T) {
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "",
|
||||
LoadKmods: true,
|
||||
Ldconfig: "WAS_CHECKED",
|
||||
Ldconfig: "@/test/ld/config/path",
|
||||
User: "foo:bar",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
@@ -279,6 +342,9 @@ func TestGetConfig(t *testing.T) {
|
||||
AnnotationPrefixes: []string{"cdi.k8s.io/"},
|
||||
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
|
||||
},
|
||||
Legacy: legacyModeConfig{
|
||||
CUDACompatMode: "ldconfig",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
|
||||
@@ -293,6 +359,7 @@ func TestGetConfig(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
defer setGetLdConfigPathForTest()()
|
||||
defer setGetDistIDLikeForTest(tc.distIdsLike)()
|
||||
reader := strings.NewReader(strings.Join(tc.contents, "\n"))
|
||||
|
||||
@@ -305,21 +372,63 @@ func TestGetConfig(t *testing.T) {
|
||||
cfg, err := tomlCfg.Config()
|
||||
require.NoError(t, err)
|
||||
|
||||
// We first handle the ldconfig path since this is currently system-dependent.
|
||||
if tc.inspectLdconfig {
|
||||
ldconfig := cfg.NVIDIAContainerCLIConfig.Ldconfig
|
||||
require.True(t, strings.HasPrefix(ldconfig, "@/sbin/ldconfig"))
|
||||
remaining := strings.TrimPrefix(ldconfig, "@/sbin/ldconfig")
|
||||
require.True(t, remaining == ".real" || remaining == "")
|
||||
|
||||
cfg.NVIDIAContainerCLIConfig.Ldconfig = "WAS_CHECKED"
|
||||
}
|
||||
|
||||
require.EqualValues(t, tc.expectedConfig, cfg)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAssertValid(t *testing.T) {
|
||||
defer setGetLdConfigPathForTest()()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
config *Config
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
description: "default is valid",
|
||||
config: func() *Config {
|
||||
config, _ := GetDefault()
|
||||
return config
|
||||
}(),
|
||||
},
|
||||
{
|
||||
description: "alternative host ldconfig path is valid",
|
||||
config: &Config{
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Ldconfig: "@/some/host/path",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "non-host path is invalid",
|
||||
config: &Config{
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Ldconfig: "/non/host/path",
|
||||
},
|
||||
},
|
||||
expectedError: errInvalidConfig,
|
||||
},
|
||||
{
|
||||
description: "feature flag allows non-host path",
|
||||
config: &Config{
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Ldconfig: "/non/host/path",
|
||||
},
|
||||
Features: features{
|
||||
AllowLDConfigFromContainer: ptr(feature(true)),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
require.ErrorIs(t, tc.config.assertValid(), tc.expectedError)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// setGetDistIDLikeForTest overrides the distribution IDs that would normally be read from the /etc/os-release file.
|
||||
func setGetDistIDLikeForTest(ids []string) func() {
|
||||
if ids == nil {
|
||||
@@ -335,3 +444,18 @@ func setGetDistIDLikeForTest(ids []string) func() {
|
||||
getDistIDLike = original
|
||||
}
|
||||
}
|
||||
|
||||
// ptr returns a pointer to the value that is passed into it.
|
||||
func ptr[T any](x T) *T {
|
||||
return &x
|
||||
}
|
||||
|
||||
func setGetLdConfigPathForTest() func() {
|
||||
previous := getLdConfigPath
|
||||
getLdConfigPath = func() ldconfigPath {
|
||||
return "@/test/ld/config/path"
|
||||
}
|
||||
return func() {
|
||||
getLdConfigPath = previous
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,17 +18,38 @@ package config
|
||||
|
||||
// features specifies a set of named features.
|
||||
type features struct {
|
||||
// AllowCUDACompatLibsFromContainer allows CUDA compat libs from a container
|
||||
// to override certain driver library mounts from the host.
|
||||
AllowCUDACompatLibsFromContainer *feature `toml:"allow-cuda-compat-libs-from-container,omitempty"`
|
||||
// AllowLDConfigFromContainer allows non-host ldconfig paths to be used.
|
||||
// If this feature flag is not set to 'true' only host-rooted config paths
|
||||
// (i.e. paths starting with an '@') are considered valid.
|
||||
AllowLDConfigFromContainer *feature `toml:"allow-ldconfig-from-container,omitempty"`
|
||||
// DisableCUDACompatLibHook, when enabled skips the injection of a specific
|
||||
// hook to process CUDA compatibility libraries.
|
||||
//
|
||||
// Note: Since this mechanism replaces the logic in the `nvidia-container-cli`,
|
||||
// toggling this feature has no effect if `allow-cuda-compat-libs-from-container` is enabled.
|
||||
DisableCUDACompatLibHook *feature `toml:"disable-cuda-compat-lib-hook,omitempty"`
|
||||
// DisableImexChannelCreation ensures that the implicit creation of
|
||||
// requested IMEX channels is skipped when invoking the nvidia-container-cli.
|
||||
DisableImexChannelCreation *feature `toml:"disable-imex-channel-creation,omitempty"`
|
||||
// IgnoreImexChannelRequests configures the NVIDIA Container Toolkit to
|
||||
// ignore IMEX channel requests through the NVIDIA_IMEX_CHANNELS envvar or
|
||||
// volume mounts.
|
||||
// This ensures that the NVIDIA Container Toolkit cannot be used to provide
|
||||
// access to an IMEX channel by simply specifying an environment variable,
|
||||
// possibly bypassing other checks by an orchestration system such as
|
||||
// kubernetes.
|
||||
// Note that this is not enabled by default to maintain backward compatibility
|
||||
// with the existing behaviour when the NVIDIA Container Toolkit is used in
|
||||
// non-kubernetes environments.
|
||||
IgnoreImexChannelRequests *feature `toml:"ignore-imex-channel-requests,omitempty"`
|
||||
}
|
||||
|
||||
//nolint:unused
|
||||
type feature bool
|
||||
|
||||
// IsEnabled checks whether a feature is explicitly enabled.
|
||||
//
|
||||
//nolint:unused
|
||||
func (f *feature) IsEnabled() bool {
|
||||
if f != nil {
|
||||
return bool(*f)
|
||||
|
||||
@@ -24,13 +24,3 @@ type RuntimeHookConfig struct {
|
||||
// SkipModeDetection disables the mode check for the runtime hook.
|
||||
SkipModeDetection bool `toml:"skip-mode-detection"`
|
||||
}
|
||||
|
||||
// GetDefaultRuntimeHookConfig defines the default values for the config
|
||||
func GetDefaultRuntimeHookConfig() (*RuntimeHookConfig, error) {
|
||||
cfg, err := GetDefault()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &cfg.NVIDIAContainerRuntimeHookConfig, nil
|
||||
}
|
||||
|
||||
@@ -292,7 +292,11 @@ func (i CUDA) CDIDevicesFromMounts() []string {
|
||||
|
||||
// ImexChannelsFromEnvVar returns the list of IMEX channels requested for the image.
|
||||
func (i CUDA) ImexChannelsFromEnvVar() []string {
|
||||
return i.DevicesFromEnvvars(EnvVarNvidiaImexChannels).List()
|
||||
imexChannels := i.DevicesFromEnvvars(EnvVarNvidiaImexChannels).List()
|
||||
if len(imexChannels) == 1 && imexChannels[0] == "all" {
|
||||
return nil
|
||||
}
|
||||
return imexChannels
|
||||
}
|
||||
|
||||
// ImexChannelsFromMounts returns the list of IMEX channels requested for the image.
|
||||
|
||||
@@ -203,6 +203,37 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestImexChannelsFromEnvVar(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
env []string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
description: "no imex channels specified",
|
||||
},
|
||||
{
|
||||
description: "imex channel specified",
|
||||
env: []string{
|
||||
"NVIDIA_IMEX_CHANNELS=3,4",
|
||||
},
|
||||
expected: []string{"3", "4"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} {
|
||||
t.Run(tc.description+id, func(t *testing.T) {
|
||||
i, err := NewCUDAImageFromEnv(append(baseEnvvars, tc.env...))
|
||||
require.NoError(t, err)
|
||||
|
||||
channels := i.ImexChannelsFromEnvVar()
|
||||
require.EqualValues(t, tc.expected, channels)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func makeTestMounts(paths ...string) []specs.Mount {
|
||||
var mounts []specs.Mount
|
||||
for _, path := range paths {
|
||||
|
||||
@@ -29,8 +29,9 @@ type RuntimeConfig struct {
|
||||
|
||||
// modesConfig defines (optional) per-mode configs
|
||||
type modesConfig struct {
|
||||
CSV csvModeConfig `toml:"csv"`
|
||||
CDI cdiModeConfig `toml:"cdi"`
|
||||
CSV csvModeConfig `toml:"csv"`
|
||||
CDI cdiModeConfig `toml:"cdi"`
|
||||
Legacy legacyModeConfig `toml:"legacy"`
|
||||
}
|
||||
|
||||
type cdiModeConfig struct {
|
||||
@@ -46,12 +47,30 @@ type csvModeConfig struct {
|
||||
MountSpecPath string `toml:"mount-spec-path"`
|
||||
}
|
||||
|
||||
// GetDefaultRuntimeConfig defines the default values for the config
|
||||
func GetDefaultRuntimeConfig() (*RuntimeConfig, error) {
|
||||
cfg, err := GetDefault()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &cfg.NVIDIAContainerRuntimeConfig, nil
|
||||
type legacyModeConfig struct {
|
||||
// CUDACompatMode sets the mode to be used to make CUDA Forward Compat
|
||||
// libraries discoverable in the container.
|
||||
CUDACompatMode cudaCompatMode `toml:"cuda-compat-mode,omitempty"`
|
||||
}
|
||||
|
||||
type cudaCompatMode string
|
||||
|
||||
const (
|
||||
defaultCUDACompatMode = CUDACompatModeLdconfig
|
||||
// CUDACompatModeDisabled explicitly disables the handling of CUDA Forward
|
||||
// Compatibility in the NVIDIA Container Runtime and NVIDIA Container
|
||||
// Runtime Hook.
|
||||
CUDACompatModeDisabled = cudaCompatMode("disabled")
|
||||
// CUDACompatModeHook uses a container lifecycle hook to implement CUDA
|
||||
// Forward Compatibility support. This requires the use of the NVIDIA
|
||||
// Container Runtime and is not compatible with use cases where only the
|
||||
// NVIDIA Container Runtime Hook is used (e.g. the Docker --gpus flag).
|
||||
CUDACompatModeHook = cudaCompatMode("hook")
|
||||
// CUDACompatModeLdconfig adds the folders containing CUDA Forward Compat
|
||||
// libraries to the ldconfig command invoked from the NVIDIA Container
|
||||
// Runtime Hook.
|
||||
CUDACompatModeLdconfig = cudaCompatMode("ldconfig")
|
||||
// CUDACompatModeMount mounts CUDA Forward Compat folders from the container
|
||||
// to the container when using the NVIDIA Container Runtime Hook.
|
||||
CUDACompatModeMount = cudaCompatMode("mount")
|
||||
)
|
||||
|
||||
@@ -108,6 +108,19 @@ func loadConfigTomlFrom(reader io.Reader) (*Toml, error) {
|
||||
|
||||
// Config returns the typed config associated with the toml tree.
|
||||
func (t *Toml) Config() (*Config, error) {
|
||||
cfg, err := t.configNoOverrides()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := cfg.assertValid(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// configNoOverrides returns the typed config associated with the toml tree.
|
||||
// This config does not include feature-specific overrides.
|
||||
func (t *Toml) configNoOverrides() (*Config, error) {
|
||||
cfg, err := GetDefault()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -74,6 +74,9 @@ spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime.modes.legacy]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "nvidia-container-runtime-hook"
|
||||
skip-mode-detection = false
|
||||
@@ -198,9 +201,12 @@ func TestTomlContents(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestConfigFromToml(t *testing.T) {
|
||||
defer setGetLdConfigPathForTest()()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
contents map[string]interface{}
|
||||
expectedError error
|
||||
expectedConfig *Config
|
||||
}{
|
||||
{
|
||||
@@ -226,13 +232,39 @@ func TestConfigFromToml(t *testing.T) {
|
||||
return c
|
||||
}(),
|
||||
},
|
||||
{
|
||||
description: "invalid ldconfig value raises error",
|
||||
contents: map[string]interface{}{
|
||||
"nvidia-container-cli": map[string]interface{}{
|
||||
"ldconfig": "/some/ldconfig/path",
|
||||
},
|
||||
},
|
||||
expectedError: errInvalidConfig,
|
||||
},
|
||||
{
|
||||
description: "feature allows ldconfig override",
|
||||
contents: map[string]interface{}{
|
||||
"nvidia-container-cli": map[string]interface{}{
|
||||
"ldconfig": "/some/ldconfig/path",
|
||||
},
|
||||
"features": map[string]interface{}{
|
||||
"allow-ldconfig-from-container": true,
|
||||
},
|
||||
},
|
||||
expectedConfig: func() *Config {
|
||||
c, _ := GetDefault()
|
||||
c.NVIDIAContainerCLIConfig.Ldconfig = "/some/ldconfig/path"
|
||||
c.Features.AllowLDConfigFromContainer = ptr(feature(true))
|
||||
return c
|
||||
}(),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
tomlCfg := fromMap(tc.contents)
|
||||
config, err := tomlCfg.Config()
|
||||
require.NoError(t, err)
|
||||
require.ErrorIs(t, err, tc.expectedError)
|
||||
require.EqualValues(t, tc.expectedConfig, config)
|
||||
})
|
||||
}
|
||||
|
||||
24
internal/discover/compat_libs.go
Normal file
24
internal/discover/compat_libs.go
Normal file
@@ -0,0 +1,24 @@
|
||||
package discover
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||
)
|
||||
|
||||
// NewCUDACompatHookDiscoverer creates a discoverer for a enable-cuda-compat hook.
|
||||
// This hook is responsible for setting up CUDA compatibility in the container and depends on the host driver version.
|
||||
func NewCUDACompatHookDiscoverer(logger logger.Interface, nvidiaCDIHookPath string, driver *root.Driver) Discover {
|
||||
_, cudaVersionPattern := getCUDALibRootAndVersionPattern(logger, driver)
|
||||
var args []string
|
||||
if !strings.Contains(cudaVersionPattern, "*") {
|
||||
args = append(args, "--host-driver-version="+cudaVersionPattern)
|
||||
}
|
||||
|
||||
return CreateNvidiaCDIHook(
|
||||
nvidiaCDIHookPath,
|
||||
"enable-cuda-compat",
|
||||
args...,
|
||||
)
|
||||
}
|
||||
@@ -21,26 +21,28 @@ import "fmt"
|
||||
// list is a discoverer that contains a list of Discoverers. The output of the
|
||||
// Mounts functions is the concatenation of the output for each of the
|
||||
// elements in the list.
|
||||
type list struct {
|
||||
discoverers []Discover
|
||||
}
|
||||
type list []Discover
|
||||
|
||||
var _ Discover = (*list)(nil)
|
||||
|
||||
// Merge creates a discoverer that is the composite of a list of discoverers.
|
||||
func Merge(d ...Discover) Discover {
|
||||
l := list{
|
||||
discoverers: d,
|
||||
func Merge(discoverers ...Discover) Discover {
|
||||
var l list
|
||||
for _, d := range discoverers {
|
||||
if d == nil {
|
||||
continue
|
||||
}
|
||||
l = append(l, d)
|
||||
}
|
||||
|
||||
return &l
|
||||
return l
|
||||
}
|
||||
|
||||
// Devices returns all devices from the included discoverers
|
||||
func (d list) Devices() ([]Device, error) {
|
||||
var allDevices []Device
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
for i, di := range d {
|
||||
devices, err := di.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering devices for discoverer %v: %v", i, err)
|
||||
@@ -55,7 +57,7 @@ func (d list) Devices() ([]Device, error) {
|
||||
func (d list) Mounts() ([]Mount, error) {
|
||||
var allMounts []Mount
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
for i, di := range d {
|
||||
mounts, err := di.Mounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering mounts for discoverer %v: %v", i, err)
|
||||
@@ -70,7 +72,7 @@ func (d list) Mounts() ([]Mount, error) {
|
||||
func (d list) Hooks() ([]Hook, error) {
|
||||
var allHooks []Hook
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
for i, di := range d {
|
||||
hooks, err := di.Hooks()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering hooks for discoverer %v: %v", i, err)
|
||||
|
||||
@@ -216,7 +216,7 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
HasTegraFilesFunc: func() (bool, string) {
|
||||
return tc.info["tegra"], "tegra"
|
||||
},
|
||||
UsesOnlyNVGPUModuleFunc: func() (bool, string) {
|
||||
HasOnlyIntegratedGPUsFunc: func() (bool, string) {
|
||||
return tc.info["nvgpu"], "nvgpu"
|
||||
},
|
||||
}
|
||||
|
||||
@@ -47,6 +47,11 @@ const (
|
||||
flagArchX8664 = 0x0300
|
||||
flagArchX32 = 0x0800
|
||||
flagArchPpc64le = 0x0500
|
||||
|
||||
// flagArch_ARM_LIBHF is the flag value for 32-bit ARM libs using hard-float.
|
||||
flagArch_ARM_LIBHF = 0x0900
|
||||
// flagArch_AARCH64_LIB64 is the flag value for 64-bit ARM libs.
|
||||
flagArch_AARCH64_LIB64 = 0x0a00
|
||||
)
|
||||
|
||||
var errInvalidCache = errors.New("invalid ld.so.cache file")
|
||||
@@ -195,10 +200,14 @@ func (c *ldcache) getEntries() []entry {
|
||||
switch e.Flags & flagArchMask {
|
||||
case flagArchX8664:
|
||||
fallthrough
|
||||
case flagArch_AARCH64_LIB64:
|
||||
fallthrough
|
||||
case flagArchPpc64le:
|
||||
bits = 64
|
||||
case flagArchX32:
|
||||
fallthrough
|
||||
case flagArch_ARM_LIBHF:
|
||||
fallthrough
|
||||
case flagArchI386:
|
||||
bits = 32
|
||||
default:
|
||||
|
||||
@@ -68,20 +68,10 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image
|
||||
return nil, fmt.Errorf("failed to get CDI spec: %v", err)
|
||||
}
|
||||
|
||||
cdiModifier, err := cdi.New(
|
||||
return cdi.New(
|
||||
cdi.WithLogger(logger),
|
||||
cdi.WithSpec(spec.Raw()),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct CDI modifier: %v", err)
|
||||
}
|
||||
|
||||
modifiers := Merge(
|
||||
nvidiaContainerRuntimeHookRemover{logger},
|
||||
cdiModifier,
|
||||
)
|
||||
|
||||
return modifiers, nil
|
||||
}
|
||||
|
||||
func checkRequirements(logger logger.Interface, image image.CUDA) error {
|
||||
|
||||
@@ -19,7 +19,6 @@ package modifier
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
@@ -74,66 +73,3 @@ func TestNewCSVModifier(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCSVModifierRemovesHook(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
spec *specs.Spec
|
||||
expectedError error
|
||||
expectedSpec *specs.Spec
|
||||
}{
|
||||
{
|
||||
description: "modification removes existing nvidia-container-runtime-hook",
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-runtime-hook",
|
||||
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "modification removes existing nvidia-container-toolkit",
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-toolkit",
|
||||
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
m := nvidiaContainerRuntimeHookRemover{logger: logger}
|
||||
|
||||
err := m.Modify(tc.spec)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.Empty(t, tc.spec.Hooks.Prestart)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
@@ -35,7 +36,7 @@ import (
|
||||
// NVIDIA_GDRCOPY=enabled
|
||||
//
|
||||
// If no devices are selected, no changes are made.
|
||||
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
|
||||
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver) (oci.SpecModifier, error) {
|
||||
if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 {
|
||||
logger.Infof("No modification required; no devices requested")
|
||||
return nil, nil
|
||||
@@ -78,5 +79,41 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
|
||||
discoverers = append(discoverers, d)
|
||||
}
|
||||
|
||||
// If the feature flag has explicitly been toggled, we don't make any modification.
|
||||
if !cfg.Features.DisableCUDACompatLibHook.IsEnabled() {
|
||||
cudaCompatDiscoverer, err := getCudaCompatModeDiscoverer(logger, cfg, driver)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct CUDA Compat discoverer: %w", err)
|
||||
}
|
||||
discoverers = append(discoverers, cudaCompatDiscoverer)
|
||||
}
|
||||
|
||||
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
|
||||
}
|
||||
|
||||
func getCudaCompatModeDiscoverer(logger logger.Interface, cfg *config.Config, driver *root.Driver) (discover.Discover, error) {
|
||||
// For legacy mode, we only include the enable-cuda-compat hook if cuda-compat-mode is set to hook.
|
||||
if cfg.NVIDIAContainerRuntimeConfig.Mode == "legacy" && cfg.NVIDIAContainerRuntimeConfig.Modes.Legacy.CUDACompatMode != config.CUDACompatModeHook {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, cfg.NVIDIACTKConfig.Path, driver)
|
||||
// For non-legacy modes we return the hook as is. These modes *should* already include the update-ldcache hook.
|
||||
if cfg.NVIDIAContainerRuntimeConfig.Mode != "legacy" {
|
||||
return compatLibHookDiscoverer, nil
|
||||
}
|
||||
|
||||
// For legacy mode, we also need to inject a hook to update the LDCache
|
||||
// after we have modifed the configuration.
|
||||
ldcacheUpdateHookDiscoverer, err := discover.NewLDCacheUpdateHook(
|
||||
logger,
|
||||
discover.None{},
|
||||
cfg.NVIDIACTKConfig.Path,
|
||||
"",
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct ldcache update discoverer: %w", err)
|
||||
}
|
||||
|
||||
return discover.Merge(compatLibHookDiscoverer, ldcacheUpdateHookDiscoverer), nil
|
||||
}
|
||||
|
||||
@@ -33,6 +33,13 @@ type nvidiaContainerRuntimeHookRemover struct {
|
||||
|
||||
var _ oci.SpecModifier = (*nvidiaContainerRuntimeHookRemover)(nil)
|
||||
|
||||
// NewNvidiaContainerRuntimeHookRemover creates a modifier that removes any NVIDIA Container Runtime hooks from the provided spec.
|
||||
func NewNvidiaContainerRuntimeHookRemover(logger logger.Interface) oci.SpecModifier {
|
||||
return nvidiaContainerRuntimeHookRemover{
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
// Modify removes any NVIDIA Container Runtime hooks from the provided spec
|
||||
func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error {
|
||||
if spec == nil {
|
||||
|
||||
@@ -22,14 +22,12 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
type list struct {
|
||||
modifiers []oci.SpecModifier
|
||||
}
|
||||
type List []oci.SpecModifier
|
||||
|
||||
// Merge merges a set of OCI specification modifiers as a list.
|
||||
// This can be used to compose modifiers.
|
||||
func Merge(modifiers ...oci.SpecModifier) oci.SpecModifier {
|
||||
var filteredModifiers []oci.SpecModifier
|
||||
var filteredModifiers List
|
||||
for _, m := range modifiers {
|
||||
if m == nil {
|
||||
continue
|
||||
@@ -37,19 +35,19 @@ func Merge(modifiers ...oci.SpecModifier) oci.SpecModifier {
|
||||
filteredModifiers = append(filteredModifiers, m)
|
||||
}
|
||||
|
||||
return list{
|
||||
modifiers: filteredModifiers,
|
||||
}
|
||||
return filteredModifiers
|
||||
}
|
||||
|
||||
// Modify applies a list of modifiers in sequence and returns on any errors encountered.
|
||||
func (m list) Modify(spec *specs.Spec) error {
|
||||
for _, mm := range m.modifiers {
|
||||
func (m List) Modify(spec *specs.Spec) error {
|
||||
for _, mm := range m {
|
||||
if mm == nil {
|
||||
continue
|
||||
}
|
||||
err := mm.Modify(spec)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -19,14 +19,14 @@ func TestMaintainSpec(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, f := range files {
|
||||
inputSpecPath := filepath.Join(moduleRoot, "test/input", f)
|
||||
inputSpecPath := filepath.Join(moduleRoot, "tests/input", f)
|
||||
|
||||
spec := NewFileSpec(inputSpecPath).(*fileSpec)
|
||||
|
||||
_, err := spec.Load()
|
||||
require.NoError(t, err)
|
||||
|
||||
outputSpecPath := filepath.Join(moduleRoot, "test/output", f)
|
||||
outputSpecPath := filepath.Join(moduleRoot, "tests/output", f)
|
||||
spec.path = outputSpecPath
|
||||
spec.Flush()
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ func TestGetFileList(t *testing.T) {
|
||||
}{
|
||||
{
|
||||
description: "returns list of CSV files",
|
||||
root: "test/input/csv_samples/",
|
||||
root: "tests/input/csv_samples/",
|
||||
files: []string{
|
||||
"jetson.csv",
|
||||
"simple_wrong.csv",
|
||||
@@ -46,15 +46,15 @@ func TestGetFileList(t *testing.T) {
|
||||
},
|
||||
{
|
||||
description: "handles empty folder",
|
||||
root: "test/input/csv_samples/empty",
|
||||
root: "tests/input/csv_samples/empty",
|
||||
},
|
||||
{
|
||||
description: "handles non-existent folder",
|
||||
root: "test/input/csv_samples/NONEXISTENT",
|
||||
root: "tests/input/csv_samples/NONEXISTENT",
|
||||
},
|
||||
{
|
||||
description: "handles non-existent folder root",
|
||||
root: "/NONEXISTENT/test/input/csv_samples/",
|
||||
root: "/NONEXISTENT/tests/input/csv_samples/",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -75,30 +75,35 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
|
||||
}
|
||||
|
||||
mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode, image)
|
||||
// We update the mode here so that we can continue passing just the config to other functions.
|
||||
cfg.NVIDIAContainerRuntimeConfig.Mode = mode
|
||||
modeModifier, err := newModeModifier(logger, mode, cfg, ociSpec, image)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// For CDI mode we make no additional modifications.
|
||||
if mode == "cdi" {
|
||||
return modeModifier, nil
|
||||
|
||||
var modifiers modifier.List
|
||||
for _, modifierType := range supportedModifierTypes(mode) {
|
||||
switch modifierType {
|
||||
case "mode":
|
||||
modifiers = append(modifiers, modeModifier)
|
||||
case "nvidia-hook-remover":
|
||||
modifiers = append(modifiers, modifier.NewNvidiaContainerRuntimeHookRemover(logger))
|
||||
case "graphics":
|
||||
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
modifiers = append(modifiers, graphicsModifier)
|
||||
case "feature-gated":
|
||||
featureGatedModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image, driver)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
modifiers = append(modifiers, featureGatedModifier)
|
||||
}
|
||||
}
|
||||
|
||||
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
featureModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
modifiers := modifier.Merge(
|
||||
modeModifier,
|
||||
graphicsModifier,
|
||||
featureModifier,
|
||||
)
|
||||
return modifiers, nil
|
||||
}
|
||||
|
||||
@@ -114,3 +119,17 @@ func newModeModifier(logger logger.Interface, mode string, cfg *config.Config, o
|
||||
|
||||
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
|
||||
}
|
||||
|
||||
// supportedModifierTypes returns the modifiers supported for a specific runtime mode.
// The returned slice is ordered and a new slice is returned on every call.
// Any mode other than "cdi" or "csv" is handled by the final default return.
func supportedModifierTypes(mode string) []string {
	if mode == "cdi" {
		// For CDI mode we make no additional modifications.
		return []string{"nvidia-hook-remover", "mode"}
	}

	if mode == "csv" {
		// For CSV mode we support mode and feature-gated modification.
		return []string{"nvidia-hook-remover", "feature-gated", "mode"}
	}

	return []string{"feature-gated", "graphics", "mode"}
}
|
||||
|
||||
@@ -30,6 +30,7 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
)
|
||||
|
||||
@@ -45,7 +46,7 @@ func TestMain(m *testing.M) {
|
||||
if err != nil {
|
||||
log.Fatalf("error in test setup: could not get module root: %v", err)
|
||||
}
|
||||
testBinPath := filepath.Join(moduleRoot, "test", "bin")
|
||||
testBinPath := filepath.Join(moduleRoot, "tests", "bin")
|
||||
|
||||
// Set the environment variables for the test
|
||||
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
|
||||
@@ -165,3 +166,181 @@ func TestFactoryMethod(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewSpecModifier(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
driver := root.New(
|
||||
root.WithDriverRoot("/nvidia/driver/root"),
|
||||
)
|
||||
testCases := []struct {
|
||||
description string
|
||||
config *config.Config
|
||||
spec *specs.Spec
|
||||
expectedSpec *specs.Spec
|
||||
}{
|
||||
{
|
||||
description: "csv mode removes nvidia-container-runtime-hook",
|
||||
config: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-runtime-hook",
|
||||
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: nil,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "csv mode removes nvidia-container-toolkit",
|
||||
config: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-toolkit",
|
||||
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: nil,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "cdi mode removes nvidia-container-runtime-hook",
|
||||
config: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Mode: "cdi",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-runtime-hook",
|
||||
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: nil,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "cdi mode removes nvidia-container-toolkit",
|
||||
config: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Mode: "cdi",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-toolkit",
|
||||
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: nil,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "legacy mode keeps nvidia-container-runtime-hook",
|
||||
config: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-runtime-hook",
|
||||
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-runtime-hook",
|
||||
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "legacy mode keeps nvidia-container-toolkit",
|
||||
config: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-toolkit",
|
||||
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-toolkit",
|
||||
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
spec := &oci.SpecMock{
|
||||
LoadFunc: func() (*specs.Spec, error) {
|
||||
return tc.spec, nil
|
||||
},
|
||||
}
|
||||
m, err := newSpecModifier(logger, tc.config, spec, driver)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = m.Modify(tc.spec)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, tc.expectedSpec, tc.spec)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,12 +18,13 @@ package engine
|
||||
|
||||
// Interface defines the API for a runtime config updater.
|
||||
type Interface interface {
|
||||
DefaultRuntime() string
|
||||
AddRuntime(string, string, bool) error
|
||||
Set(string, interface{})
|
||||
DefaultRuntime() string
|
||||
EnableCDI()
|
||||
GetRuntimeConfig(string) (RuntimeConfig, error)
|
||||
RemoveRuntime(string) error
|
||||
Save(string) (int64, error)
|
||||
GetRuntimeConfig(string) (RuntimeConfig, error)
|
||||
String() string
|
||||
}
|
||||
|
||||
// RuntimeConfig defines the interface to query container runtime handler configuration
|
||||
|
||||
@@ -30,40 +30,40 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
|
||||
}
|
||||
config := *c.Tree
|
||||
|
||||
config.Set("version", int64(2))
|
||||
config.Set("version", c.Version)
|
||||
|
||||
runtimeNamesForConfig := engine.GetLowLevelRuntimes(c)
|
||||
for _, r := range runtimeNamesForConfig {
|
||||
options := config.GetSubtreeByPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", r})
|
||||
options := config.GetSubtreeByPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", r})
|
||||
if options == nil {
|
||||
continue
|
||||
}
|
||||
c.Logger.Debugf("using options from runtime %v: %v", r, options)
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name}, options.Copy())
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name}, options.Copy())
|
||||
break
|
||||
}
|
||||
|
||||
if config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name}) == nil {
|
||||
if config.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name}) == nil {
|
||||
c.Logger.Warningf("could not infer options from runtimes %v; using defaults", runtimeNamesForConfig)
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "runtime_type"}, c.RuntimeType)
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "runtime_root"}, "")
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "runtime_engine"}, "")
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false)
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name, "runtime_type"}, c.RuntimeType)
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name, "runtime_root"}, "")
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name, "runtime_engine"}, "")
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name, "privileged_without_host_devices"}, false)
|
||||
}
|
||||
|
||||
if len(c.ContainerAnnotations) > 0 {
|
||||
annotations, err := c.getRuntimeAnnotations([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"})
|
||||
annotations, err := c.getRuntimeAnnotations([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name, "container_annotations"})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
annotations = append(c.ContainerAnnotations, annotations...)
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}, annotations)
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name, "container_annotations"}, annotations)
|
||||
}
|
||||
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "options", "BinaryName"}, path)
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name, "options", "BinaryName"}, path)
|
||||
|
||||
if setAsDefault {
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}, name)
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"}, name)
|
||||
}
|
||||
|
||||
*c.Tree = config
|
||||
@@ -96,21 +96,21 @@ func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) {
|
||||
return annotations, nil
|
||||
}
|
||||
|
||||
// Set sets the specified containerd option.
|
||||
func (c *Config) Set(key string, value interface{}) {
|
||||
config := *c.Tree
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", key}, value)
|
||||
*c.Tree = config
|
||||
}
|
||||
|
||||
// DefaultRuntime returns the default runtime for the containerd config
|
||||
func (c Config) DefaultRuntime() string {
|
||||
if runtime, ok := c.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok {
|
||||
if runtime, ok := c.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"}).(string); ok {
|
||||
return runtime
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// EnableCDI sets the enable_cdi field in the Containerd config to true.
|
||||
func (c *Config) EnableCDI() {
|
||||
config := *c.Tree
|
||||
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "enable_cdi"}, true)
|
||||
*c.Tree = config
|
||||
}
|
||||
|
||||
// RemoveRuntime removes a runtime from the containerd config
|
||||
func (c *Config) RemoveRuntime(name string) error {
|
||||
if c == nil || c.Tree == nil {
|
||||
@@ -119,14 +119,14 @@ func (c *Config) RemoveRuntime(name string) error {
|
||||
|
||||
config := *c.Tree
|
||||
|
||||
config.DeletePath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name})
|
||||
if runtime, ok := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok {
|
||||
config.DeletePath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name})
|
||||
if runtime, ok := config.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"}).(string); ok {
|
||||
if runtime == name {
|
||||
config.DeletePath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"})
|
||||
config.DeletePath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"})
|
||||
}
|
||||
}
|
||||
|
||||
runtimePath := []string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name}
|
||||
runtimePath := []string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name}
|
||||
for i := 0; i < len(runtimePath); i++ {
|
||||
if runtimes, ok := config.GetPath(runtimePath[:len(runtimePath)-i]).(*toml.Tree); ok {
|
||||
if len(runtimes.Keys()) == 0 {
|
||||
@@ -46,7 +46,7 @@ func TestAddRuntime(t *testing.T) {
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.test.options]
|
||||
BinaryName = "/usr/bin/test"
|
||||
`,
|
||||
@@ -195,24 +195,85 @@ func TestAddRuntime(t *testing.T) {
|
||||
SystemdCgroup = false
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "empty v3 spec is supported",
|
||||
config: `
|
||||
version = 3
|
||||
`,
|
||||
expectedConfig: `
|
||||
version = 3
|
||||
[plugins]
|
||||
[plugins."io.containerd.cri.v1.runtime"]
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd]
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes]
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.test]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.test.options]
|
||||
BinaryName = "/usr/bin/test"
|
||||
`,
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
description: "v3 spec is supported",
|
||||
config: `
|
||||
version = 3
|
||||
[plugins]
|
||||
[plugins."io.containerd.cri.v1.runtime"]
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd]
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes]
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc]
|
||||
privileged_without_host_devices = true
|
||||
runtime_engine = "engine"
|
||||
runtime_root = "root"
|
||||
runtime_type = "type"
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options]
|
||||
BinaryName = "/usr/bin/runc"
|
||||
SystemdCgroup = true
|
||||
`,
|
||||
expectedConfig: `
|
||||
version = 3
|
||||
[plugins]
|
||||
[plugins."io.containerd.cri.v1.runtime"]
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd]
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes]
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc]
|
||||
privileged_without_host_devices = true
|
||||
runtime_engine = "engine"
|
||||
runtime_root = "root"
|
||||
runtime_type = "type"
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options]
|
||||
BinaryName = "/usr/bin/runc"
|
||||
SystemdCgroup = true
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.test]
|
||||
privileged_without_host_devices = true
|
||||
runtime_engine = "engine"
|
||||
runtime_root = "root"
|
||||
runtime_type = "type"
|
||||
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.test.options]
|
||||
BinaryName = "/usr/bin/test"
|
||||
SystemdCgroup = true
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
cfg, err := toml.Load(tc.config)
|
||||
require.NoError(t, err)
|
||||
expectedConfig, err := toml.Load(tc.expectedConfig)
|
||||
require.NoError(t, err)
|
||||
|
||||
c := &Config{
|
||||
Logger: logger,
|
||||
Tree: cfg,
|
||||
}
|
||||
c, err := New(
|
||||
WithLogger(logger),
|
||||
WithConfigSource(toml.FromString(tc.config)),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = c.AddRuntime("test", "/usr/bin/test", tc.setAsDefault)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.EqualValues(t, expectedConfig.String(), cfg.String())
|
||||
require.EqualValues(t, expectedConfig.String(), c.String())
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -299,13 +360,13 @@ func TestGetRuntimeConfig(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
cfg, err := toml.Load(config)
|
||||
|
||||
c, err := New(
|
||||
WithLogger(logger),
|
||||
WithConfigSource(toml.FromString(config)),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
c := &Config{
|
||||
Logger: logger,
|
||||
Tree: cfg,
|
||||
}
|
||||
rc, err := c.GetRuntimeConfig(tc.runtime)
|
||||
require.Equal(t, tc.expectedError, err)
|
||||
require.Equal(t, tc.expected, rc.GetBinaryPath())
|
||||
@@ -70,18 +70,20 @@ func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool) error
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "BinaryName"}, path)
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "Runtime"}, path)
|
||||
|
||||
if setAsDefault && c.UseDefaultRuntimeName {
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime_name"}, name)
|
||||
} else if setAsDefault {
|
||||
// Note: This is deprecated in containerd 1.4.0 and will be removed in 1.5.0
|
||||
if config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime"}) == nil {
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_type"}, c.RuntimeType)
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_root"}, "")
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_engine"}, "")
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "privileged_without_host_devices"}, false)
|
||||
if setAsDefault {
|
||||
if !c.UseLegacyConfig {
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime_name"}, name)
|
||||
} else {
|
||||
// Note: This is deprecated in containerd 1.4.0 and will be removed in 1.5.0
|
||||
if config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime"}) == nil {
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_type"}, c.RuntimeType)
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_root"}, "")
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_engine"}, "")
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "privileged_without_host_devices"}, false)
|
||||
}
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "BinaryName"}, path)
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "Runtime"}, path)
|
||||
}
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "BinaryName"}, path)
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "Runtime"}, path)
|
||||
}
|
||||
|
||||
*c.Tree = config
|
||||
@@ -141,13 +143,6 @@ func (c *ConfigV1) RemoveRuntime(name string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set sets the specified containerd option.
|
||||
func (c *ConfigV1) Set(key string, value interface{}) {
|
||||
config := *c.Tree
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", key}, value)
|
||||
*c.Tree = config
|
||||
}
|
||||
|
||||
// Save writes the config to a file
|
||||
func (c ConfigV1) Save(path string) (int64, error) {
|
||||
return (Config)(c).Save(path)
|
||||
@@ -163,3 +158,9 @@ func (c *ConfigV1) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) {
|
||||
tree: runtimeData,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *ConfigV1) EnableCDI() {
|
||||
config := *c.Tree
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "enable_cdi"}, true)
|
||||
*c.Tree = config
|
||||
}
|
||||
|
||||
@@ -200,20 +200,21 @@ func TestAddRuntimeV1(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
cfg, err := toml.Load(tc.config)
|
||||
require.NoError(t, err)
|
||||
expectedConfig, err := toml.Load(tc.expectedConfig)
|
||||
require.NoError(t, err)
|
||||
|
||||
c := &ConfigV1{
|
||||
Logger: logger,
|
||||
Tree: cfg,
|
||||
}
|
||||
c, err := New(
|
||||
WithLogger(logger),
|
||||
WithConfigSource(toml.FromString(tc.config)),
|
||||
WithUseLegacyConfig(true),
|
||||
WithRuntimeType(""),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = c.AddRuntime("test", "/usr/bin/test", tc.setAsDefault)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.EqualValues(t, expectedConfig.String(), cfg.String())
|
||||
require.EqualValues(t, expectedConfig.String(), c.String())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,13 +24,28 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultConfigVersion = 2
|
||||
defaultRuntimeType = "io.containerd.runc.v2"
|
||||
)
|
||||
|
||||
// Config represents the containerd config
|
||||
type Config struct {
|
||||
*toml.Tree
|
||||
Logger logger.Interface
|
||||
RuntimeType string
|
||||
UseDefaultRuntimeName bool
|
||||
ContainerAnnotations []string
|
||||
Version int64
|
||||
Logger logger.Interface
|
||||
RuntimeType string
|
||||
ContainerAnnotations []string
|
||||
// UseLegacyConfig indicates whether a config file pre v1.3 should be generated.
|
||||
// For version 1 config prior to containerd v1.4 the default runtime was
|
||||
// specified in a containerd.default_runtime section.
|
||||
// This was deprecated in v1.4 in favour of containerd.default_runtime_name.
|
||||
// Support for this section has been removed in v2.0.
|
||||
UseLegacyConfig bool
|
||||
// CRIRuntimePluginName represents the fully qualified name of the containerd plugin
|
||||
// for the CRI runtime service. The name of this plugin was changed in v3 of the
|
||||
// containerd configuration file.
|
||||
CRIRuntimePluginName string
|
||||
}
|
||||
|
||||
var _ engine.Interface = (*Config)(nil)
|
||||
@@ -55,7 +70,8 @@ func (c *containerdCfgRuntime) GetBinaryPath() string {
|
||||
// New creates a containerd config with the specified options
|
||||
func New(opts ...Option) (engine.Interface, error) {
|
||||
b := &builder{
|
||||
runtimeType: defaultRuntimeType,
|
||||
configVersion: defaultConfigVersion,
|
||||
runtimeType: defaultRuntimeType,
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(b)
|
||||
@@ -72,63 +88,85 @@ func New(opts ...Option) (engine.Interface, error) {
|
||||
return nil, fmt.Errorf("failed to load config: %v", err)
|
||||
}
|
||||
|
||||
configVersion, err := b.parseVersion(tomlConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse config version: %w", err)
|
||||
}
|
||||
b.logger.Infof("Using config version %v", configVersion)
|
||||
|
||||
criRuntimePluginName, err := b.criRuntimePluginName(configVersion)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get CRI runtime plugin name: %w", err)
|
||||
}
|
||||
b.logger.Infof("Using CRI runtime plugin name %q", criRuntimePluginName)
|
||||
|
||||
cfg := &Config{
|
||||
Tree: tomlConfig,
|
||||
Logger: b.logger,
|
||||
RuntimeType: b.runtimeType,
|
||||
UseDefaultRuntimeName: b.useLegacyConfig,
|
||||
ContainerAnnotations: b.containerAnnotations,
|
||||
Tree: tomlConfig,
|
||||
Version: configVersion,
|
||||
CRIRuntimePluginName: criRuntimePluginName,
|
||||
Logger: b.logger,
|
||||
RuntimeType: b.runtimeType,
|
||||
UseLegacyConfig: b.useLegacyConfig,
|
||||
ContainerAnnotations: b.containerAnnotations,
|
||||
}
|
||||
|
||||
version, err := cfg.parseVersion(b.useLegacyConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse config version: %v", err)
|
||||
}
|
||||
switch version {
|
||||
switch configVersion {
|
||||
case 1:
|
||||
return (*ConfigV1)(cfg), nil
|
||||
case 2:
|
||||
default:
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unsupported config version: %v", version)
|
||||
}
|
||||
|
||||
// parseVersion returns the version of the config
|
||||
func (c *Config) parseVersion(useLegacyConfig bool) (int, error) {
|
||||
defaultVersion := 2
|
||||
if useLegacyConfig {
|
||||
defaultVersion = 1
|
||||
func (b *builder) parseVersion(c *toml.Tree) (int64, error) {
|
||||
if c == nil || len(c.Keys()) == 0 {
|
||||
// No config exists, or the config file is empty.
|
||||
if b.useLegacyConfig {
|
||||
// If a legacy config is explicitly requested, we default to a v1 config.
|
||||
return 1, nil
|
||||
}
|
||||
// Use the requested version.
|
||||
return int64(b.configVersion), nil
|
||||
}
|
||||
|
||||
switch v := c.Get("version").(type) {
|
||||
case nil:
|
||||
switch len(c.Keys()) {
|
||||
case 0: // No config exists, or the config file is empty, use version inferred from containerd
|
||||
return defaultVersion, nil
|
||||
default: // A config file exists, has content, and no version is set
|
||||
return 1, nil
|
||||
}
|
||||
return 1, nil
|
||||
case int64:
|
||||
return int(v), nil
|
||||
return v, nil
|
||||
default:
|
||||
return -1, fmt.Errorf("unsupported type for version field: %v", v)
|
||||
}
|
||||
}
|
||||
|
||||
func (b *builder) criRuntimePluginName(configVersion int64) (string, error) {
|
||||
switch configVersion {
|
||||
case 1:
|
||||
return "cri", nil
|
||||
case 2:
|
||||
return "io.containerd.grpc.v1.cri", nil
|
||||
default:
|
||||
return "io.containerd.cri.v1.runtime", nil
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Config) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) {
|
||||
if c == nil || c.Tree == nil {
|
||||
return nil, fmt.Errorf("config is nil")
|
||||
}
|
||||
runtimeData := c.GetSubtreeByPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name})
|
||||
runtimeData := c.GetSubtreeByPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name})
|
||||
return &containerdCfgRuntime{
|
||||
tree: runtimeData,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// CommandLineSource returns the CLI-based containerd config loader
|
||||
func CommandLineSource(hostRoot string) toml.Loader {
|
||||
return toml.FromCommandLine(chrootIfRequired(hostRoot, "containerd", "config", "dump")...)
|
||||
func CommandLineSource(hostRoot string, executablePath string) toml.Loader {
|
||||
if executablePath == "" {
|
||||
executablePath = "containerd"
|
||||
}
|
||||
return toml.FromCommandLine(chrootIfRequired(hostRoot, executablePath, "config", "dump")...)
|
||||
}
|
||||
|
||||
func chrootIfRequired(hostRoot string, commandLine ...string) []string {
|
||||
|
||||
@@ -21,16 +21,13 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultRuntimeType = "io.containerd.runc.v2"
|
||||
)
|
||||
|
||||
type builder struct {
|
||||
logger logger.Interface
|
||||
configSource toml.Loader
|
||||
configVersion int
|
||||
useLegacyConfig bool
|
||||
path string
|
||||
runtimeType string
|
||||
useLegacyConfig bool
|
||||
containerAnnotations []string
|
||||
}
|
||||
|
||||
@@ -65,13 +62,20 @@ func WithRuntimeType(runtimeType string) Option {
|
||||
}
|
||||
}
|
||||
|
||||
// WithUseLegacyConfig sets the useLegacyConfig flag for the config builder
|
||||
// WithUseLegacyConfig sets the useLegacyConfig flag for the config builder.
|
||||
func WithUseLegacyConfig(useLegacyConfig bool) Option {
|
||||
return func(b *builder) {
|
||||
b.useLegacyConfig = useLegacyConfig
|
||||
}
|
||||
}
|
||||
|
||||
// WithConfigVersion sets the config version for the config builder
|
||||
func WithConfigVersion(configVersion int) Option {
|
||||
return func(b *builder) {
|
||||
b.configVersion = configVersion
|
||||
}
|
||||
}
|
||||
|
||||
// WithContainerAnnotations sets the container annotations for the config builder
|
||||
func WithContainerAnnotations(containerAnnotations ...string) Option {
|
||||
return func(b *builder) {
|
||||
|
||||
@@ -153,10 +153,16 @@ func (c *Config) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
// EnableCDI is a no-op for CRI-O since it is always enabled where supported.
|
||||
func (c *Config) EnableCDI() {}
|
||||
|
||||
// CommandLineSource returns the CLI-based crio config loader
|
||||
func CommandLineSource(hostRoot string) toml.Loader {
|
||||
func CommandLineSource(hostRoot string, executablePath string) toml.Loader {
|
||||
if executablePath == "" {
|
||||
executablePath = "crio"
|
||||
}
|
||||
return toml.LoadFirst(
|
||||
toml.FromCommandLine(chrootIfRequired(hostRoot, "crio", "status", "config")...),
|
||||
toml.FromCommandLine(chrootIfRequired(hostRoot, executablePath, "status", "config")...),
|
||||
toml.FromCommandLine(chrootIfRequired(hostRoot, "crio-status", "config")...),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -103,6 +103,24 @@ func (c Config) DefaultRuntime() string {
|
||||
return r
|
||||
}
|
||||
|
||||
// EnableCDI sets features.cdi to true in the docker config.
|
||||
func (c *Config) EnableCDI() {
|
||||
if c == nil {
|
||||
return
|
||||
}
|
||||
config := *c
|
||||
|
||||
features, ok := config["features"].(map[string]bool)
|
||||
if !ok {
|
||||
features = make(map[string]bool)
|
||||
}
|
||||
features["cdi"] = true
|
||||
|
||||
config["features"] = features
|
||||
|
||||
*c = config
|
||||
}
|
||||
|
||||
// RemoveRuntime removes a runtime from the docker config
|
||||
func (c *Config) RemoveRuntime(name string) error {
|
||||
if c == nil {
|
||||
@@ -132,11 +150,6 @@ func (c *Config) RemoveRuntime(name string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set sets the specified docker option
|
||||
func (c *Config) Set(key string, value interface{}) {
|
||||
(*c)[key] = value
|
||||
}
|
||||
|
||||
// Save writes the config to the specified path
|
||||
func (c Config) Save(path string) (int64, error) {
|
||||
output, err := json.MarshalIndent(c, "", " ")
|
||||
@@ -166,3 +179,13 @@ func (c *Config) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) {
|
||||
}
|
||||
return &dockerRuntime{}, nil
|
||||
}
|
||||
|
||||
// String returns the string representation of the JSON config.
|
||||
func (c Config) String() string {
|
||||
output, err := json.MarshalIndent(c, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Sprintf("invalid JSON: %v", err)
|
||||
}
|
||||
|
||||
return string(output)
|
||||
}
|
||||
|
||||
26
pkg/config/toml/source-map.go
Normal file
26
pkg/config/toml/source-map.go
Normal file
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package toml
|
||||
|
||||
type tomlMap map[string]interface{}
|
||||
|
||||
var _ Loader = (*tomlFile)(nil)
|
||||
|
||||
// Load loads the contents of the map as a TOML tree.
|
||||
func (l tomlMap) Load() (*Tree, error) {
|
||||
return LoadMap(l)
|
||||
}
|
||||
26
pkg/config/toml/source-string.go
Normal file
26
pkg/config/toml/source-string.go
Normal file
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package toml
|
||||
|
||||
type tomlString string
|
||||
|
||||
var _ Loader = (*tomlFile)(nil)
|
||||
|
||||
// Load parses the string contents as a TOML tree.
|
||||
func (l tomlString) Load() (*Tree, error) {
|
||||
return Load(string(l))
|
||||
}
|
||||
@@ -25,15 +25,6 @@ type Loader interface {
|
||||
Load() (*Tree, error)
|
||||
}
|
||||
|
||||
// FromFile creates a TOML source from the specified file.
|
||||
// If an empty string is passed an empty toml config is used.
|
||||
func FromFile(path string) Loader {
|
||||
if path == "" {
|
||||
return Empty
|
||||
}
|
||||
return tomlFile(path)
|
||||
}
|
||||
|
||||
// FromCommandLine creates a TOML source from the output of a shell command and its corresponding args.
|
||||
// If the command is empty, an empty config is returned.
|
||||
func FromCommandLine(cmds ...string) Loader {
|
||||
@@ -45,3 +36,30 @@ func FromCommandLine(cmds ...string) Loader {
|
||||
args: cmds[1:],
|
||||
}
|
||||
}
|
||||
|
||||
// FromFile creates a TOML source from the specified file.
|
||||
// If an empty string is passed an empty toml config is used.
|
||||
func FromFile(path string) Loader {
|
||||
if path == "" {
|
||||
return Empty
|
||||
}
|
||||
return tomlFile(path)
|
||||
}
|
||||
|
||||
// FromMap creates a TOML source for the specified map.
|
||||
// If a nil map is passed an empty toml config is used.
|
||||
func FromMap(m map[string]interface{}) Loader {
|
||||
if m == nil {
|
||||
return Empty
|
||||
}
|
||||
return tomlMap(m)
|
||||
}
|
||||
|
||||
// FromString creates a TOML source for the specified contents.
|
||||
// If an empty string is passed an empty toml config is used.
|
||||
func FromString(contents string) Loader {
|
||||
if contents == "" {
|
||||
return Empty
|
||||
}
|
||||
return tomlString(contents)
|
||||
}
|
||||
|
||||
@@ -53,3 +53,13 @@ type Interface interface {
|
||||
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error)
|
||||
GetDeviceSpecsByID(...string) ([]specs.Device, error)
|
||||
}
|
||||
|
||||
// A HookName refers to one of the predefined set of CDI hooks that may be
|
||||
// included in the generated CDI specification.
|
||||
type HookName string
|
||||
|
||||
const (
|
||||
// HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility.
|
||||
// This was added with v1.17.5 of the NVIDIA Container Toolkit.
|
||||
HookEnableCudaCompat = HookName("enable-cuda-compat")
|
||||
)
|
||||
|
||||
@@ -41,7 +41,7 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
|
||||
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
|
||||
}
|
||||
|
||||
driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib)
|
||||
driverFiles, err := l.NewDriverDiscoverer()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
|
||||
}
|
||||
|
||||
@@ -34,41 +34,41 @@ import (
|
||||
|
||||
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
|
||||
// The supplied NVML Library is used to query the expected driver version.
|
||||
func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) {
|
||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
||||
func (l *nvmllib) NewDriverDiscoverer() (discover.Discover, error) {
|
||||
if r := l.nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to initialize NVML: %v", r)
|
||||
}
|
||||
defer func() {
|
||||
if r := nvmllib.Shutdown(); r != nvml.SUCCESS {
|
||||
logger.Warningf("failed to shutdown NVML: %v", r)
|
||||
if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS {
|
||||
l.logger.Warningf("failed to shutdown NVML: %v", r)
|
||||
}
|
||||
}()
|
||||
|
||||
version, r := nvmllib.SystemGetDriverVersion()
|
||||
version, r := l.nvmllib.SystemGetDriverVersion()
|
||||
if r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
||||
}
|
||||
|
||||
return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
|
||||
return (*nvcdilib)(l).newDriverVersionDiscoverer(version)
|
||||
}
|
||||
|
||||
func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) {
|
||||
libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
|
||||
func (l *nvcdilib) newDriverVersionDiscoverer(version string) (discover.Discover, error) {
|
||||
libraries, err := l.NewDriverLibraryDiscoverer(version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
|
||||
}
|
||||
|
||||
ipcs, err := discover.NewIPCDiscoverer(logger, driver.Root)
|
||||
ipcs, err := discover.NewIPCDiscoverer(l.logger, l.driver.Root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
|
||||
}
|
||||
|
||||
firmwares, err := NewDriverFirmwareDiscoverer(logger, driver.Root, version)
|
||||
firmwares, err := NewDriverFirmwareDiscoverer(l.logger, l.driver.Root, version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err)
|
||||
}
|
||||
|
||||
binaries := NewDriverBinariesDiscoverer(logger, driver.Root)
|
||||
binaries := NewDriverBinariesDiscoverer(l.logger, l.driver.Root)
|
||||
|
||||
d := discover.Merge(
|
||||
libraries,
|
||||
@@ -81,32 +81,41 @@ func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nv
|
||||
}
|
||||
|
||||
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
|
||||
func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) {
|
||||
libraryPaths, err := getVersionLibs(logger, driver, version)
|
||||
func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover, error) {
|
||||
libraryPaths, err := getVersionLibs(l.logger, l.driver, version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
|
||||
}
|
||||
|
||||
libraries := discover.NewMounts(
|
||||
logger,
|
||||
l.logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driver.Root),
|
||||
lookup.WithLogger(l.logger),
|
||||
lookup.WithRoot(l.driver.Root),
|
||||
),
|
||||
driver.Root,
|
||||
l.driver.Root,
|
||||
libraryPaths,
|
||||
)
|
||||
|
||||
updateLDCache, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath)
|
||||
var discoverers []discover.Discover
|
||||
|
||||
d := discover.Merge(
|
||||
discover.WithDriverDotSoSymlinks(
|
||||
libraries,
|
||||
version,
|
||||
nvidiaCDIHookPath,
|
||||
),
|
||||
updateLDCache,
|
||||
driverDotSoSymlinksDiscoverer := discover.WithDriverDotSoSymlinks(
|
||||
libraries,
|
||||
version,
|
||||
l.nvidiaCDIHookPath,
|
||||
)
|
||||
discoverers = append(discoverers, driverDotSoSymlinksDiscoverer)
|
||||
|
||||
if l.HookIsSupported(HookEnableCudaCompat) {
|
||||
// TODO: The following should use the version directly.
|
||||
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.nvidiaCDIHookPath, l.driver)
|
||||
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
|
||||
}
|
||||
|
||||
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.nvidiaCDIHookPath, l.ldconfigPath)
|
||||
discoverers = append(discoverers, updateLDCache)
|
||||
|
||||
d := discover.Merge(discoverers...)
|
||||
|
||||
return d, nil
|
||||
}
|
||||
@@ -184,6 +193,8 @@ func NewDriverBinariesDiscoverer(logger logger.Interface, driverRoot string) dis
|
||||
"nvidia-persistenced", /* Persistence mode utility */
|
||||
"nvidia-cuda-mps-control", /* Multi process service CLI */
|
||||
"nvidia-cuda-mps-server", /* Multi process service server */
|
||||
"nvidia-imex", /* NVIDIA IMEX Daemon */
|
||||
"nvidia-imex-ctl", /* NVIDIA IMEX control */
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
30
pkg/nvcdi/hooks.go
Normal file
30
pkg/nvcdi/hooks.go
Normal file
@@ -0,0 +1,30 @@
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvcdi
|
||||
|
||||
// disabledHooks allows individual hooks to be disabled.
|
||||
type disabledHooks map[HookName]bool
|
||||
|
||||
// HookIsSupported checks whether a hook of the specified name is supported.
|
||||
// Hooks must be explicitly disabled, meaning that if no disabled hooks are
|
||||
// specified, all hooks are supported.
|
||||
func (l *nvcdilib) HookIsSupported(h HookName) bool {
|
||||
if len(l.disabledHooks) == 0 {
|
||||
return true
|
||||
}
|
||||
return !l.disabledHooks[h]
|
||||
}
|
||||
@@ -66,11 +66,15 @@ type nvcdilib struct {
|
||||
infolib info.Interface
|
||||
|
||||
mergedDeviceOptions []transform.MergedDeviceOption
|
||||
|
||||
disabledHooks disabledHooks
|
||||
}
|
||||
|
||||
// New creates a new nvcdi library
|
||||
func New(opts ...Option) (Interface, error) {
|
||||
l := &nvcdilib{}
|
||||
l := &nvcdilib{
|
||||
disabledHooks: make(disabledHooks),
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(l)
|
||||
}
|
||||
@@ -97,6 +101,7 @@ func New(opts ...Option) (Interface, error) {
|
||||
root.WithLogger(l.logger),
|
||||
root.WithDriverRoot(l.driverRoot),
|
||||
root.WithLibrarySearchPaths(l.librarySearchPaths...),
|
||||
root.WithConfigSearchPaths(l.configSearchPaths...),
|
||||
)
|
||||
if l.nvmllib == nil {
|
||||
var nvmlOpts []nvml.LibraryOption
|
||||
@@ -110,19 +115,24 @@ func New(opts ...Option) (Interface, error) {
|
||||
}
|
||||
l.nvmllib = nvml.New(nvmlOpts...)
|
||||
}
|
||||
if l.nvsandboxutilslib == nil {
|
||||
var nvsandboxutilsOpts []nvsandboxutils.LibraryOption
|
||||
// Set the library path for libnvidia-sandboxutils
|
||||
candidates, err := l.driver.Libraries().Locate("libnvidia-sandboxutils.so.1")
|
||||
if err != nil {
|
||||
l.logger.Warningf("Ignoring error in locating libnvidia-sandboxutils.so.1: %v", err)
|
||||
} else {
|
||||
libNvidiaSandboxutilsPath := candidates[0]
|
||||
l.logger.Infof("Using %v", libNvidiaSandboxutilsPath)
|
||||
nvsandboxutilsOpts = append(nvsandboxutilsOpts, nvsandboxutils.WithLibraryPath(libNvidiaSandboxutilsPath))
|
||||
}
|
||||
l.nvsandboxutilslib = nvsandboxutils.New(nvsandboxutilsOpts...)
|
||||
}
|
||||
// TODO: Repeated calls to nvsandboxutils.Init and Shutdown are causing
|
||||
// segmentation violations. Here we disabled nvsandbox utils unless explicitly
|
||||
// specified.
|
||||
// This will be reenabled as soon as we have more visibility into why this is
|
||||
// happening and a mechanism to detect and disable this if required.
|
||||
// if l.nvsandboxutilslib == nil {
|
||||
// var nvsandboxutilsOpts []nvsandboxutils.LibraryOption
|
||||
// // Set the library path for libnvidia-sandboxutils
|
||||
// candidates, err := l.driver.Libraries().Locate("libnvidia-sandboxutils.so.1")
|
||||
// if err != nil {
|
||||
// l.logger.Warningf("Ignoring error in locating libnvidia-sandboxutils.so.1: %v", err)
|
||||
// } else {
|
||||
// libNvidiaSandboxutilsPath := candidates[0]
|
||||
// l.logger.Infof("Using %v", libNvidiaSandboxutilsPath)
|
||||
// nvsandboxutilsOpts = append(nvsandboxutilsOpts, nvsandboxutils.WithLibraryPath(libNvidiaSandboxutilsPath))
|
||||
// }
|
||||
// l.nvsandboxutilslib = nvsandboxutils.New(nvsandboxutilsOpts...)
|
||||
// }
|
||||
if l.devicelib == nil {
|
||||
l.devicelib = device.New(l.nvmllib)
|
||||
}
|
||||
@@ -146,6 +156,8 @@ func New(opts ...Option) (Interface, error) {
|
||||
if l.vendor == "" {
|
||||
l.vendor = "management.nvidia.com"
|
||||
}
|
||||
// Management containers in general do not require CUDA Forward compatibility.
|
||||
l.disabledHooks[HookEnableCudaCompat] = true
|
||||
lib = (*managementlib)(l)
|
||||
case ModeNvml:
|
||||
lib = (*nvmllib)(l)
|
||||
|
||||
@@ -80,7 +80,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
||||
return nil, fmt.Errorf("failed to get CUDA version: %v", err)
|
||||
}
|
||||
|
||||
driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCDIHookPath, m.ldconfigPath, version)
|
||||
driver, err := (*nvcdilib)(m).newDriverVersionDiscoverer(version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create driver library discoverer: %v", err)
|
||||
}
|
||||
|
||||
@@ -155,3 +155,14 @@ func WithLibrarySearchPaths(paths []string) Option {
|
||||
o.librarySearchPaths = paths
|
||||
}
|
||||
}
|
||||
|
||||
// WithDisabledHook allows specific hooks to the disabled.
|
||||
// This option can be specified multiple times for each hook.
|
||||
func WithDisabledHook(hook HookName) Option {
|
||||
return func(o *nvcdilib) {
|
||||
if o.disabledHooks == nil {
|
||||
o.disabledHooks = make(map[HookName]bool)
|
||||
}
|
||||
o.disabledHooks[hook] = true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,3 @@
|
||||
FROM quay.io/centos/centos:stream8
|
||||
|
||||
RUN sed -i -e "s|mirrorlist=|#mirrorlist=|g" \
|
||||
-e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" \
|
||||
/etc/yum.repos.d/CentOS-Stream-*
|
||||
FROM quay.io/centos/centos:stream9
|
||||
|
||||
RUN yum install -y createrepo rpm-sign pinentry
|
||||
|
||||
@@ -1,76 +0,0 @@
|
||||
#! /bin/bash
|
||||
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# testing::toolkit::install runs 'toolkit install' through the toolkit test
# container and then verifies the resulting tree below
# ${shared_dir}/usr/local/nvidia/toolkit: libraries, executables, wrapper
# scripts, '.real' binaries and the generated config.toml.
# shared_dir is read from the caller's environment; it is not set here.
testing::toolkit::install() {
	local -r uid=$(id -u)
	local -r gid=$(id -g)
	local -r platform=$(uname)

	# On Darwin the greadlink binary is used in place of readlink.
	local READLINK
	case "${platform}" in
		Darwin) READLINK="greadlink" ;;
		*) READLINK="readlink" ;;
	esac

	local -r toolkit_dir="${shared_dir}/usr/local/nvidia/toolkit"
	local -r config_toml="${toolkit_dir}/.config/nvidia-container-runtime/config.toml"

	testing::docker_run::toolkit::shell 'toolkit install --toolkit-root=/usr/local/nvidia/toolkit'
	# Hand ownership of the generated files back to the invoking user.
	docker run --rm -v "${shared_dir}:/work" alpine sh -c "chown -R ${uid}:${gid} /work/"

	# The toolkit directory must not be empty.
	test -n "$(ls -A "${toolkit_dir}")"

	# Each library is a symlink whose resolved target exists.
	local lib
	for lib in libnvidia-container.so.1 libnvidia-container-go.so.1; do
		test -L "${toolkit_dir}/${lib}"
		test -e "$(${READLINK} -f "${toolkit_dir}/${lib}")"
	done

	test -e "${toolkit_dir}/nvidia-container-cli"
	test -e "${toolkit_dir}/nvidia-container-runtime-hook"
	test -L "${toolkit_dir}/nvidia-container-toolkit"
	test -e "${toolkit_dir}/nvidia-container-runtime"

	# The runtime entry must be the wrapper script containing these lines.
	grep -q -E "nvidia driver modules are not yet loaded, invoking runc directly" "${toolkit_dir}/nvidia-container-runtime"
	grep -q -E "exec runc \".@\"" "${toolkit_dir}/nvidia-container-runtime"

	local wrapped
	for wrapped in nvidia-container-cli nvidia-container-runtime-hook nvidia-container-runtime; do
		test -e "${toolkit_dir}/${wrapped}.real"
	done

	test -e "${config_toml}"

	# Ensure that the config file has the required contents.
	# NOTE: This assumes that RUN_DIR is '/run/nvidia'
	local -r nvidia_run_dir="/run/nvidia"
	local pattern
	for pattern in \
		"^\s*ldconfig = \"@${nvidia_run_dir}/driver/sbin/ldconfig(.real)?\"" \
		"^\s*root = \"${nvidia_run_dir}/driver\"" \
		"^\s*path = \"/usr/local/nvidia/toolkit/nvidia-container-cli\"" \
		"^\s*path = \"/usr/local/nvidia/toolkit/nvidia-ctk\""
	do
		grep -q -E "${pattern}" "${config_toml}"
	done
}
|
||||
|
||||
# testing::toolkit::delete creates a throw-away toolkit root containing one
# file, runs 'toolkit delete' against it, and checks that the root is gone
# while its parent directory still has content.
testing::toolkit::delete() {
	local -r delete_root='/usr/local/nvidia/delete-toolkit'

	testing::docker_run::toolkit::shell "mkdir -p ${delete_root}"
	testing::docker_run::toolkit::shell "touch ${delete_root}/test.file"
	testing::docker_run::toolkit::shell "toolkit delete --toolkit-root=${delete_root}"

	test -n "$(ls -A "${shared_dir}/usr/local/nvidia")"
	test ! -e "${shared_dir}${delete_root}"
}
|
||||
|
||||
# testing::toolkit::main runs the toolkit test cases in order:
# installation first, then deletion.
testing::toolkit::main() {
	local step
	for step in install delete; do
		"testing::toolkit::${step}"
	done
}
|
||||
|
||||
# testing::toolkit::cleanup is a no-op that always succeeds.
testing::toolkit::cleanup() {
	true
}
|
||||
@@ -19,7 +19,6 @@ shopt -s lastpipe
|
||||
readonly basedir="$(dirname "$(realpath "$0")")"
|
||||
source "${basedir}/common.sh"
|
||||
|
||||
source "${basedir}/toolkit_test.sh"
|
||||
source "${basedir}/docker_test.sh"
|
||||
source "${basedir}/crio_test.sh"
|
||||
source "${basedir}/containerd_test.sh"
|
||||
@@ -66,7 +65,7 @@ done
|
||||
|
||||
trap '"$CLEANUP" && testing::cleanup' ERR
|
||||
|
||||
readonly test_cases="${TEST_CASES:-toolkit docker crio containerd}"
|
||||
readonly test_cases="${TEST_CASES:-docker crio containerd}"
|
||||
|
||||
testing::cleanup
|
||||
for tc in ${test_cases}; do
|
||||
45
tests/e2e/Makefile
Normal file
45
tests/e2e/Makefile
Normal file
@@ -0,0 +1,45 @@
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Go toolchain command used to run the tests.
GO_CMD ?= go

# versions.mk is resolved relative to the directory make is invoked from.
# NOTE(review): VERSION and IMAGE_NAME used below are presumably defined
# there - confirm.
include $(CURDIR)/versions.mk

# Passed to -ginkgo.focus to select which specs run.
E2E_RUNTIME ?= docker

# Passed to -install-ctk.
E2E_INSTALL_CTK ?= false

# Distribution used in the default image tag. A plain conditional assignment
# replaces the former `ifeq ($($(DIST)),)` guard: that guard expanded the
# variable *named by* the value of DIST, and only wrapped a `?=` that already
# leaves an existing DIST untouched.
DIST ?= ubuntu20.04
IMAGE_TAG ?= $(VERSION)-$(DIST)
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)

# SSH connection settings forwarded to the test binary.
E2E_SSH_KEY ?=
E2E_SSH_USER ?=
E2E_SSH_HOST ?=
E2E_SSH_PORT ?= 22

.PHONY: test
test:
	cd $(CURDIR)/tests/e2e && $(GO_CMD) test -v . -args \
		-ginkgo.focus="$(E2E_RUNTIME)" \
		-test.timeout=1h \
		-ginkgo.v \
		-install-ctk=$(E2E_INSTALL_CTK) \
		-toolkit-image=$(IMAGE) \
		-ssh-key=$(E2E_SSH_KEY) \
		-ssh-user=$(E2E_SSH_USER) \
		-remote-host=$(E2E_SSH_HOST) \
		-remote-port=$(E2E_SSH_PORT)
|
||||
63
tests/e2e/e2e_test.go
Normal file
63
tests/e2e/e2e_test.go
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Suite-wide state. ctx is assigned in BeforeSuite; every other variable is
// bound to a command-line flag in init.
var (
	// ctx is the context created before the suite runs.
	ctx context.Context

	// installCTK is bound to the -install-ctk flag.
	installCTK bool

	// image is bound to the -toolkit-image flag.
	image string

	// sshKey, sshUser, host and sshPort are bound to the -ssh-key,
	// -ssh-user, -remote-host and -remote-port flags respectively.
	sshKey  string
	sshUser string
	host    string
	sshPort string
)
|
||||
|
||||
func init() {
|
||||
flag.BoolVar(&installCTK, "install-ctk", false, "Install the NVIDIA Container Toolkit")
|
||||
flag.StringVar(&image, "toolkit-image", "", "Repository of the image to test")
|
||||
flag.StringVar(&sshKey, "ssh-key", "", "SSH key to use for remote login")
|
||||
flag.StringVar(&sshUser, "ssh-user", "", "SSH user to use for remote login")
|
||||
flag.StringVar(&host, "remote-host", "", "Hostname of the remote machine")
|
||||
flag.StringVar(&sshPort, "remote-port", "22", "SSH port to use for remote login")
|
||||
}
|
||||
|
||||
func TestMain(t *testing.T) {
|
||||
suiteName := "NVIDIA Container Toolkit E2E"
|
||||
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t,
|
||||
suiteName,
|
||||
)
|
||||
}
|
||||
|
||||
// BeforeSuite runs before the test suite
|
||||
var _ = BeforeSuite(func() {
|
||||
ctx = context.Background()
|
||||
})
|
||||
30
tests/e2e/infra/aws.yaml
Normal file
30
tests/e2e/infra/aws.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
---
# Holodeck environment used as the end-to-end test infrastructure.
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm in particular that ingressIpRanges and image belong under instance.
apiVersion: holodeck.nvidia.com/v1alpha1
kind: Environment
metadata:
  name: HOLODECK_NAME
  description: 'end-to-end test infrastructure'
spec:
  provider: aws
  auth:
    keyName: cnt-ci
    privateKey: HOLODECK_PRIVATE_KEY
  instance:
    type: g4dn.xlarge
    region: us-west-1
    ingressIpRanges:
      - 18.190.12.32/32
      - 3.143.46.93/32
      - 44.230.241.223/32
      - 44.235.4.62/32
      - 52.15.119.136/32
      - 52.24.205.48/32
    image:
      architecture: amd64
      imageId: ami-0ce2cb35386fc22e9
  containerRuntime:
    install: true
    name: docker
  nvidiaContainerToolkit:
    install: false
  nvidiaDriver:
    install: true
|
||||
118
tests/e2e/installer.go
Normal file
118
tests/e2e/installer.go
Normal file
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"text/template"
|
||||
)
|
||||
|
||||
// dockerInstallTemplate is the text/template source of the bash script that
// installs the NVIDIA Container Toolkit on a Docker host by running the toolkit
// image. {{.Image}} provides the default value of the script's IMAGE variable.
var dockerInstallTemplate = `
#! /usr/bin/env bash
set -xe

: ${IMAGE:={{.Image}}}

# Create a temporary directory
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
mkdir -p "$TEMP_DIR"

# Given that docker has an init function that checks for the existence of the
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
# in the /usr/bin directory.
# See https://github.com/moby/moby/blob/20a05dabf44934447d1a66cdd616cc803b81d4e2/daemon/nvidia_linux.go#L32-L46
sudo rm -f /usr/bin/nvidia-container-runtime-hook
sudo ln -s "$TEMP_DIR/toolkit/nvidia-container-runtime-hook" /usr/bin/nvidia-container-runtime-hook

docker run --pid=host --rm -i --privileged \
-v /:/host \
-v /var/run/docker.sock:/var/run/docker.sock \
-v "$TEMP_DIR:$TEMP_DIR" \
-v /etc/docker:/config-root \
${IMAGE} \
--root "$TEMP_DIR" \
--runtime=docker \
--config=/config-root/daemon.json \
--driver-root=/ \
--no-daemon \
--restart-mode=systemd
`
|
||||
|
||||
type ToolkitInstaller struct {
|
||||
runner Runner
|
||||
template string
|
||||
|
||||
Image string
|
||||
}
|
||||
|
||||
type installerOption func(*ToolkitInstaller)
|
||||
|
||||
func WithRunner(r Runner) installerOption {
|
||||
return func(i *ToolkitInstaller) {
|
||||
i.runner = r
|
||||
}
|
||||
}
|
||||
|
||||
func WithImage(image string) installerOption {
|
||||
return func(i *ToolkitInstaller) {
|
||||
i.Image = image
|
||||
}
|
||||
}
|
||||
|
||||
func WithTemplate(template string) installerOption {
|
||||
return func(i *ToolkitInstaller) {
|
||||
i.template = template
|
||||
}
|
||||
}
|
||||
|
||||
func NewToolkitInstaller(opts ...installerOption) (*ToolkitInstaller, error) {
|
||||
i := &ToolkitInstaller{
|
||||
runner: localRunner{},
|
||||
template: dockerInstallTemplate,
|
||||
}
|
||||
|
||||
for _, opt := range opts {
|
||||
opt(i)
|
||||
}
|
||||
|
||||
if i.Image == "" {
|
||||
return nil, fmt.Errorf("image is required")
|
||||
}
|
||||
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func (i *ToolkitInstaller) Install() error {
|
||||
// Parse the combined template
|
||||
tmpl, err := template.New("installScript").Parse(i.template)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing template: %w", err)
|
||||
}
|
||||
|
||||
// Execute the template
|
||||
var renderedScript bytes.Buffer
|
||||
err = tmpl.Execute(&renderedScript, i)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error executing template: %w", err)
|
||||
}
|
||||
|
||||
_, _, err = i.runner.Run(renderedScript.String())
|
||||
return err
|
||||
}
|
||||
218
tests/e2e/nvidia-container-toolkit_test.go
Normal file
218
tests/e2e/nvidia-container-toolkit_test.go
Normal file
@@ -0,0 +1,218 @@
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e
|
||||
|
||||
import (
	"context"
	"path/filepath"
	"strconv"
	"strings"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)
|
||||
|
||||
// Integration tests for Docker runtime
|
||||
var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
|
||||
var r Runner
|
||||
|
||||
// Install the NVIDIA Container Toolkit
|
||||
BeforeAll(func(ctx context.Context) {
|
||||
r = NewRunner(
|
||||
WithHost(host),
|
||||
WithPort(sshPort),
|
||||
WithSshKey(sshKey),
|
||||
WithSshUser(sshUser),
|
||||
)
|
||||
if installCTK {
|
||||
installer, err := NewToolkitInstaller(
|
||||
WithRunner(r),
|
||||
WithImage(image),
|
||||
WithTemplate(dockerInstallTemplate),
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
err = installer.Install()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
}
|
||||
})
|
||||
|
||||
// GPUs are accessible in a container: Running nvidia-smi -L inside the
|
||||
// container shows the same output inside the container as outside the
|
||||
// container. This means that the following commands must all produce
|
||||
// the same output
|
||||
When("running nvidia-smi -L", Ordered, func() {
|
||||
var hostOutput string
|
||||
|
||||
BeforeAll(func(ctx context.Context) {
|
||||
_, _, err := r.Run("docker pull ubuntu")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
hostOutput, _, err = r.Run("nvidia-smi -L")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
|
||||
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(containerOutput).To(Equal(hostOutput))
|
||||
})
|
||||
|
||||
It("should support automatic CDI spec generation", func(ctx context.Context) {
|
||||
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(containerOutput).To(Equal(hostOutput))
|
||||
})
|
||||
|
||||
It("should support automatic CDI spec generation with the --gpus flag", func(ctx context.Context) {
|
||||
containerOutput, _, err := r.Run("docker run --rm -i --gpus=all --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(containerOutput).To(Equal(hostOutput))
|
||||
})
|
||||
|
||||
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
|
||||
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(containerOutput).To(Equal(hostOutput))
|
||||
})
|
||||
|
||||
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
|
||||
containerOutput, _, err := r.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(containerOutput).To(Equal(hostOutput))
|
||||
})
|
||||
})
|
||||
|
||||
// A vectorAdd sample runs in a container with access to all GPUs.
|
||||
// The following should all produce the same result.
|
||||
When("Running the cuda-vectorAdd sample", Ordered, func() {
|
||||
BeforeAll(func(ctx context.Context) {
|
||||
_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
var referenceOutput string
|
||||
|
||||
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
|
||||
var err error
|
||||
referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
Expect(referenceOutput).To(ContainSubstring("Test PASSED"))
|
||||
})
|
||||
|
||||
It("should support automatic CDI spec generation", func(ctx context.Context) {
|
||||
out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(referenceOutput).To(Equal(out2))
|
||||
})
|
||||
|
||||
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
|
||||
out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(referenceOutput).To(Equal(out3))
|
||||
})
|
||||
|
||||
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
|
||||
out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(referenceOutput).To(Equal(out4))
|
||||
})
|
||||
})
|
||||
|
||||
// A deviceQuery sample runs in a container with access to all GPUs
|
||||
// The following should all produce the same result.
|
||||
When("Running the cuda-deviceQuery sample", Ordered, func() {
|
||||
BeforeAll(func(ctx context.Context) {
|
||||
_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
var referenceOutput string
|
||||
|
||||
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
|
||||
var err error
|
||||
referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
|
||||
})
|
||||
|
||||
It("should support automatic CDI spec generation", func(ctx context.Context) {
|
||||
out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(referenceOutput).To(Equal(out2))
|
||||
})
|
||||
|
||||
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
|
||||
out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(referenceOutput).To(Equal(out3))
|
||||
})
|
||||
|
||||
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
|
||||
out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(referenceOutput).To(Equal(out4))
|
||||
})
|
||||
})
|
||||
|
||||
// CUDA Forward compatibility: when the host driver is older than the compat
// libraries shipped in the CUDA image, the nvidia runtime is expected to make
// the container resolve libcuda.so.1 from /usr/local/cuda/compat.
Describe("CUDA Forward compatibility", Ordered, func() {
	BeforeAll(func(ctx context.Context) {
		_, _, err := r.Run("docker pull nvcr.io/nvidia/cuda:12.8.0-base-ubi8")
		Expect(err).ToNot(HaveOccurred())
	})

	// Skip the specs unless the host driver's major version is lower than the
	// major version of the compat libcuda in the image.
	BeforeAll(func(ctx context.Context) {
		compatOutput, _, err := r.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
		Expect(err).ToNot(HaveOccurred())
		Expect(compatOutput).ToNot(BeEmpty())
		// Trim the trailing newline of the listing before taking the base name
		// so that the reported version is clean.
		compatDriverVersion := strings.TrimPrefix(filepath.Base(strings.TrimSpace(compatOutput)), "libcuda.so.")
		compatMajor, err := strconv.Atoi(strings.SplitN(compatDriverVersion, ".", 2)[0])
		Expect(err).ToNot(HaveOccurred())

		driverOutput, _, err := r.Run("nvidia-smi -q | grep \"Driver Version\"")
		Expect(err).ToNot(HaveOccurred())
		parts := strings.SplitN(driverOutput, ":", 2)
		Expect(parts).To(HaveLen(2))

		hostDriverVersion := strings.TrimSpace(parts[1])
		Expect(hostDriverVersion).ToNot(BeEmpty())
		driverMajor, err := strconv.Atoi(strings.SplitN(hostDriverVersion, ".", 2)[0])
		Expect(err).ToNot(HaveOccurred())

		// Compare the major versions as integers: comparing the strings would
		// order them lexicographically (e.g. "99" >= "570").
		if driverMajor >= compatMajor {
			GinkgoLogr.Info("CUDA Forward Compatibility tests require an older driver version", "hostDriverVersion", hostDriverVersion, "compatDriverVersion", compatDriverVersion)
			Skip("CUDA Forward Compatibility tests require an older driver version")
		}
	})

	It("should work with the nvidia runtime in legacy mode", func(ctx context.Context) {
		ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
		Expect(err).ToNot(HaveOccurred())
		Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
	})

	It("should work with the nvidia runtime in CDI mode", func(ctx context.Context) {
		ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
		Expect(err).ToNot(HaveOccurred())
		Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda/compat"))
	})

	// With runc and the runtime hook, libcuda.so.1 is expected to resolve from
	// /usr/lib64 rather than from the compat directory.
	It("should NOT work with nvidia-container-runtime-hook", func(ctx context.Context) {
		ldconfigOut, _, err := r.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=runc --gpus all nvcr.io/nvidia/cuda:12.8.0-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
		Expect(err).ToNot(HaveOccurred())
		Expect(ldconfigOut).To(ContainSubstring("/usr/lib64"))
	})
})
|
||||
})
|
||||
171
tests/e2e/runner.go
Normal file
171
tests/e2e/runner.go
Normal file
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
// localRunner runs scripts with bash on the machine executing the tests.
type localRunner struct{}

// remoteRunner runs scripts on a remote host over SSH.
type remoteRunner struct {
	// sshKey is the path of the private key file used to authenticate.
	sshKey string
	// sshUser is the user name used for the SSH login.
	sshUser string
	// host and port identify the SSH endpoint to dial.
	host string
	port string
}

// runnerOption configures a remoteRunner.
type runnerOption func(*remoteRunner)

// Runner executes a script and returns two strings and an error. The
// implementations in this file return the script's captured standard output as
// the first string.
type Runner interface {
	Run(script string) (string, string, error)
}
|
||||
|
||||
func WithSshKey(key string) runnerOption {
|
||||
return func(r *remoteRunner) {
|
||||
r.sshKey = key
|
||||
}
|
||||
}
|
||||
|
||||
func WithSshUser(user string) runnerOption {
|
||||
return func(r *remoteRunner) {
|
||||
r.sshUser = user
|
||||
}
|
||||
}
|
||||
|
||||
func WithHost(host string) runnerOption {
|
||||
return func(r *remoteRunner) {
|
||||
r.host = host
|
||||
}
|
||||
}
|
||||
|
||||
func WithPort(port string) runnerOption {
|
||||
return func(r *remoteRunner) {
|
||||
r.port = port
|
||||
}
|
||||
}
|
||||
|
||||
func NewRunner(opts ...runnerOption) Runner {
|
||||
r := &remoteRunner{}
|
||||
for _, opt := range opts {
|
||||
opt(r)
|
||||
}
|
||||
|
||||
// If the Host is empty, return a local runner
|
||||
if r.host == "" {
|
||||
return localRunner{}
|
||||
}
|
||||
|
||||
// Otherwise, return a remote runner
|
||||
return r
|
||||
}
|
||||
|
||||
func (l localRunner) Run(script string) (string, string, error) {
|
||||
// Create a command to run the script using bash
|
||||
cmd := exec.Command("bash", "-c", script)
|
||||
|
||||
// Buffer to capture standard output
|
||||
var stdout bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
|
||||
// Buffer to capture standard error
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
// Run the command
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("script execution failed: %v\nSTDOUT: %s\nSTDERR: %s", err, stdout.String(), stderr.String())
|
||||
}
|
||||
|
||||
// Return the captured stdout and nil error
|
||||
return stdout.String(), "", nil
|
||||
}
|
||||
|
||||
func (r remoteRunner) Run(script string) (string, string, error) {
|
||||
// Create a new SSH connection
|
||||
client, err := connectOrDie(r.sshKey, r.sshUser, r.host, r.port)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("failed to connect to %s: %v", r.host, err)
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
// Create a session
|
||||
session, err := client.NewSession()
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("failed to create session: %v", err)
|
||||
}
|
||||
defer session.Close()
|
||||
|
||||
// Capture stdout and stderr
|
||||
var stdout, stderr bytes.Buffer
|
||||
session.Stdout = &stdout
|
||||
session.Stderr = &stderr
|
||||
|
||||
// Run the script
|
||||
err = session.Run(script)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("script execution failed: %v\nSTDOUT: %s\nSTDERR: %s", err, stdout.String(), stderr.String())
|
||||
}
|
||||
|
||||
// Return stdout as string if no errors
|
||||
return stdout.String(), "", nil
|
||||
}
|
||||
|
||||
// createSshClient creates a ssh client, and retries if it fails to connect
|
||||
func connectOrDie(sshKey, sshUser, host, port string) (*ssh.Client, error) {
|
||||
var client *ssh.Client
|
||||
var err error
|
||||
key, err := os.ReadFile(sshKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read key file: %v", err)
|
||||
}
|
||||
signer, err := ssh.ParsePrivateKey(key)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse private key: %v", err)
|
||||
}
|
||||
sshConfig := &ssh.ClientConfig{
|
||||
User: sshUser,
|
||||
Auth: []ssh.AuthMethod{
|
||||
ssh.PublicKeys(signer),
|
||||
},
|
||||
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
|
||||
}
|
||||
|
||||
connectionFailed := false
|
||||
for i := 0; i < 20; i++ {
|
||||
client, err = ssh.Dial("tcp", host+":"+port, sshConfig)
|
||||
if err == nil {
|
||||
return client, nil // Connection succeeded, return the client.
|
||||
}
|
||||
connectionFailed = true
|
||||
// Sleep for a brief moment before retrying.
|
||||
// You can adjust the duration based on your requirements.
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
|
||||
if connectionFailed {
|
||||
return nil, fmt.Errorf("failed to connect to %s after 10 retries, giving up", host)
|
||||
}
|
||||
|
||||
return client, nil
|
||||
}
|
||||
21
tests/go.mod
Normal file
21
tests/go.mod
Normal file
@@ -0,0 +1,21 @@
|
||||
module github.com/NVIDIA/nvidia-container-toolkit/tests
|
||||
|
||||
go 1.23.2
|
||||
|
||||
require (
|
||||
github.com/onsi/ginkgo/v2 v2.22.2
|
||||
github.com/onsi/gomega v1.36.2
|
||||
golang.org/x/crypto v0.35.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/go-logr/logr v1.4.2 // indirect
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect
|
||||
golang.org/x/net v0.33.0 // indirect
|
||||
golang.org/x/sys v0.30.0 // indirect
|
||||
golang.org/x/text v0.22.0 // indirect
|
||||
golang.org/x/tools v0.28.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
36
tests/go.sum
Normal file
36
tests/go.sum
Normal file
@@ -0,0 +1,36 @@
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
|
||||
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg=
|
||||
github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
|
||||
github.com/onsi/ginkgo/v2 v2.22.2 h1:/3X8Panh8/WwhU/3Ssa6rCKqPLuAkVY2I0RoyDLySlU=
|
||||
github.com/onsi/ginkgo/v2 v2.22.2/go.mod h1:oeMosUL+8LtarXBHu/c0bx2D/K9zyQ6uX3cTyztHwsk=
|
||||
github.com/onsi/gomega v1.36.2 h1:koNYke6TVk6ZmnyHrCXba/T/MoLBXFjeC1PtvYgw0A8=
|
||||
github.com/onsi/gomega v1.36.2/go.mod h1:DdwyADRjrc825LhMEkD76cHR5+pUnjhUN8GlHlRPHzY=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs=
|
||||
golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ=
|
||||
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
|
||||
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
|
||||
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
||||
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU=
|
||||
golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s=
|
||||
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
|
||||
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
||||
golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8=
|
||||
golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw=
|
||||
google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk=
|
||||
google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user