mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
67 Commits
pull-reque
...
release-1.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cdfb232d4c | ||
|
|
fa259354ac | ||
|
|
43a3861463 | ||
|
|
9274829517 | ||
|
|
4668c511de | ||
|
|
76ecce5e8f | ||
|
|
cbb66c1a30 | ||
|
|
a714bf2d83 | ||
|
|
de2ed16f6c | ||
|
|
99c8370f25 | ||
|
|
d20b00e360 | ||
|
|
a724207fbd | ||
|
|
37b1e37c8f | ||
|
|
9019dd1d02 | ||
|
|
5605d19133 | ||
|
|
3a0c989066 | ||
|
|
5d246adf3d | ||
|
|
8281e7d341 | ||
|
|
1d046b4a9b | ||
|
|
888ad62c98 | ||
|
|
2fa37973e0 | ||
|
|
d57d83405d | ||
|
|
6b077a2f1c | ||
|
|
4e2861fe77 | ||
|
|
b51d51369d | ||
|
|
cc15f77f9a | ||
|
|
10e86367f9 | ||
|
|
7c6c5e6104 | ||
|
|
323580c2fa | ||
|
|
ae329e3a94 | ||
|
|
99c955a3f4 | ||
|
|
24a48582ca | ||
|
|
a3e0a72fd0 | ||
|
|
2bc874376f | ||
|
|
bd1084d1a1 | ||
|
|
98e5ad0a10 | ||
|
|
3c710a0596 | ||
|
|
38a8bb183a | ||
|
|
7038e7f003 | ||
|
|
539033af43 | ||
|
|
365e9e03b9 | ||
|
|
c1894a0760 | ||
|
|
9ea3360701 | ||
|
|
b6987c526a | ||
|
|
3604927034 | ||
|
|
640bd6ee3f | ||
|
|
6593f3c2e4 | ||
|
|
d52a237c12 | ||
|
|
9d9260db8c | ||
|
|
888fe458ae | ||
|
|
d167812ce3 | ||
|
|
7ff23999e8 | ||
|
|
a9b01a43bc | ||
|
|
ccff00bc30 | ||
|
|
f7d54200c6 | ||
|
|
29fd206f3a | ||
|
|
cfe0d5d07e | ||
|
|
9ab640b2be | ||
|
|
9d2e4b48bc | ||
|
|
c050bcf081 | ||
|
|
27d0fa4ee2 | ||
|
|
e0e22fdceb | ||
|
|
c1eae0deda | ||
|
|
68f0203a49 | ||
|
|
cc688f7c75 | ||
|
|
7566eb124a | ||
|
|
eb5d50abc4 |
@@ -19,6 +19,7 @@ default:
|
||||
|
||||
variables:
|
||||
GIT_SUBMODULE_STRATEGY: recursive
|
||||
BUILDIMAGE: "${CI_REGISTRY_IMAGE}/build:${CI_COMMIT_SHORT_SHA}"
|
||||
BUILD_MULTI_ARCH_IMAGES: "true"
|
||||
|
||||
stages:
|
||||
@@ -33,7 +34,6 @@ stages:
|
||||
- test
|
||||
- scan
|
||||
- release
|
||||
- sign
|
||||
|
||||
.pipeline-trigger-rules:
|
||||
rules:
|
||||
@@ -145,7 +145,7 @@ trigger-pipeline:
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
|
||||
script:
|
||||
- make -f deployments/container/Makefile test-${DIST}
|
||||
- make -f build/container/Makefile test-${DIST}
|
||||
|
||||
# Define the test targets
|
||||
test-packaging:
|
||||
@@ -178,10 +178,13 @@ test-packaging:
|
||||
OUT_IMAGE_VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
before_script:
|
||||
- !reference [.regctl-setup, before_script]
|
||||
# We ensure that the components of the output image are set:
|
||||
- 'echo Image Name: ${OUT_IMAGE_NAME} ; [[ -n "${OUT_IMAGE_NAME}" ]] || exit 1'
|
||||
|
||||
# We ensure that the OUT_IMAGE_VERSION is set
|
||||
- 'echo Version: ${OUT_IMAGE_VERSION} ; [[ -n "${OUT_IMAGE_VERSION}" ]] || exit 1'
|
||||
|
||||
# In the case where we are deploying a different version to the CI_COMMIT_SHA, we
|
||||
# need to tag the image.
|
||||
# Note: a leading 'v' is stripped from the version if present
|
||||
- apk add --no-cache make bash
|
||||
script:
|
||||
# Log in to the "output" registry, tag the image and push the image
|
||||
@@ -192,7 +195,7 @@ test-packaging:
|
||||
|
||||
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
|
||||
# Target
|
||||
- make -f deployments/container/Makefile push-${DIST}
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
|
||||
# Define a staging release step that pushes an image to an internal "staging" repository
|
||||
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
|
||||
@@ -201,10 +204,10 @@ test-packaging:
|
||||
extends:
|
||||
- .release
|
||||
variables:
|
||||
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
|
||||
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
|
||||
OUT_REGISTRY: "${NGC_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${NGC_REGISTRY_STAGING_IMAGE_NAME}"
|
||||
OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
|
||||
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
||||
OUT_REGISTRY: "${CI_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/container-toolkit"
|
||||
|
||||
# Define an external release step that pushes an image to an external repository.
|
||||
# This includes a development image off main.
|
||||
@@ -222,6 +225,13 @@ test-packaging:
|
||||
OUT_IMAGE_VERSION: "${DEVEL_RELEASE_IMAGE_VERSION}"
|
||||
|
||||
# Define the release jobs
|
||||
release:staging-centos7:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-centos7
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
release:staging-ubi8:
|
||||
extends:
|
||||
- .release:staging
|
||||
|
||||
3
.github/copy-pr-bot.yaml
vendored
3
.github/copy-pr-bot.yaml
vendored
@@ -1,3 +0,0 @@
|
||||
# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/#configuration
|
||||
|
||||
enabled: true
|
||||
120
.github/dependabot.yml
vendored
120
.github/dependabot.yml
vendored
@@ -1,120 +0,0 @@
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
# main branch
|
||||
- package-ecosystem: "gomod"
|
||||
target-branch: main
|
||||
directories:
|
||||
- "/"
|
||||
- "deployments/devel"
|
||||
- "tests"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
labels:
|
||||
- dependencies
|
||||
groups:
|
||||
k8sio:
|
||||
patterns:
|
||||
- k8s.io/*
|
||||
exclude-patterns:
|
||||
- k8s.io/klog/*
|
||||
|
||||
- package-ecosystem: "docker"
|
||||
target-branch: main
|
||||
directories:
|
||||
# CUDA image
|
||||
- "/deployments/container"
|
||||
# Golang version
|
||||
- "/deployments/devel"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
labels:
|
||||
- dependencies
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
target-branch: main
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
labels:
|
||||
- dependencies
|
||||
|
||||
# Allow dependabot to update the libnvidia-container submodule.
|
||||
- package-ecosystem: "gitsubmodule"
|
||||
target-branch: main
|
||||
directory: "/"
|
||||
allow:
|
||||
- dependency-name: "third_party/libnvidia-container"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
labels:
|
||||
- dependencies
|
||||
- libnvidia-container
|
||||
|
||||
# The release branch(es):
|
||||
- package-ecosystem: "gomod"
|
||||
target-branch: release-1.17
|
||||
directories:
|
||||
- "/"
|
||||
# We don't update development or test dependencies on release branches
|
||||
# - "deployments/devel"
|
||||
# - "tests"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
labels:
|
||||
- dependencies
|
||||
- maintenance
|
||||
ignore:
|
||||
# For release branches we only consider patch updates.
|
||||
- dependency-name: "*"
|
||||
update-types:
|
||||
- version-update:semver-major
|
||||
- version-update:semver-minor
|
||||
groups:
|
||||
k8sio:
|
||||
patterns:
|
||||
- k8s.io/*
|
||||
exclude-patterns:
|
||||
- k8s.io/klog/*
|
||||
|
||||
- package-ecosystem: "docker"
|
||||
target-branch: release-1.17
|
||||
directories:
|
||||
# CUDA image
|
||||
- "/deployments/container"
|
||||
# Golang version
|
||||
- "/deployments/devel"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
ignore:
|
||||
# For release branches we only apply patch updates to the golang version.
|
||||
- dependency-name: "*golang*"
|
||||
update-types:
|
||||
- version-update:semver-major
|
||||
- version-update:semver-minor
|
||||
labels:
|
||||
- dependencies
|
||||
- maintenance
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
target-branch: release-1.17
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
labels:
|
||||
- dependencies
|
||||
- maintenance
|
||||
|
||||
# GitHub Actions updates also need to target the gh-pages branch.
|
||||
- package-ecosystem: "github-actions"
|
||||
target-branch: gh-pages
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
labels:
|
||||
- dependencies
|
||||
53
.github/workflows/ci.yaml
vendored
53
.github/workflows/ci.yaml
vendored
@@ -1,53 +0,0 @@
|
||||
# Copyright 2025 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: CI Pipeline
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- "pull-request/[0-9]+"
|
||||
- main
|
||||
- release-*
|
||||
|
||||
jobs:
|
||||
code-scanning:
|
||||
uses: ./.github/workflows/code_scanning.yaml
|
||||
|
||||
variables:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.version.outputs.version }}
|
||||
steps:
|
||||
- name: Generate Commit Short SHA
|
||||
id: version
|
||||
run: echo "version=$(echo $GITHUB_SHA | cut -c1-8)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
golang:
|
||||
uses: ./.github/workflows/golang.yaml
|
||||
|
||||
image:
|
||||
uses: ./.github/workflows/image.yaml
|
||||
needs: [variables, golang, code-scanning]
|
||||
secrets: inherit
|
||||
with:
|
||||
version: ${{ needs.variables.outputs.version }}
|
||||
build_multi_arch_images: ${{ github.ref_name == 'main' || startsWith(github.ref_name, 'release-') }}
|
||||
|
||||
e2e-test:
|
||||
needs: [image, variables]
|
||||
secrets: inherit
|
||||
uses: ./.github/workflows/e2e.yaml
|
||||
with:
|
||||
version: ${{ needs.variables.outputs.version }}
|
||||
49
.github/workflows/code_scanning.yaml
vendored
49
.github/workflows/code_scanning.yaml
vendored
@@ -1,49 +0,0 @@
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: "CodeQL"
|
||||
|
||||
on:
|
||||
workflow_call: {}
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze Go code with CodeQL
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 360
|
||||
permissions:
|
||||
security-events: write
|
||||
packages: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: go
|
||||
build-mode: manual
|
||||
- shell: bash
|
||||
run: |
|
||||
make build
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
with:
|
||||
category: "/language:go"
|
||||
103
.github/workflows/e2e.yaml
vendored
103
.github/workflows/e2e.yaml
vendored
@@ -1,103 +0,0 @@
|
||||
# Copyright 2025 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: End-to-end Tests
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
required: true
|
||||
type: string
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID:
|
||||
required: true
|
||||
AWS_SECRET_ACCESS_KEY:
|
||||
required: true
|
||||
AWS_SSH_KEY:
|
||||
required: true
|
||||
E2E_SSH_USER:
|
||||
required: true
|
||||
SLACK_BOT_TOKEN:
|
||||
required: true
|
||||
SLACK_CHANNEL_ID:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
e2e-tests:
|
||||
runs-on: linux-amd64-cpu4
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Calculate build vars
|
||||
id: vars
|
||||
run: |
|
||||
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
|
||||
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
|
||||
GOLANG_VERSION=$(./hack/golang-version.sh)
|
||||
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
|
||||
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ env.GOLANG_VERSION }}
|
||||
|
||||
- name: Set up Holodeck
|
||||
uses: NVIDIA/holodeck@v0.2.12
|
||||
with:
|
||||
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
|
||||
holodeck_config: "tests/e2e/infra/aws.yaml"
|
||||
|
||||
- name: Get public dns name
|
||||
id: holodeck_public_dns_name
|
||||
uses: mikefarah/yq@master
|
||||
with:
|
||||
cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml
|
||||
|
||||
- name: Run e2e tests
|
||||
env:
|
||||
E2E_INSTALL_CTK: "true"
|
||||
E2E_IMAGE_NAME: ghcr.io/nvidia/container-toolkit
|
||||
E2E_IMAGE_TAG: ${{ inputs.version }}-ubuntu20.04
|
||||
E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
|
||||
E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
|
||||
run: |
|
||||
e2e_ssh_key=$(mktemp)
|
||||
echo "${{ secrets.AWS_SSH_KEY }}" > "$e2e_ssh_key"
|
||||
chmod 600 "$e2e_ssh_key"
|
||||
export E2E_SSH_KEY="$e2e_ssh_key"
|
||||
|
||||
make -f tests/e2e/Makefile test
|
||||
|
||||
- name: Archive Ginkgo logs
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ginkgo-logs
|
||||
path: ginkgo.json
|
||||
retention-days: 15
|
||||
- name: Send Slack alert notification
|
||||
if: ${{ failure() }}
|
||||
uses: slackapi/slack-github-action@v2.1.0
|
||||
with:
|
||||
method: chat.postMessage
|
||||
token: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
payload: |
|
||||
channel: ${{ secrets.SLACK_CHANNEL_ID }}
|
||||
text: |
|
||||
:x: On repository ${{ github.repository }}, the Workflow *${{ github.workflow }}* has failed.
|
||||
|
||||
Details: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
102
.github/workflows/golang.yaml
vendored
102
.github/workflows/golang.yaml
vendored
@@ -1,102 +0,0 @@
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: Golang
|
||||
|
||||
on:
|
||||
workflow_call: {}
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Checkout code
|
||||
|
||||
- name: Get Golang version
|
||||
id: vars
|
||||
run: |
|
||||
GOLANG_VERSION=$(./hack/golang-version.sh)
|
||||
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
|
||||
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ env.GOLANG_VERSION }}
|
||||
|
||||
- name: Lint
|
||||
uses: golangci/golangci-lint-action@v8
|
||||
with:
|
||||
version: latest
|
||||
args: -v --timeout 5m
|
||||
skip-cache: true
|
||||
|
||||
- name: Check golang modules
|
||||
run: |
|
||||
make check-vendor
|
||||
make -C deployments/devel check-modules
|
||||
|
||||
test:
|
||||
name: Unit test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Get Golang version
|
||||
id: vars
|
||||
run: |
|
||||
GOLANG_VERSION=$(./hack/golang-version.sh)
|
||||
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
|
||||
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ env.GOLANG_VERSION }}
|
||||
|
||||
- name: Run unit tests and generate coverage report
|
||||
run: make coverage
|
||||
|
||||
- name: Upload to Coveralls
|
||||
uses: coverallsapp/github-action@v2
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
file: coverage.out
|
||||
|
||||
build:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Get Golang version
|
||||
id: vars
|
||||
run: |
|
||||
GOLANG_VERSION=$(./hack/golang-version.sh)
|
||||
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION ?= }" >> $GITHUB_ENV
|
||||
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ env.GOLANG_VERSION }}
|
||||
|
||||
- run: make build
|
||||
126
.github/workflows/image.yaml
vendored
126
.github/workflows/image.yaml
vendored
@@ -1,126 +0,0 @@
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Run this workflow on pull requests
|
||||
name: image
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
required: true
|
||||
type: string
|
||||
build_multi_arch_images:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
packages:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
target:
|
||||
- ubuntu18.04-arm64
|
||||
- ubuntu18.04-amd64
|
||||
- ubuntu18.04-ppc64le
|
||||
- centos7-aarch64
|
||||
- centos7-x86_64
|
||||
- centos8-ppc64le
|
||||
ispr:
|
||||
- ${{ github.ref_name != 'main' && !startsWith( github.ref_name, 'release-' ) }}
|
||||
exclude:
|
||||
- ispr: true
|
||||
target: ubuntu18.04-arm64
|
||||
- ispr: true
|
||||
target: ubuntu18.04-ppc64le
|
||||
- ispr: true
|
||||
target: centos7-aarch64
|
||||
- ispr: true
|
||||
target: centos8-ppc64le
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Check out code
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:master
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: build ${{ matrix.target }} packages
|
||||
run: |
|
||||
sudo apt-get install -y coreutils build-essential sed git bash make
|
||||
echo "Building packages"
|
||||
./scripts/build-packages.sh ${{ matrix.target }}
|
||||
|
||||
- name: 'Upload Artifacts'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
compression-level: 0
|
||||
name: toolkit-container-${{ matrix.target }}-${{ github.run_id }}
|
||||
path: ${{ github.workspace }}/dist/*
|
||||
|
||||
image:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
dist:
|
||||
- ubuntu20.04
|
||||
- ubi8
|
||||
- packaging
|
||||
ispr:
|
||||
- ${{ github.ref_name != 'main' && !startsWith( github.ref_name, 'release-' ) }}
|
||||
exclude:
|
||||
- ispr: true
|
||||
dist: ubi8
|
||||
needs: packages
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Check out code
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:master
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Get built packages
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: ${{ github.workspace }}/dist/
|
||||
pattern: toolkit-container-*-${{ github.run_id }}
|
||||
merge-multiple: true
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build image
|
||||
env:
|
||||
IMAGE_NAME: ghcr.io/nvidia/container-toolkit
|
||||
VERSION: ${{ inputs.version }}
|
||||
PUSH_ON_BUILD: "true"
|
||||
BUILD_MULTI_ARCH_IMAGES: ${{ inputs.build_multi_arch_images }}
|
||||
run: |
|
||||
echo "${VERSION}"
|
||||
make -f deployments/container/Makefile build-${{ matrix.dist }}
|
||||
22
.github/workflows/pre-sanity.yml
vendored
Normal file
22
.github/workflows/pre-sanity.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: Run pre sanity
|
||||
|
||||
# run this workflow for each commit
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Build dev image
|
||||
run: make .build-image
|
||||
|
||||
- name: Build
|
||||
run: make docker-build
|
||||
|
||||
- name: Tests
|
||||
run: make docker-coverage
|
||||
|
||||
- name: Checks
|
||||
run: make docker-check
|
||||
38
.github/workflows/release.yaml
vendored
38
.github/workflows/release.yaml
vendored
@@ -1,38 +0,0 @@
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Run this workflow on new tags
|
||||
name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- v*
|
||||
|
||||
jobs:
|
||||
release:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Check out code
|
||||
|
||||
- name: Prepare Artifacts
|
||||
run: |
|
||||
./hack/prepare-artifacts.sh ${{ github.ref_name }}
|
||||
|
||||
- name: Create Draft Release
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
./hack/create-release.sh ${{ github.ref_name }}
|
||||
16
.gitignore
vendored
16
.gitignore
vendored
@@ -1,11 +1,13 @@
|
||||
/dist
|
||||
/artifacts
|
||||
dist
|
||||
artifacts
|
||||
*.swp
|
||||
*.swo
|
||||
/coverage.out*
|
||||
/tests/output/
|
||||
/nvidia-*
|
||||
/test/output/
|
||||
/nvidia-container-runtime
|
||||
/nvidia-container-runtime.*
|
||||
/nvidia-container-runtime-hook
|
||||
/nvidia-container-toolkit
|
||||
/nvidia-ctk
|
||||
/shared-*
|
||||
/release-*
|
||||
/bin
|
||||
/toolkit-test
|
||||
/release-*
|
||||
@@ -15,6 +15,68 @@
|
||||
include:
|
||||
- .common-ci.yml
|
||||
|
||||
build-dev-image:
|
||||
stage: image
|
||||
script:
|
||||
- apk --no-cache add make bash
|
||||
- make .build-image
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
- make .push-build-image
|
||||
|
||||
.requires-build-image:
|
||||
image: "${BUILDIMAGE}"
|
||||
|
||||
.go-check:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: go-checks
|
||||
|
||||
fmt:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make assert-fmt
|
||||
|
||||
vet:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make vet
|
||||
|
||||
lint:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make lint
|
||||
allow_failure: true
|
||||
|
||||
ineffassign:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make ineffassign
|
||||
allow_failure: true
|
||||
|
||||
misspell:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make misspell
|
||||
|
||||
go-build:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: go-build
|
||||
script:
|
||||
- make build
|
||||
|
||||
unit-tests:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: unit-tests
|
||||
script:
|
||||
- make coverage
|
||||
|
||||
# Define the package build helpers
|
||||
.multi-arch-build:
|
||||
before_script:
|
||||
@@ -126,7 +188,15 @@ package-ubuntu18.04-ppc64le:
|
||||
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
script:
|
||||
- make -f deployments/container/Makefile build-${DIST}
|
||||
- make -f build/container/Makefile build-${DIST}
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-centos7
|
||||
needs:
|
||||
- package-centos7-x86_64
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
@@ -176,6 +246,12 @@ image-packaging:
|
||||
optional: true
|
||||
|
||||
# Define publish test helpers
|
||||
.test:toolkit:
|
||||
extends:
|
||||
- .integration
|
||||
variables:
|
||||
TEST_CASES: "toolkit"
|
||||
|
||||
.test:docker:
|
||||
extends:
|
||||
- .integration
|
||||
@@ -226,3 +302,4 @@ test-docker-ubuntu20.04:
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
|
||||
4
.gitmodules
vendored
4
.gitmodules
vendored
@@ -1,4 +1,4 @@
|
||||
[submodule "third_party/libnvidia-container"]
|
||||
path = third_party/libnvidia-container
|
||||
url = https://github.com/NVIDIA/libnvidia-container.git
|
||||
branch = main
|
||||
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
|
||||
branch = release-1.14
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
version: "2"
|
||||
linters:
|
||||
enable:
|
||||
- contextcheck
|
||||
- gocritic
|
||||
- gosec
|
||||
- misspell
|
||||
- unconvert
|
||||
exclusions:
|
||||
generated: lax
|
||||
presets:
|
||||
- comments
|
||||
- common-false-positives
|
||||
- legacy
|
||||
- std-error-handling
|
||||
rules:
|
||||
# Exclude the gocritic dupSubExpr issue for cgo files.
|
||||
- linters:
|
||||
- gocritic
|
||||
path: internal/dxcore/dxcore.go
|
||||
text: dupSubExpr
|
||||
# Exclude the checks for usage of returns to config.Delete(Path) in the
|
||||
# crio and containerd config packages.
|
||||
- linters:
|
||||
- errcheck
|
||||
path: pkg/config/engine/
|
||||
text: config.Delete
|
||||
# RENDERD refers to the Render Device and not the past tense of render.
|
||||
- linters:
|
||||
- misspell
|
||||
path: .*.go
|
||||
text: '`RENDERD` is a misspelling of `RENDERED`'
|
||||
# The legacy hook relies on spec.Hooks.Prestart, which is deprecated as of
|
||||
# the v1.2.0 OCI runtime spec.
|
||||
- path: (.+)\.go$
|
||||
text: SA1019:(.+).Prestart is deprecated(.+)
|
||||
# TODO: We should address each of the following integer overflows.
|
||||
- path: (.+)\.go$
|
||||
text: 'G115: integer overflow conversion(.+)'
|
||||
paths:
|
||||
- third_party$
|
||||
- builtin$
|
||||
- examples$
|
||||
formatters:
|
||||
enable:
|
||||
- gofmt
|
||||
- goimports
|
||||
settings:
|
||||
goimports:
|
||||
local-prefixes:
|
||||
- github.com/NVIDIA/nvidia-container-toolkit
|
||||
exclusions:
|
||||
generated: lax
|
||||
paths:
|
||||
- third_party$
|
||||
- builtin$
|
||||
- examples$
|
||||
@@ -33,7 +33,7 @@ variables:
|
||||
# On the multi-arch builder we don't need the qemu setup.
|
||||
SKIP_QEMU_SETUP: "1"
|
||||
# Define the public staging registry
|
||||
STAGING_REGISTRY: ghcr.io/nvidia
|
||||
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
|
||||
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
|
||||
ARTIFACTORY_REPO_BASE: "https://urm.nvidia.com/artifactory/sw-gpu-cloudnative"
|
||||
KITMAKER_RELEASE_FOLDER: "kitmaker"
|
||||
@@ -67,7 +67,12 @@ variables:
|
||||
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
|
||||
script:
|
||||
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
|
||||
- make -f deployments/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
|
||||
- make -f build/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .dist-centos7
|
||||
- .image-pull
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
@@ -127,6 +132,14 @@ image-packaging:
|
||||
- policy_evaluation.json
|
||||
|
||||
# Define the scan targets
|
||||
scan-centos7-amd64:
|
||||
extends:
|
||||
- .dist-centos7
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
scan-ubuntu20.04-amd64:
|
||||
extends:
|
||||
- .dist-ubuntu20.04
|
||||
@@ -204,6 +217,23 @@ release:packages:kitmaker:
|
||||
extends:
|
||||
- .release:packages
|
||||
|
||||
release:archive:
|
||||
extends:
|
||||
- .release:external
|
||||
needs:
|
||||
- image-packaging
|
||||
variables:
|
||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
PACKAGE_REGISTRY: "${CI_REGISTRY}"
|
||||
PACKAGE_REGISTRY_USER: "${CI_REGISTRY_USER}"
|
||||
PACKAGE_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
||||
PACKAGE_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
PACKAGE_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}-packaging"
|
||||
PACKAGE_ARCHIVE_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-generic-local/${PACKAGE_ARCHIVE_RELEASE_FOLDER}"
|
||||
script:
|
||||
- apk add --no-cache bash git
|
||||
- ./scripts/archive-packages.sh "${PACKAGE_ARCHIVE_ARTIFACTORY_REPO}"
|
||||
|
||||
release:staging-ubuntu20.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
@@ -213,6 +243,11 @@ release:staging-ubuntu20.04:
|
||||
|
||||
# Define the external release targets
|
||||
# Release to NGC
|
||||
release:ngc-centos7:
|
||||
extends:
|
||||
- .dist-centos7
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubuntu20.04:
|
||||
extends:
|
||||
- .dist-ubuntu20.04
|
||||
@@ -227,62 +262,3 @@ release:ngc-packaging:
|
||||
extends:
|
||||
- .dist-packaging
|
||||
- .release:ngc
|
||||
|
||||
# Define the external image signing steps for NGC
|
||||
# Download the ngc cli binary for use in the sign steps
|
||||
.ngccli-setup:
|
||||
before_script:
|
||||
- apt-get update && apt-get install -y curl unzip jq
|
||||
- |
|
||||
if [ -z "${NGCCLI_VERSION}" ]; then
|
||||
NGC_VERSION_URL="https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions"
|
||||
# Extract the latest version from the JSON data using jq
|
||||
export NGCCLI_VERSION=$(curl -s $NGC_VERSION_URL | jq -r '.recipe.latestVersionIdStr')
|
||||
fi
|
||||
echo "NGCCLI_VERSION ${NGCCLI_VERSION}"
|
||||
- curl -sSLo ngccli_linux.zip https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/${NGCCLI_VERSION}/files/ngccli_linux.zip
|
||||
- unzip ngccli_linux.zip
|
||||
- chmod u+x ngc-cli/ngc
|
||||
|
||||
# .sign forms the base of the deployment jobs which signs images in the CI registry.
|
||||
# This is extended with the image name and version to be deployed.
|
||||
.sign:ngc:
|
||||
image: ubuntu:latest
|
||||
stage: sign
|
||||
rules:
|
||||
- if: $CI_COMMIT_TAG
|
||||
variables:
|
||||
NGC_CLI_API_KEY: "${NGC_REGISTRY_TOKEN}"
|
||||
IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
||||
IMAGE_TAG: "${CI_COMMIT_TAG}-${DIST}"
|
||||
retry:
|
||||
max: 2
|
||||
before_script:
|
||||
- !reference [.ngccli-setup, before_script]
|
||||
# We ensure that the IMAGE_NAME and IMAGE_TAG is set
|
||||
- 'echo Image Name: ${IMAGE_NAME} && [[ -n "${IMAGE_NAME}" ]] || exit 1'
|
||||
- 'echo Image Tag: ${IMAGE_TAG} && [[ -n "${IMAGE_TAG}" ]] || exit 1'
|
||||
script:
|
||||
- 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"'
|
||||
- ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia
|
||||
|
||||
sign:ngc-ubuntu20.04:
|
||||
extends:
|
||||
- .dist-ubuntu20.04
|
||||
- .sign:ngc
|
||||
needs:
|
||||
- release:ngc-ubuntu20.04
|
||||
|
||||
sign:ngc-ubi8:
|
||||
extends:
|
||||
- .dist-ubi8
|
||||
- .sign:ngc
|
||||
needs:
|
||||
- release:ngc-ubi8
|
||||
|
||||
sign:ngc-packaging:
|
||||
extends:
|
||||
- .dist-packaging
|
||||
- .sign:ngc
|
||||
needs:
|
||||
- release:ngc-packaging
|
||||
|
||||
181
CHANGELOG.md
181
CHANGELOG.md
@@ -1,176 +1,27 @@
|
||||
# NVIDIA Container Toolkit Changelog
|
||||
|
||||
## v1.17.4
|
||||
- Disable mounting of compat libs from container by default
|
||||
- Add allow-cuda-compat-libs-from-container feature flag
|
||||
- Skip graphics modifier in CSV mode
|
||||
- Properly pass configSearchPaths to a Driver constructor
|
||||
- Add support for containerd version 3 config
|
||||
- Add string TOML source
|
||||
|
||||
### Changes in libnvidia-container
|
||||
- Add no-cntlibs CLI option to nvidia-container-cli
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
- Bump CUDA base image version to 12.6.3
|
||||
|
||||
## v1.17.3
|
||||
- Only allow host-relative LDConfig paths by default.
|
||||
### Changes in libnvidia-container
|
||||
- Create virtual copy of host ldconfig binary before calling fexecve()
|
||||
|
||||
## v1.17.2
|
||||
- Fixed a bug where legacy images would set imex channels as `all`.
|
||||
|
||||
## v1.17.1
|
||||
- Fixed a bug where specific symlinks existing in a container image could cause a container to fail to start.
|
||||
- Fixed a bug on Tegra-based systems where a container would fail to start.
|
||||
- Fixed a bug where the default container runtime config path was not properly set.
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
- Fallback to using a config file if the current runtime config can not be determined from the command line.
|
||||
|
||||
## v1.17.0
|
||||
- Promote v1.17.0-rc.2 to v1.17.0
|
||||
- Fix bug when using just-in-time CDI spec generation
|
||||
- Check for valid paths in create-symlinks hook
|
||||
|
||||
## v1.17.0-rc.2
|
||||
- Fix bug in locating libcuda.so from ldcache
|
||||
- Fix bug in sorting of symlink chain
|
||||
- Remove unsupported print-ldcache command
|
||||
- Remove csv-filename support from create-symlinks
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
- Fallback to `crio-status` if `crio status` does not work when configuring the crio runtime
|
||||
|
||||
## v1.17.0-rc.1
|
||||
- Allow IMEX channels to be requested as volume mounts
|
||||
- Fix typo in error message
|
||||
- Add disable-imex-channel-creation feature flag
|
||||
- Add -z,lazy to LDFLAGS
|
||||
- Add imex channels to management CDI spec
|
||||
- Add support to fetch current container runtime config from the command line.
|
||||
- Add creation of select driver symlinks to CDI spec generation.
|
||||
- Remove support for config overrides when configuring runtimes.
|
||||
- Skip explicit creation of libnvidia-allocator.so.1 symlink
|
||||
- Add vdpau as a driver library search path.
|
||||
- Add support for using libnvsandboxutils to generate CDI specifications.
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
|
||||
- Allow opt-in features to be selected when deploying the toolkit-container.
|
||||
- Bump CUDA base image version to 12.6.2
|
||||
- Remove support for config overrides when configuring runtimes.
|
||||
|
||||
### Changes in libnvidia-container
|
||||
|
||||
- Add no-create-imex-channels command line option.
|
||||
|
||||
## v1.16.2
|
||||
- Exclude libnvidia-allocator from graphics mounts. This fixes a bug that leaks mounts when a container is started with bi-directional mount propagation.
|
||||
- Use empty string for default runtime-config-override. This removes a redundant warning for runtimes (e.g. Docker) where this is not applicable.
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
- Bump CUDA base image version to 12.6.0
|
||||
|
||||
### Changes in libnvidia-container
|
||||
- Add no-gsp-firmware command line option
|
||||
- Add no-fabricmanager command line option
|
||||
- Add no-persistenced command line option
|
||||
- Skip directories and symlinks when mounting libraries.
|
||||
|
||||
## v1.16.1
|
||||
- Fix bug with processing errors during CDI spec generation for MIG devices
|
||||
|
||||
## v1.16.0
|
||||
- Promote v1.16.0-rc.2 to v1.16.0
|
||||
|
||||
### Changes in the Toolkit Container
|
||||
- Bump CUDA base image version to 12.5.1
|
||||
|
||||
## v1.16.0-rc.2
|
||||
- Use relative path to locate driver libraries
|
||||
- Add RelativeToRoot function to Driver
|
||||
- Inject additional libraries for full X11 functionality
|
||||
- Extract options from default runtime if runc does not exist
|
||||
- Avoid using map pointers as maps are always passed by reference
|
||||
- Reduce logging for the NVIDIA Container runtime
|
||||
- Fix bug in argument parsing for logger creation
|
||||
|
||||
## v1.16.0-rc.1
|
||||
|
||||
- Support vulkan ICD files directly in a driver root. This allows for the discovery of vulkan files in GKE driver installations.
|
||||
- Increase priority of ld.so.conf.d config file injected into container. This ensures that injected libraries are preferred over libraries present in the container.
|
||||
- Set default CDI spec permissions to 644. This fixes permission issues when using the `nvidia-ctk cdi transform` functions.
|
||||
- Add `dev-root` option to `nvidia-ctk system create-device-nodes` command.
|
||||
- Fix location of `libnvidia-ml.so.1` when a non-standard driver root is used. This enabled CDI spec generation when using the driver container on a host.
|
||||
- Recalculate minimum required CDI spec version on save.
|
||||
- Move `nvidia-ctk hook` commands to a separate `nvidia-cdi-hook` binary. The same subcommands are supported.
|
||||
- Use `:` as an `nvidia-ctk config --set` list separator. This fixes a bug when trying to set config options that are lists.
|
||||
|
||||
- [toolkit-container] Bump CUDA base image version to 12.5.0
|
||||
- [toolkit-container] Allow the path to `toolkit.pid` to be specified directly.
|
||||
- [toolkit-container] Remove provenance information from image manifests.
|
||||
- [toolkit-container] Add `dev-root` option when configuring the toolkit. This adds support for GKE driver installations.
|
||||
|
||||
## v1.15.0
|
||||
|
||||
* Remove `nvidia-container-runtime` and `nvidia-docker2` packages.
|
||||
* Use `XDG_DATA_DIRS` environment variable when locating config files such as graphics config files.
|
||||
* Add support for v0.7.0 Container Device Interface (CDI) specification.
|
||||
* Add `--config-search-path` option to `nvidia-ctk cdi generate` command. These paths are used when locating driver files such as graphics config files.
|
||||
* Use D3DKMTEnumAdapters3 to enumerate adapters on WSL2 if available.
|
||||
* Add support for v1.2.0 OCI Runtime specification.
|
||||
* Explicitly set `NVIDIA_VISIBLE_DEVICES=void` in generated CDI specifications. This prevents the NVIDIA Container Runtime from making additional modifications.
|
||||
|
||||
* [libnvidia-container] Use D3DKMTEnumAdapters3 to enumerate adapters on WSL2 if available.
|
||||
|
||||
* [toolkit-container] Bump CUDA base image version to 12.4.1
|
||||
|
||||
## v1.15.0-rc.4
|
||||
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
|
||||
## v1.14.6
|
||||
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
|
||||
* Allow multiple device naming strategies for `nvidia-ctk cdi generate` command. This allows a single
|
||||
CDI spec to be generated that includes GPUs by index and UUID.
|
||||
* Set the default `--device-name-strategy` for the `nvidia-ctk cdi generate` command to `[index, uuid]`.
|
||||
* Remove `libnvidia-container0` jetpack dependency included for legacy Tegra-based systems.
|
||||
* Add `NVIDIA_VISIBLE_DEVICES=void` to generated CDI specifications.
|
||||
* Add support for selecting IMEX channels using the NVIDIA_IMEX_CHANNELS environment variable.
|
||||
|
||||
* [toolkit-container] Remove centos7 image. The ubi8 image can be used on all RPM-based platforms.
|
||||
* [toolkit-container] Bump CUDA base image version to 12.3.2
|
||||
## v1.14.5
|
||||
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker. This was incorrectly setting `experimental = true` instead
|
||||
of setting `features.cdi = true`.
|
||||
|
||||
## v1.15.0-rc.3
|
||||
* Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied.
|
||||
|
||||
## v1.15.0-rc.2
|
||||
* Extend the `runtime.nvidia.com/gpu` CDI kind to support full-GPUs and MIG devices specified by index or UUID.
|
||||
* Fix bug when specifying `--dev-root` for Tegra-based systems.
|
||||
* Log explicitly requested runtime mode.
|
||||
* Remove package dependency on libseccomp.
|
||||
* Added detection of libnvdxgdmal.so.1 on WSL2
|
||||
* Use devRoot to resolve MIG device nodes.
|
||||
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
|
||||
* Add `crun` to the list of configured low-level runtimes.
|
||||
* Added support for `--ldconfig-path` to `nvidia-ctk cdi generate` command.
|
||||
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker.
|
||||
* Add discovery of the GDRCopy device (`gdrdrv`) if the `NVIDIA_GDRCOPY` environment variable of the container is set to `enabled`
|
||||
|
||||
* [toolkit-container] Bump CUDA base image version to 12.3.1.
|
||||
|
||||
## v1.15.0-rc.1
|
||||
* Skip update of ldcache in containers without ldconfig. The .so.SONAME symlinks are still created.
|
||||
* Normalize ldconfig path on use. This automatically adjusts the ldconfig setting applied to ldconfig.real on systems where this exists.
|
||||
## v1.14.4
|
||||
* Include `nvidia/nvoptix.bin` in list of graphics mounts.
|
||||
* Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts.
|
||||
* Add support for `--library-search-paths` to `nvidia-ctk cdi generate` command.
|
||||
* Add support for injecting /dev/nvidia-nvswitch* devices if the NVIDIA_NVSWITCH=enabled envvar is specified.
|
||||
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
|
||||
* Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly.
|
||||
* Add `--relative-to` option to `nvidia-ctk transform root` command. This controls whether the root transformation is applied to host or container paths.
|
||||
* Added automatic CDI spec generation when the `runtime.nvidia.com/gpu=all` device is requested by a container.
|
||||
* Log explicitly requested runtime mode.
|
||||
* Remove package dependency on libseccomp.
|
||||
* Added detection of libnvdxgdmal.so.1 on WSL2.
|
||||
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
|
||||
* Add `crun` to the list of configured low-level runtimes.
|
||||
* Add `--cdi.enabled` option to `nvidia-ctk runtime configure` command to enable CDI in containerd.
|
||||
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
|
||||
|
||||
* [libnvidia-container] Fix device permission check when using cgroupv2 (fixes #227)
|
||||
* [toolkit-container] Bump CUDA base image version to 12.3.1.
|
||||
* [libnvidia-container] Added detection of libnvdxgdmal.so.1 on WSL2.
|
||||
|
||||
## v1.14.3
|
||||
* [toolkit-container] Bump CUDA base image version to 12.2.2.
|
||||
@@ -202,7 +53,7 @@
|
||||
## v1.14.0-rc.2
|
||||
* Fix bug causing incorrect nvidia-smi symlink to be created on WSL2 systems with multiple driver roots.
|
||||
* Remove dependency on coreutils when installing package on RPM-based systems.
|
||||
* Create output folders if required when running `nvidia-ctk runtime configure`
|
||||
* Create ouput folders if required when running `nvidia-ctk runtime configure`
|
||||
* Generate default config as post-install step.
|
||||
* Added support for detecting GSP firmware at custom paths when generating CDI specifications.
|
||||
* Added logic to skip the extraction of image requirements if `NVIDIA_DISABLE_REQUIRES` is set to `true`.
|
||||
|
||||
@@ -34,7 +34,7 @@ environment variables.
|
||||
|
||||
## Testing packages locally
|
||||
|
||||
The [tests/release](./tests/release/) folder contains documentation on how the installation of local or staged packages can be tested.
|
||||
The [test/release](./test/release/) folder contains documentation on how the installation of local or staged packages can be tested.
|
||||
|
||||
|
||||
## Releasing
|
||||
|
||||
142
Jenkinsfile
vendored
Normal file
142
Jenkinsfile
vendored
Normal file
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
podTemplate (cloud:'sw-gpu-cloudnative',
|
||||
containers: [
|
||||
containerTemplate(name: 'docker', image: 'docker:dind', ttyEnabled: true, privileged: true),
|
||||
containerTemplate(name: 'golang', image: 'golang:1.16.3', ttyEnabled: true)
|
||||
]) {
|
||||
node(POD_LABEL) {
|
||||
def scmInfo
|
||||
|
||||
stage('checkout') {
|
||||
scmInfo = checkout(scm)
|
||||
}
|
||||
|
||||
stage('dependencies') {
|
||||
container('golang') {
|
||||
sh 'GO111MODULE=off go get -u github.com/client9/misspell/cmd/misspell'
|
||||
sh 'GO111MODULE=off go get -u github.com/gordonklaus/ineffassign'
|
||||
sh 'GO111MODULE=off go get -u golang.org/x/lint/golint'
|
||||
}
|
||||
container('docker') {
|
||||
sh 'apk add --no-cache make bash git'
|
||||
}
|
||||
}
|
||||
stage('check') {
|
||||
parallel (
|
||||
getGolangStages(["assert-fmt", "lint", "vet", "ineffassign", "misspell"])
|
||||
)
|
||||
}
|
||||
stage('test') {
|
||||
parallel (
|
||||
getGolangStages(["test"])
|
||||
)
|
||||
}
|
||||
|
||||
def versionInfo
|
||||
stage('version') {
|
||||
container('docker') {
|
||||
versionInfo = getVersionInfo(scmInfo)
|
||||
println "versionInfo=${versionInfo}"
|
||||
}
|
||||
}
|
||||
|
||||
def dist = 'ubuntu20.04'
|
||||
def arch = 'amd64'
|
||||
def stageLabel = "${dist}-${arch}"
|
||||
|
||||
stage('build-one') {
|
||||
container('docker') {
|
||||
stage (stageLabel) {
|
||||
sh "make ${dist}-${arch}"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('release') {
|
||||
container('docker') {
|
||||
stage (stageLabel) {
|
||||
|
||||
def component = 'main'
|
||||
def repository = 'sw-gpu-cloudnative-debian-local/pool/main/'
|
||||
|
||||
def uploadSpec = """{
|
||||
"files":
|
||||
[ {
|
||||
"pattern": "./dist/${dist}/${arch}/*.deb",
|
||||
"target": "${repository}",
|
||||
"props": "deb.distribution=${dist};deb.component=${component};deb.architecture=${arch}"
|
||||
}
|
||||
]
|
||||
}"""
|
||||
|
||||
sh "echo starting release with versionInfo=${versionInfo}"
|
||||
if (versionInfo.isTag) {
|
||||
// upload to artifactory repository
|
||||
def server = Artifactory.server 'sw-gpu-artifactory'
|
||||
server.upload spec: uploadSpec
|
||||
} else {
|
||||
sh "echo skipping release for non-tagged build"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// getGolangStages builds a map from each make target name to the closure
// returned by getLintClosure for that target. The callers in this file pass
// the resulting map to `parallel`.
def getGolangStages(def targets) {
    // Declared with `def` so the map is a local of this call; a bare
    // assignment would store it in the script binding instead.
    def stages = [:]

    for (t in targets) {
        stages[t] = getLintClosure(t)
    }

    return stages
}
|
||||
|
||||
// getLintClosure returns a closure that, when invoked, runs `make <target>`
// in the 'golang' container, inside a stage named after the target.
def getLintClosure(def target) {
    def runTarget = {
        container('golang') {
            stage(target) {
                sh "make ${target}"
            }
        }
    }
    return runTarget
}
|
||||
|
||||
// getVersionInfo returns a map containing an `isTag` flag (computed from
// scmInfo.GIT_BRANCH) together with every entry of scmInfo.
def getVersionInfo(def scmInfo) {
    def versionInfo = [:]
    versionInfo.isTag = isTag(scmInfo.GIT_BRANCH)

    // scmInfo entries are copied last, so they win on a key collision.
    // Assumes scmInfo is a Map, as the two-argument iteration it replaces implies.
    versionInfo.putAll(scmInfo)
    return versionInfo
}
|
||||
|
||||
// isTag reports whether `git describe` resolves the checked-out commit to
// exactly "tags/<branch>". Names that do not start with 'v' are never
// treated as tags.
def isTag(def branch) {
    // The short-circuit keeps the git call from running for non-'v' names.
    return branch.startsWith('v') &&
        shOutput('git describe --all --exact-match --always') == "tags/${branch}"
}
|
||||
|
||||
// shOutput runs the given shell script and returns its standard output with
// leading and trailing whitespace removed.
// The helper was previously declared as `shOuptut`, which did not match the
// `shOutput(...)` call made by isTag.
def shOutput(def script) {
    return sh(script: script, returnStdout: true).trim()
}
|
||||
116
Makefile
116
Makefile
@@ -38,8 +38,8 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
|
||||
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
|
||||
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
|
||||
|
||||
CHECK_TARGETS := lint
|
||||
MAKE_TARGETS := binaries build check fmt test examples cmds coverage generate licenses vendor check-vendor $(CHECK_TARGETS)
|
||||
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
|
||||
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
|
||||
|
||||
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
|
||||
|
||||
@@ -53,26 +53,22 @@ CLI_VERSION = $(VERSION)
|
||||
endif
|
||||
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
|
||||
|
||||
GOOS ?= linux
|
||||
|
||||
binaries: cmds
|
||||
ifneq ($(PREFIX),)
|
||||
cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
|
||||
endif
|
||||
cmds: $(CMD_TARGETS)
|
||||
|
||||
ifneq ($(shell uname),Darwin)
|
||||
EXTLDFLAGS = -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files -Wl,-z,lazy
|
||||
else
|
||||
EXTLDFLAGS = -Wl,-undefined,dynamic_lookup
|
||||
endif
|
||||
$(CMD_TARGETS): cmd-%:
|
||||
go build -ldflags "-s -w '-extldflags=$(EXTLDFLAGS)' -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
|
||||
build:
|
||||
go build ./...
|
||||
GOOS=$(GOOS) go build ./...
|
||||
|
||||
examples: $(EXAMPLE_TARGETS)
|
||||
$(EXAMPLE_TARGETS): example-%:
|
||||
go build ./examples/$(*)
|
||||
GOOS=$(GOOS) go build ./examples/$(*)
|
||||
|
||||
all: check test build binary
|
||||
check: $(CHECK_TARGETS)
|
||||
@@ -82,75 +78,71 @@ fmt:
|
||||
go list -f '{{.Dir}}' $(MODULE)/... \
|
||||
| xargs gofmt -s -l -w
|
||||
|
||||
# Apply goimports -local github.com/NVIDIA/container-toolkit to the codebase
|
||||
goimports:
|
||||
go list -f {{.Dir}} $(MODULE)/... \
|
||||
| xargs goimports -local $(MODULE) -w
|
||||
assert-fmt:
|
||||
go list -f '{{.Dir}}' $(MODULE)/... \
|
||||
| xargs gofmt -s -l > fmt.out
|
||||
@if [ -s fmt.out ]; then \
|
||||
echo "\nERROR: The following files are not formatted:\n"; \
|
||||
cat fmt.out; \
|
||||
rm fmt.out; \
|
||||
exit 1; \
|
||||
else \
|
||||
rm fmt.out; \
|
||||
fi
|
||||
|
||||
ineffassign:
|
||||
ineffassign $(MODULE)/...
|
||||
|
||||
lint:
|
||||
golangci-lint run ./...
|
||||
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
|
||||
go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status
|
||||
|
||||
vendor: | mod-tidy mod-vendor mod-verify
|
||||
misspell:
|
||||
misspell $(MODULE)/...
|
||||
|
||||
mod-tidy:
|
||||
@for mod in $$(find . -name go.mod -not -path "./testdata/*" -not -path "./third_party/*"); do \
|
||||
echo "Tidying $$mod..."; ( \
|
||||
cd $$(dirname $$mod) && go mod tidy \
|
||||
) || exit 1; \
|
||||
done
|
||||
|
||||
mod-vendor:
|
||||
@for mod in $$(find . -name go.mod -not -path "./testdata/*" -not -path "./third_party/*" -not -path "./deployments/*"); do \
|
||||
echo "Vendoring $$mod..."; ( \
|
||||
cd $$(dirname $$mod) && go mod vendor \
|
||||
) || exit 1; \
|
||||
done
|
||||
|
||||
mod-verify:
|
||||
@for mod in $$(find . -name go.mod -not -path "./testdata/*" -not -path "./third_party/*"); do \
|
||||
echo "Verifying $$mod..."; ( \
|
||||
cd $$(dirname $$mod) && go mod verify | sed 's/^/ /g' \
|
||||
) || exit 1; \
|
||||
done
|
||||
|
||||
|
||||
check-vendor: vendor
|
||||
git diff --exit-code HEAD -- go.mod go.sum vendor
|
||||
vet:
|
||||
go vet $(MODULE)/...
|
||||
|
||||
licenses:
|
||||
go-licenses csv $(MODULE)/...
|
||||
|
||||
COVERAGE_FILE := coverage.out
|
||||
test: build cmds
|
||||
go test -coverprofile=$(COVERAGE_FILE).with-mocks $(MODULE)/...
|
||||
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
|
||||
|
||||
coverage: test
|
||||
cat $(COVERAGE_FILE).with-mocks | grep -v "_mock.go" > $(COVERAGE_FILE)
|
||||
go tool cover -func=$(COVERAGE_FILE)
|
||||
cat $(COVERAGE_FILE) | grep -v "_mock.go" > $(COVERAGE_FILE).no-mocks
|
||||
go tool cover -func=$(COVERAGE_FILE).no-mocks
|
||||
|
||||
generate:
|
||||
go generate $(MODULE)/...
|
||||
|
||||
# Generate an image for containerized builds
|
||||
# Note: This image is local only
|
||||
.PHONY: .build-image
|
||||
.build-image:
|
||||
make -f deployments/devel/Makefile .build-image
|
||||
.PHONY: .build-image .pull-build-image .push-build-image
|
||||
.build-image: docker/Dockerfile.devel
|
||||
if [ x"$(SKIP_IMAGE_BUILD)" = x"" ]; then \
|
||||
$(DOCKER) build \
|
||||
--progress=plain \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--tag $(BUILDIMAGE) \
|
||||
-f $(^) \
|
||||
docker; \
|
||||
fi
|
||||
|
||||
ifeq ($(BUILD_DEVEL_IMAGE),yes)
|
||||
$(DOCKER_TARGETS): .build-image
|
||||
.shell: .build-image
|
||||
endif
|
||||
.pull-build-image:
|
||||
$(DOCKER) pull $(BUILDIMAGE)
|
||||
|
||||
$(DOCKER_TARGETS): docker-%:
|
||||
@echo "Running 'make $(*)' in container image $(BUILDIMAGE)"
|
||||
.push-build-image:
|
||||
$(DOCKER) push $(BUILDIMAGE)
|
||||
|
||||
$(DOCKER_TARGETS): docker-%: .build-image
|
||||
@echo "Running 'make $(*)' in docker container $(BUILDIMAGE)"
|
||||
$(DOCKER) run \
|
||||
--rm \
|
||||
-e GOCACHE=/tmp/.cache/go \
|
||||
-e GOMODCACHE=/tmp/.cache/gomod \
|
||||
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
|
||||
-v $(PWD):/work \
|
||||
-w /work \
|
||||
-e GOCACHE=/tmp/.cache \
|
||||
-v $(PWD):$(PWD) \
|
||||
-w $(PWD) \
|
||||
--user $$(id -u):$$(id -g) \
|
||||
$(BUILDIMAGE) \
|
||||
make $(*)
|
||||
@@ -161,10 +153,8 @@ PHONY: .shell
|
||||
$(DOCKER) run \
|
||||
--rm \
|
||||
-ti \
|
||||
-e GOCACHE=/tmp/.cache/go \
|
||||
-e GOMODCACHE=/tmp/.cache/gomod \
|
||||
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
|
||||
-v $(PWD):/work \
|
||||
-w /work \
|
||||
-e GOCACHE=/tmp/.cache \
|
||||
-v $(PWD):$(PWD) \
|
||||
-w $(PWD) \
|
||||
--user $$(id -u):$$(id -g) \
|
||||
$(BUILDIMAGE)
|
||||
|
||||
36
RELEASE.md
36
RELEASE.md
@@ -1,36 +0,0 @@
|
||||
# Release Process
|
||||
|
||||
The NVIDIA Container Toolkit consists of the following artifacts:
|
||||
- The NVIDIA Container Toolkit container
|
||||
- Packages for debian-based systems
|
||||
- Packages for rpm-based systems
|
||||
|
||||
# Release Process Checklist:
|
||||
- [ ] Create a release PR:
|
||||
- [ ] Run the `./hack/prepare-release.sh` script to update the version in all the needed files. This also creates a [release issue](https://github.com/NVIDIA/cloud-native-team/issues?q=is%3Aissue+is%3Aopen+label%3Arelease)
|
||||
- [ ] Run the `./hack/generate-changelog.sh` script to generate a draft changelog and update `CHANGELOG.md` with the changes.
|
||||
- [ ] Create a PR from the created `bump-release-{{ .VERSION }}` branch.
|
||||
- [ ] Merge the release PR
|
||||
- [ ] Tag the release and push the tag to the `internal` mirror:
|
||||
- [ ] Image release pipeline: https://gitlab-master.nvidia.com/dl/container-dev/container-toolkit/-/pipelines/16466098
|
||||
- [ ] Wait for the image release to complete.
|
||||
- [ ] Push the tag to the upstream GitHub repo.
|
||||
- [ ] Wait for the [`Release`](https://github.com/NVIDIA/k8s-device-plugin/actions/workflows/release.yaml) GitHub Action to complete
|
||||
- [ ] Publish the [draft release](https://github.com/NVIDIA/k8s-device-plugin/releases) created by the GitHub Action
|
||||
- [ ] Publish the packages to the gh-pages branch of the libnvidia-container repo
|
||||
- [ ] Create a KitPick
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
*Note*: This assumes that we have the release tag checked out locally.
|
||||
|
||||
- If the `Release` GitHub Action fails:
|
||||
- Check the logs for the error first.
|
||||
- Create the helm packages locally by running:
|
||||
```bash
|
||||
./hack/prepare-artifacts.sh {{ .VERSION }}
|
||||
```
|
||||
- Create the draft release by running:
|
||||
```bash
|
||||
./hack/create-release.sh {{ .VERSION }}
|
||||
```
|
||||
24
SECURITY.md
24
SECURITY.md
@@ -1,24 +0,0 @@
|
||||
# Security
|
||||
|
||||
NVIDIA is dedicated to the security and trust of our software products and services, including all source code repositories managed through our organization.
|
||||
|
||||
If you need to report a security issue, please use the appropriate contact points outlined below. **Please do not report security vulnerabilities through GitHub.**
|
||||
|
||||
## Reporting Potential Security Vulnerability in an NVIDIA Product
|
||||
|
||||
To report a potential security vulnerability in any NVIDIA product:
|
||||
- Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html)
|
||||
- E-Mail: psirt@nvidia.com
|
||||
- We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key)
|
||||
- Please include the following information:
|
||||
- Product/Driver name and version/branch that contains the vulnerability
|
||||
- Type of vulnerability (code execution, denial of service, buffer overflow, etc.)
|
||||
- Instructions to reproduce the vulnerability
|
||||
- Proof-of-concept or exploit code
|
||||
- Potential impact of the vulnerability, including how an attacker could exploit the vulnerability
|
||||
|
||||
While NVIDIA currently does not have a bug bounty program, we do offer acknowledgement when an externally reported security issue is addressed under our coordinated vulnerability disclosure policy. Please visit our [Product Security Incident Response Team (PSIRT)](https://www.nvidia.com/en-us/security/psirt-policies/) policies page for more information.
|
||||
|
||||
## NVIDIA Product Security
|
||||
|
||||
For all security-related concerns, please visit NVIDIA's Product Security portal at https://www.nvidia.com/en-us/security
|
||||
@@ -12,10 +12,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG BASE_DIST
|
||||
ARG CUDA_VERSION
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
ARG VERSION="N/A"
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi8 AS build
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
|
||||
|
||||
RUN yum install -y \
|
||||
wget make git gcc \
|
||||
@@ -29,25 +31,36 @@ RUN set -eux; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64 | arm64) ARCH='arm64' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture" ; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
| tar -C /usr/local -xz
|
||||
|
||||
|
||||
ENV GOPATH=/go
|
||||
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
WORKDIR /build
|
||||
COPY . .
|
||||
|
||||
RUN mkdir /artifacts
|
||||
ARG VERSION="N/A"
|
||||
ARG GIT_COMMIT="unknown"
|
||||
RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
|
||||
# NOTE: Until the config utilities are properly integrated into the
|
||||
# nvidia-container-toolkit repository, these are built from the `tools` folder
|
||||
# and not `cmd`.
|
||||
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi8
|
||||
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
ARG BASE_DIST
|
||||
# See https://www.centos.org/centos-linux-eol/
|
||||
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
|
||||
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
|
||||
RUN [[ "${BASE_DIST}" != "centos8" ]] || \
|
||||
( \
|
||||
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
|
||||
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
|
||||
)
|
||||
|
||||
ENV NVIDIA_DISABLE_REQUIRE="true"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=void
|
||||
@@ -61,8 +74,7 @@ WORKDIR /artifacts/packages
|
||||
|
||||
ARG PACKAGE_VERSION
|
||||
ARG TARGETARCH
|
||||
ENV PACKAGE_ARCH=${TARGETARCH}
|
||||
|
||||
ENV PACKAGE_ARCH ${TARGETARCH}
|
||||
RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm64/aarch64} && \
|
||||
yum localinstall -y \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-1.*.rpm \
|
||||
@@ -71,12 +83,10 @@ RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
COPY --from=build /artifacts/nvidia-ctk-installer /work/nvidia-ctk-installer
|
||||
RUN ln -s nvidia-ctk-installer nvidia-toolkit
|
||||
COPY --from=build /artifacts/bin /work
|
||||
|
||||
ENV PATH=/work:$PATH
|
||||
|
||||
ARG VERSION
|
||||
LABEL io.k8s.display-name="NVIDIA Container Runtime Config"
|
||||
LABEL name="NVIDIA Container Runtime Config"
|
||||
LABEL vendor="NVIDIA"
|
||||
@@ -87,4 +97,4 @@ LABEL description="See summary"
|
||||
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
|
||||
ENTRYPOINT ["/work/nvidia-ctk-installer"]
|
||||
ENTRYPOINT ["/work/nvidia-toolkit"]
|
||||
@@ -12,9 +12,11 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG BASE_DIST
|
||||
ARG CUDA_VERSION
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
ARG ARTIFACTS_ROOT
|
||||
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||
@@ -22,6 +24,7 @@ COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||
WORKDIR /artifacts/packages
|
||||
|
||||
# build-args are added to the manifest.txt file below.
|
||||
ARG BASE_DIST
|
||||
ARG PACKAGE_DIST
|
||||
ARG PACKAGE_VERSION
|
||||
ARG GIT_BRANCH
|
||||
@@ -12,10 +12,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG BASE_DIST
|
||||
ARG CUDA_VERSION
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
ARG VERSION="N/A"
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04 AS build
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y wget make git gcc \
|
||||
@@ -29,24 +31,25 @@ RUN set -eux; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64 | arm64) ARCH='arm64' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture" ; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
| tar -C /usr/local -xz
|
||||
|
||||
ENV GOPATH=/go
|
||||
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
WORKDIR /build
|
||||
COPY . .
|
||||
|
||||
RUN mkdir /artifacts
|
||||
ARG VERSION="N/A"
|
||||
ARG GIT_COMMIT="unknown"
|
||||
RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
|
||||
# NOTE: Until the config utilities are properly integrated into the
|
||||
# nvidia-container-toolkit repository, these are built from the `tools` folder
|
||||
# and not `cmd`.
|
||||
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
|
||||
RUN rm -f /etc/apt/sources.list.d/cuda.list
|
||||
@@ -70,7 +73,15 @@ WORKDIR /artifacts/packages
|
||||
|
||||
ARG PACKAGE_VERSION
|
||||
ARG TARGETARCH
|
||||
ENV PACKAGE_ARCH=${TARGETARCH}
|
||||
ENV PACKAGE_ARCH ${TARGETARCH}
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container/stable"
|
||||
ARG LIBNVIDIA_CONTAINER0_VERSION
|
||||
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
|
||||
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
|
||||
--output ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb && \
|
||||
dpkg -i ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb; \
|
||||
fi
|
||||
|
||||
RUN dpkg -i \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
|
||||
@@ -79,12 +90,10 @@ RUN dpkg -i \
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
COPY --from=build /artifacts/nvidia-ctk-installer /work/nvidia-ctk-installer
|
||||
RUN ln -s nvidia-ctk-installer nvidia-toolkit
|
||||
COPY --from=build /artifacts/bin /work/
|
||||
|
||||
ENV PATH=/work:$PATH
|
||||
|
||||
ARG VERSION
|
||||
LABEL io.k8s.display-name="NVIDIA Container Runtime Config"
|
||||
LABEL name="NVIDIA Container Runtime Config"
|
||||
LABEL vendor="NVIDIA"
|
||||
@@ -95,4 +104,4 @@ LABEL description="See summary"
|
||||
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
|
||||
ENTRYPOINT ["/work/nvidia-ctk-installer"]
|
||||
ENTRYPOINT ["/work/nvidia-toolkit"]
|
||||
@@ -27,6 +27,12 @@ DIST_DIR ?= $(CURDIR)/dist
|
||||
##### Global variables #####
|
||||
include $(CURDIR)/versions.mk
|
||||
|
||||
ifeq ($(IMAGE_NAME),)
|
||||
REGISTRY ?= nvidia
|
||||
IMAGE_NAME := $(REGISTRY)/container-toolkit
|
||||
endif
|
||||
|
||||
VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
IMAGE_VERSION := $(VERSION)
|
||||
|
||||
IMAGE_TAG ?= $(VERSION)-$(DIST)
|
||||
@@ -39,11 +45,10 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
|
||||
|
||||
##### Public rules #####
|
||||
DEFAULT_PUSH_TARGET := ubuntu20.04
|
||||
DISTRIBUTIONS := ubuntu20.04 ubi8
|
||||
DISTRIBUTIONS := ubuntu20.04 ubi8 centos7
|
||||
|
||||
META_TARGETS := packaging
|
||||
|
||||
IMAGE_TARGETS := $(patsubst %,image-%,$(DISTRIBUTIONS) $(META_TARGETS))
|
||||
BUILD_TARGETS := $(patsubst %,build-%,$(DISTRIBUTIONS) $(META_TARGETS))
|
||||
PUSH_TARGETS := $(patsubst %,push-%,$(DISTRIBUTIONS) $(META_TARGETS))
|
||||
TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
|
||||
@@ -51,9 +56,9 @@ TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
|
||||
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
||||
|
||||
ifneq ($(BUILD_MULTI_ARCH_IMAGES),true)
|
||||
include $(CURDIR)/deployments/container/native-only.mk
|
||||
include $(CURDIR)/build/container/native-only.mk
|
||||
else
|
||||
include $(CURDIR)/deployments/container/multi-arch.mk
|
||||
include $(CURDIR)/build/container/multi-arch.mk
|
||||
endif
|
||||
|
||||
# For the default push target we also push a short tag equal to the version.
|
||||
@@ -79,20 +84,22 @@ push-short:
|
||||
|
||||
|
||||
build-%: DIST = $(*)
|
||||
build-%: DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile.$(DOCKERFILE_SUFFIX)
|
||||
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
|
||||
|
||||
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
|
||||
|
||||
# Use a generic build target to build the relevant images
|
||||
$(IMAGE_TARGETS): image-%: $(ARTIFACTS_ROOT)
|
||||
$(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
DOCKER_BUILDKIT=1 \
|
||||
$(DOCKER) $(BUILDX) build --pull \
|
||||
--provenance=false --sbom=false \
|
||||
$(DOCKER_BUILD_OPTIONS) \
|
||||
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
|
||||
--tag $(IMAGE) \
|
||||
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
|
||||
--build-arg BASE_DIST="$(BASE_DIST)" \
|
||||
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg LIBNVIDIA_CONTAINER0_VERSION="$(LIBNVIDIA_CONTAINER0_DEPENDENCY)" \
|
||||
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
|
||||
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
|
||||
--build-arg VERSION="$(VERSION)" \
|
||||
@@ -103,12 +110,21 @@ $(IMAGE_TARGETS): image-%: $(ARTIFACTS_ROOT)
|
||||
-f $(DOCKERFILE) \
|
||||
$(CURDIR)
|
||||
|
||||
|
||||
build-ubuntu%: BASE_DIST = $(*)
|
||||
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
|
||||
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
|
||||
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
|
||||
|
||||
build-ubi8: DOCKERFILE_SUFFIX := ubi8
|
||||
build-ubi8: BASE_DIST := ubi8
|
||||
build-ubi8: DOCKERFILE_SUFFIX := centos
|
||||
build-ubi8: PACKAGE_DIST = centos7
|
||||
|
||||
build-centos7: BASE_DIST = $(*)
|
||||
build-centos7: DOCKERFILE_SUFFIX := centos
|
||||
build-centos7: PACKAGE_DIST = $(BASE_DIST)
|
||||
|
||||
build-packaging: BASE_DIST := ubuntu20.04
|
||||
build-packaging: DOCKERFILE_SUFFIX := packaging
|
||||
build-packaging: PACKAGE_ARCH := amd64
|
||||
build-packaging: PACKAGE_DIST = all
|
||||
@@ -116,13 +132,7 @@ build-packaging: PACKAGE_DIST = all
|
||||
# Test targets
|
||||
test-%: DIST = $(*)
|
||||
|
||||
# Handle the default build target.
|
||||
.PHONY: build
|
||||
build: $(DEFAULT_PUSH_TARGET)
|
||||
$(DEFAULT_PUSH_TARGET): build-$(DEFAULT_PUSH_TARGET)
|
||||
$(DEFAULT_PUSH_TARGET): DIST = $(DEFAULT_PUSH_TARGET)
|
||||
|
||||
TEST_CASES ?= docker crio containerd
|
||||
TEST_CASES ?= toolkit docker crio containerd
|
||||
$(TEST_TARGETS): test-%:
|
||||
TEST_CASES="$(TEST_CASES)" bash -x $(CURDIR)/test/container/main.sh run \
|
||||
$(CURDIR)/shared-$(*) \
|
||||
@@ -16,7 +16,11 @@ PUSH_ON_BUILD ?= false
|
||||
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
|
||||
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
|
||||
|
||||
$(BUILD_TARGETS): build-%: image-%
|
||||
# We only have x86_64 packages for centos7
|
||||
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
# We only generate amd64 image for ubuntu18.04
|
||||
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
# We only generate a single image for packaging targets
|
||||
build-packaging: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
@@ -1,5 +1,4 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -12,12 +11,5 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
// WARNING: THIS FILE WAS AUTOMATICALLY GENERATED.
|
||||
// Code generated by https://git.io/c-for-go. DO NOT EDIT.
|
||||
|
||||
/*
|
||||
Package NVSANDBOXUTILS bindings
|
||||
*/
|
||||
package nvsandboxutils
|
||||
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
@@ -1,31 +0,0 @@
|
||||
# NVIDIA CDI Hook
|
||||
|
||||
The CLI `nvidia-cdi-hook` provides container device runtime hook capabilities when
|
||||
called by a container runtime, as specified in a
|
||||
[Container Device Interface](https://tags.cncf.io/container-device-interface/blob/main/SPEC.md)
|
||||
file.
|
||||
|
||||
## Generating a CDI
|
||||
|
||||
The CDI itself is created for an NVIDIA-capable device using the
|
||||
[`nvidia-ctk cdi generate`](../nvidia-ctk/) command.
|
||||
|
||||
When `nvidia-ctk cdi generate` is run, the CDI specification is generated as a yaml file.
|
||||
The CDI specification provides instructions for a container runtime to set up devices, files and
|
||||
other resources for the container prior to starting it. Those instructions
|
||||
may include executing command-line tools to prepare the filesystem. The execution
|
||||
of such command-line tools is called a hook.
|
||||
|
||||
`nvidia-cdi-hook` is the CLI tool that is expected to be called by the container runtime,
|
||||
when specified by the CDI file.
|
||||
|
||||
See the [`nvidia-ctk` documentation](../nvidia-ctk/README.md) for more information
|
||||
on generating a CDI file.
|
||||
|
||||
## Functionality
|
||||
|
||||
The `nvidia-cdi-hook` CLI provides the following functionality:
|
||||
|
||||
* `chmod` - Change the permissions of a file or directory inside the directory path to be mounted into a container.
|
||||
* `create-symlinks` - Create symlinks inside the directory path to be mounted into a container.
|
||||
* `update-ldcache` - Update the dynamic linker cache inside the directory path to be mounted into a container.
|
||||
@@ -1,53 +0,0 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package commands
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod"
|
||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat"
|
||||
disabledevicenodemodification "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/disable-device-node-modification"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
)
|
||||
|
||||
// New creates the commands associated with supported CDI hooks.
|
||||
// These are shared by the nvidia-cdi-hook and nvidia-ctk hook commands.
|
||||
func New(logger logger.Interface) []*cli.Command {
|
||||
return []*cli.Command{
|
||||
ldcache.NewCommand(logger),
|
||||
symlinks.NewCommand(logger),
|
||||
chmod.NewCommand(logger),
|
||||
cudacompat.NewCommand(logger),
|
||||
disabledevicenodemodification.NewCommand(logger),
|
||||
}
|
||||
}
|
||||
|
||||
// IssueUnsupportedHookWarning logs a warning that no hook or an unsupported
|
||||
// hook has been specified.
|
||||
// This happens if a subcommand is provided that does not match one of the
|
||||
// subcommands that has been explicitly specified.
|
||||
func IssueUnsupportedHookWarning(logger logger.Interface, c *cli.Context) {
|
||||
args := c.Args().Slice()
|
||||
if len(args) == 0 {
|
||||
logger.Warningf("No CDI hook specified")
|
||||
} else {
|
||||
logger.Warningf("Unsupported CDI hook: %v", args[0])
|
||||
}
|
||||
}
|
||||
@@ -1,172 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package symlinks
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/moby/sys/symlink"
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
type config struct {
|
||||
links cli.StringSlice
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs a hook command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build creates the create-symlink command.
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
c := cli.Command{
|
||||
Name: "create-symlinks",
|
||||
Usage: "A hook to create symlinks in the container.",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "link",
|
||||
Usage: "Specify a specific link to create. The link is specified as target::link. If the link exists in the container root, it is removed.",
|
||||
Destination: &cfg.links,
|
||||
},
|
||||
// The following flags are testing-only flags.
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN. This is only intended for testing.",
|
||||
Destination: &cfg.containerSpec,
|
||||
Hidden: true,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
created := make(map[string]bool)
|
||||
for _, l := range cfg.links.Value() {
|
||||
if created[l] {
|
||||
m.logger.Debugf("Link %v already processed", l)
|
||||
continue
|
||||
}
|
||||
parts := strings.Split(l, "::")
|
||||
if len(parts) != 2 {
|
||||
return fmt.Errorf("invalid symlink specification %v", l)
|
||||
}
|
||||
|
||||
err := m.createLink(containerRoot, parts[0], parts[1])
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create link %v: %w", parts, err)
|
||||
}
|
||||
created[l] = true
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// createLink creates a symbolic link in the specified container root.
|
||||
// This is equivalent to:
|
||||
//
|
||||
// chroot {{ .containerRoot }} ln -f -s {{ .target }} {{ .link }}
|
||||
//
|
||||
// If the specified link already exists and points to the same target, this
|
||||
// operation is a no-op.
|
||||
// If a file exists at the link path or the link points to a different target
|
||||
// this file is removed before creating the link.
|
||||
//
|
||||
// Note that if the link path resolves to an absolute path oudside of the
|
||||
// specified root, this is treated as an absolute path in this root.
|
||||
func (m command) createLink(containerRoot string, targetPath string, link string) error {
|
||||
linkPath := filepath.Join(containerRoot, link)
|
||||
|
||||
exists, err := linkExists(targetPath, linkPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check if link exists: %w", err)
|
||||
}
|
||||
if exists {
|
||||
m.logger.Debugf("Link %s already exists", linkPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// We resolve the parent of the symlink that we're creating in the container root.
|
||||
// If we resolve the full link path, an existing link at the location itself
|
||||
// is also resolved here and we are unable to force create the link.
|
||||
resolvedLinkParent, err := symlink.FollowSymlinkInScope(filepath.Dir(linkPath), containerRoot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to follow path for link %v relative to %v: %w", link, containerRoot, err)
|
||||
}
|
||||
resolvedLinkPath := filepath.Join(resolvedLinkParent, filepath.Base(linkPath))
|
||||
|
||||
m.logger.Infof("Symlinking %v to %v", resolvedLinkPath, targetPath)
|
||||
err = os.MkdirAll(filepath.Dir(resolvedLinkPath), 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create directory: %v", err)
|
||||
}
|
||||
err = symlinks.ForceCreate(targetPath, resolvedLinkPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create symlink: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// linkExists checks whether the specified link exists.
|
||||
// A link exists if the path exists, is a symlink, and points to the specified target.
|
||||
func linkExists(target string, link string) (bool, error) {
|
||||
currentTarget, err := symlinks.Resolve(link)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return false, nil
|
||||
}
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to resolve existing symlink %s: %w", link, err)
|
||||
}
|
||||
if currentTarget == target {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
@@ -1,297 +0,0 @@
|
||||
package symlinks
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
|
||||
)
|
||||
|
||||
func TestLinkExist(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
require.NoError(
|
||||
t,
|
||||
makeFs(tmpDir,
|
||||
dirOrLink{path: "/a/b/c", target: "d"},
|
||||
dirOrLink{path: "/a/b/e", target: "/a/b/f"},
|
||||
),
|
||||
)
|
||||
|
||||
exists, err := linkExists("d", filepath.Join(tmpDir, "/a/b/c"))
|
||||
require.NoError(t, err)
|
||||
require.True(t, exists)
|
||||
|
||||
exists, err = linkExists("/a/b/f", filepath.Join(tmpDir, "/a/b/e"))
|
||||
require.NoError(t, err)
|
||||
require.True(t, exists)
|
||||
|
||||
exists, err = linkExists("different-target", filepath.Join(tmpDir, "/a/b/c"))
|
||||
require.NoError(t, err)
|
||||
require.False(t, exists)
|
||||
|
||||
exists, err = linkExists("/a/b/d", filepath.Join(tmpDir, "/a/b/c"))
|
||||
require.NoError(t, err)
|
||||
require.False(t, exists)
|
||||
|
||||
exists, err = linkExists("foo", filepath.Join(tmpDir, "/a/b/does-not-exist"))
|
||||
require.NoError(t, err)
|
||||
require.False(t, exists)
|
||||
}
|
||||
|
||||
func TestCreateLink(t *testing.T) {
|
||||
type link struct {
|
||||
path string
|
||||
target string
|
||||
}
|
||||
type expectedLink struct {
|
||||
link
|
||||
err error
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
containerContents []dirOrLink
|
||||
link link
|
||||
expectedCreateError error
|
||||
expectedLinks []expectedLink
|
||||
}{
|
||||
{
|
||||
description: "link to / resolves to container root",
|
||||
containerContents: []dirOrLink{
|
||||
{path: "/lib/foo", target: "/"},
|
||||
},
|
||||
link: link{
|
||||
path: "/lib/foo/libfoo.so",
|
||||
target: "libfoo.so.1",
|
||||
},
|
||||
expectedLinks: []expectedLink{
|
||||
{
|
||||
link: link{
|
||||
path: "{{ .containerRoot }}/libfoo.so",
|
||||
target: "libfoo.so.1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "link to / resolves to container root; parent relative link",
|
||||
containerContents: []dirOrLink{
|
||||
{path: "/lib/foo", target: "/"},
|
||||
},
|
||||
link: link{
|
||||
path: "/lib/foo/libfoo.so",
|
||||
target: "../libfoo.so.1",
|
||||
},
|
||||
expectedLinks: []expectedLink{
|
||||
{
|
||||
link: link{
|
||||
path: "{{ .containerRoot }}/libfoo.so",
|
||||
target: "../libfoo.so.1",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "link to / resolves to container root; absolute link",
|
||||
containerContents: []dirOrLink{
|
||||
{path: "/lib/foo", target: "/"},
|
||||
},
|
||||
link: link{
|
||||
path: "/lib/foo/libfoo.so",
|
||||
target: "/a-path-in-container/foo/libfoo.so.1",
|
||||
},
|
||||
expectedLinks: []expectedLink{
|
||||
{
|
||||
link: link{
|
||||
path: "{{ .containerRoot }}/libfoo.so",
|
||||
target: "/a-path-in-container/foo/libfoo.so.1",
|
||||
},
|
||||
},
|
||||
{
|
||||
// We also check that the target is NOT created.
|
||||
link: link{
|
||||
path: "{{ .containerRoot }}/a-path-in-container/foo/libfoo.so.1",
|
||||
},
|
||||
err: os.ErrNotExist,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
hostRoot := filepath.Join(tmpDir, "/host-root/")
|
||||
containerRoot := filepath.Join(tmpDir, "/container-root")
|
||||
|
||||
require.NoError(t, makeFs(hostRoot))
|
||||
require.NoError(t, makeFs(containerRoot, tc.containerContents...))
|
||||
|
||||
// nvidia-cdi-hook create-symlinks --link linkSpec
|
||||
err := getTestCommand().createLink(containerRoot, tc.link.target, tc.link.path)
|
||||
// TODO: We may be able to replace this with require.ErrorIs.
|
||||
if tc.expectedCreateError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
for _, expectedLink := range tc.expectedLinks {
|
||||
path := strings.ReplaceAll(expectedLink.path, "{{ .containerRoot }}", containerRoot)
|
||||
path = strings.ReplaceAll(path, "{{ .hostRoot }}", hostRoot)
|
||||
if expectedLink.target != "" {
|
||||
target, err := symlinks.Resolve(path)
|
||||
require.ErrorIs(t, err, expectedLink.err)
|
||||
require.Equal(t, expectedLink.target, target)
|
||||
} else {
|
||||
_, err := os.Stat(path)
|
||||
require.ErrorIs(t, err, expectedLink.err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateLinkRelativePath(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
hostRoot := filepath.Join(tmpDir, "/host-root/")
|
||||
containerRoot := filepath.Join(tmpDir, "/container-root")
|
||||
|
||||
require.NoError(t, makeFs(hostRoot))
|
||||
require.NoError(t, makeFs(containerRoot, dirOrLink{path: "/lib/"}))
|
||||
|
||||
// nvidia-cdi-hook create-symlinks --link libfoo.so.1::/lib/libfoo.so
|
||||
err := getTestCommand().createLink(containerRoot, "libfoo.so.1", "/lib/libfoo.so")
|
||||
require.NoError(t, err)
|
||||
|
||||
target, err := symlinks.Resolve(filepath.Join(containerRoot, "/lib/libfoo.so"))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "libfoo.so.1", target)
|
||||
}
|
||||
|
||||
func TestCreateLinkAbsolutePath(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
hostRoot := filepath.Join(tmpDir, "/host-root/")
|
||||
containerRoot := filepath.Join(tmpDir, "/container-root")
|
||||
|
||||
require.NoError(t, makeFs(hostRoot))
|
||||
require.NoError(t, makeFs(containerRoot, dirOrLink{path: "/lib/"}))
|
||||
|
||||
// nvidia-cdi-hook create-symlinks --link /lib/libfoo.so.1::/lib/libfoo.so
|
||||
err := getTestCommand().createLink(containerRoot, "/lib/libfoo.so.1", "/lib/libfoo.so")
|
||||
require.NoError(t, err)
|
||||
|
||||
target, err := symlinks.Resolve(filepath.Join(containerRoot, "/lib/libfoo.so"))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "/lib/libfoo.so.1", target)
|
||||
}
|
||||
|
||||
func TestCreateLinkAlreadyExists(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
containerContents []dirOrLink
|
||||
shouldExist []string
|
||||
}{
|
||||
{
|
||||
description: "link already exists with correct target",
|
||||
containerContents: []dirOrLink{{path: "/lib/libfoo.so", target: "libfoo.so.1"}},
|
||||
shouldExist: []string{},
|
||||
},
|
||||
{
|
||||
description: "link already exists with different target",
|
||||
containerContents: []dirOrLink{{path: "/lib/libfoo.so", target: "different-target"}, {path: "different-target"}},
|
||||
shouldExist: []string{"{{ .containerRoot }}/different-target"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
hostRoot := filepath.Join(tmpDir, "/host-root/")
|
||||
containerRoot := filepath.Join(tmpDir, "/container-root")
|
||||
require.NoError(t, makeFs(hostRoot))
|
||||
require.NoError(t, makeFs(containerRoot, tc.containerContents...))
|
||||
|
||||
// nvidia-cdi-hook create-symlinks --link libfoo.so.1::/lib/libfoo.so
|
||||
err := getTestCommand().createLink(containerRoot, "libfoo.so.1", "/lib/libfoo.so")
|
||||
require.NoError(t, err)
|
||||
target, err := symlinks.Resolve(filepath.Join(containerRoot, "lib/libfoo.so"))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "libfoo.so.1", target)
|
||||
|
||||
for _, p := range tc.shouldExist {
|
||||
require.DirExists(t, strings.ReplaceAll(p, "{{ .containerRoot }}", containerRoot))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateLinkOutOfBounds(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
hostRoot := filepath.Join(tmpDir, "/host-root")
|
||||
containerRoot := filepath.Join(tmpDir, "/container-root")
|
||||
|
||||
require.NoError(t,
|
||||
makeFs(hostRoot,
|
||||
dirOrLink{path: "libfoo.so"},
|
||||
),
|
||||
)
|
||||
require.NoError(t,
|
||||
makeFs(containerRoot,
|
||||
dirOrLink{path: "/lib"},
|
||||
dirOrLink{path: "/lib/foo", target: hostRoot},
|
||||
),
|
||||
)
|
||||
|
||||
path, err := symlinks.Resolve(filepath.Join(containerRoot, "/lib/foo"))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, hostRoot, path)
|
||||
|
||||
// nvidia-cdi-hook create-symlinks --link ../libfoo.so.1::/lib/foo/libfoo.so
|
||||
_ = getTestCommand().createLink(containerRoot, "../libfoo.so.1", "/lib/foo/libfoo.so")
|
||||
require.NoError(t, err)
|
||||
|
||||
target, err := symlinks.Resolve(filepath.Join(containerRoot, hostRoot, "libfoo.so"))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "../libfoo.so.1", target)
|
||||
|
||||
require.DirExists(t, filepath.Join(hostRoot, "libfoo.so"))
|
||||
}
|
||||
|
||||
// dirOrLink describes a filesystem entry to create below a test root.
// An entry with an empty target is a directory; otherwise it is a symlink at
// path that points to target.
type dirOrLink struct {
	path   string
	target string
}

// makeFs creates the directory tmpdir and populates it with the specified
// entries. Entry paths are interpreted relative to tmpdir.
//
// Directories (and the parent directories of symlinks) are created with mode
// 0o755. A symlink that already exists is left as is. Any other failure,
// including a failure to create a directory entry, is returned to the caller.
func makeFs(tmpdir string, fs ...dirOrLink) error {
	if err := os.MkdirAll(tmpdir, 0o755); err != nil {
		return err
	}
	for _, s := range fs {
		s.path = filepath.Join(tmpdir, s.path)
		if s.target == "" {
			// Report a failed directory creation instead of silently
			// continuing with an incomplete fixture.
			if err := os.MkdirAll(s.path, 0o755); err != nil {
				return err
			}
			continue
		}
		if err := os.MkdirAll(filepath.Dir(s.path), 0o755); err != nil {
			return err
		}
		if err := os.Symlink(s.target, s.path); err != nil && !os.IsExist(err) {
			return err
		}
	}
	return nil
}
|
||||
|
||||
// getTestCommand creates a command for running tests against.
|
||||
func getTestCommand() *command {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
return &command{
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
@@ -1,76 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cudacompat
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/moby/sys/symlink"
|
||||
)
|
||||
|
||||
// A containerRoot represents the root filesystem of a container.
|
||||
type containerRoot string
|
||||
|
||||
// hasPath checks whether the specified path exists in the root.
|
||||
func (r containerRoot) hasPath(path string) bool {
|
||||
resolved, err := r.resolve(path)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// globFiles matches the specified pattern in the root.
|
||||
// The files that match must be regular files.
|
||||
func (r containerRoot) globFiles(pattern string) ([]string, error) {
|
||||
patternPath, err := r.resolve(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
matches, err := filepath.Glob(patternPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var files []string
|
||||
for _, match := range matches {
|
||||
info, err := os.Lstat(match)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Ignore symlinks.
|
||||
if info.Mode()&os.ModeSymlink != 0 {
|
||||
continue
|
||||
}
|
||||
// Ignore directories.
|
||||
if info.IsDir() {
|
||||
continue
|
||||
}
|
||||
files = append(files, match)
|
||||
}
|
||||
return files, nil
|
||||
}
|
||||
|
||||
// resolve returns the absolute path including root path.
|
||||
// Symlinks are resolved, but are guaranteed to resolve in the root.
|
||||
func (r containerRoot) resolve(path string) (string, error) {
|
||||
absolute := filepath.Clean(filepath.Join(string(r), path))
|
||||
return symlink.FollowSymlinkInScope(absolute, string(r))
|
||||
}
|
||||
@@ -1,221 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cudacompat
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
const (
|
||||
cudaCompatPath = "/usr/local/cuda/compat"
|
||||
// cudaCompatLdsoconfdFilenamePattern specifies the pattern for the filename
|
||||
// in ld.so.conf.d that includes a reference to the CUDA compat path.
|
||||
// The 00-compat prefix is chosen to ensure that these libraries have a
|
||||
// higher precedence than other libraries on the system.
|
||||
cudaCompatLdsoconfdFilenamePattern = "00-compat-*.conf"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
// options collects the values of the enable-cuda-compat command line flags.
type options struct {
	// hostDriverVersion is the value of --host-driver-version; when it is
	// empty the hook does nothing.
	hostDriverVersion string
	// containerSpec is the value of the hidden --container-spec flag: the
	// path to the OCI container spec (empty or "-" means STDIN).
	containerSpec string
}
|
||||
|
||||
// NewCommand constructs a cuda-compat command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build the enable-cuda-compat command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := options{}
|
||||
|
||||
// Create the 'enable-cuda-compat' command
|
||||
c := cli.Command{
|
||||
Name: "enable-cuda-compat",
|
||||
Usage: "This hook ensures that the folder containing the CUDA compat libraries is added to the ldconfig search path if required.",
|
||||
Before: func(c *cli.Context) error {
|
||||
return m.validateFlags(c, &cfg)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "host-driver-version",
|
||||
Usage: "Specify the host driver version. If the CUDA compat libraries detected in the container do not have a higher MAJOR version, the hook is a no-op.",
|
||||
Destination: &cfg.hostDriverVersion,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Hidden: true,
|
||||
Category: "testing-only",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(_ *cli.Context, cfg *options) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(_ *cli.Context, cfg *options) error {
|
||||
if cfg.hostDriverVersion == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %w", err)
|
||||
}
|
||||
|
||||
containerRootDir, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %w", err)
|
||||
}
|
||||
|
||||
containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDir), cfg.hostDriverVersion)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get container forward compat directory: %w", err)
|
||||
}
|
||||
if containerForwardCompatDir == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
return m.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir)
|
||||
}
|
||||
|
||||
func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostDriverVersion string) (string, error) {
|
||||
if hostDriverVersion == "" {
|
||||
m.logger.Debugf("Host driver version not specified")
|
||||
return "", nil
|
||||
}
|
||||
if !containerRoot.hasPath(cudaCompatPath) {
|
||||
m.logger.Debugf("No CUDA forward compatibility libraries directory in container")
|
||||
return "", nil
|
||||
}
|
||||
if !containerRoot.hasPath("/etc/ld.so.cache") {
|
||||
m.logger.Debugf("The container does not have an LDCache")
|
||||
return "", nil
|
||||
}
|
||||
|
||||
libs, err := containerRoot.globFiles(filepath.Join(cudaCompatPath, "libcuda.so.*.*"))
|
||||
if err != nil {
|
||||
m.logger.Warningf("Failed to find CUDA compat library: %w", err)
|
||||
return "", nil
|
||||
}
|
||||
|
||||
if len(libs) == 0 {
|
||||
m.logger.Debugf("No CUDA forward compatibility libraries container")
|
||||
return "", nil
|
||||
}
|
||||
|
||||
if len(libs) != 1 {
|
||||
m.logger.Warningf("Unexpected number of CUDA compat libraries in container: %v", libs)
|
||||
return "", nil
|
||||
}
|
||||
|
||||
compatDriverVersion := strings.TrimPrefix(filepath.Base(libs[0]), "libcuda.so.")
|
||||
compatMajor, err := extractMajorVersion(compatDriverVersion)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to extract major version from %q: %v", compatDriverVersion, err)
|
||||
}
|
||||
|
||||
driverMajor, err := extractMajorVersion(hostDriverVersion)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to extract major version from %q: %v", hostDriverVersion, err)
|
||||
}
|
||||
|
||||
if driverMajor >= compatMajor {
|
||||
m.logger.Debugf("Compat major version is not greater than the host driver major version (%v >= %v)", hostDriverVersion, compatDriverVersion)
|
||||
return "", nil
|
||||
}
|
||||
|
||||
resolvedCompatDir := strings.TrimPrefix(filepath.Dir(libs[0]), string(containerRoot))
|
||||
return resolvedCompatDir, nil
|
||||
}
|
||||
|
||||
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/ in the specified root.
|
||||
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and
|
||||
// contains the specified directories on each line.
|
||||
func (m command) createLdsoconfdFile(in containerRoot, pattern string, dirs ...string) error {
|
||||
if len(dirs) == 0 {
|
||||
m.logger.Debugf("No directories to add to /etc/ld.so.conf")
|
||||
return nil
|
||||
}
|
||||
|
||||
ldsoconfdDir, err := in.resolve("/etc/ld.so.conf.d")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
|
||||
return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
|
||||
}
|
||||
|
||||
configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create config file: %w", err)
|
||||
}
|
||||
defer configFile.Close()
|
||||
|
||||
m.logger.Debugf("Adding directories %v to %v", dirs, configFile.Name())
|
||||
|
||||
added := make(map[string]bool)
|
||||
for _, dir := range dirs {
|
||||
if added[dir] {
|
||||
continue
|
||||
}
|
||||
_, err = fmt.Fprintf(configFile, "%s\n", dir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update config file: %w", err)
|
||||
}
|
||||
added[dir] = true
|
||||
}
|
||||
|
||||
// The created file needs to be world readable for the cases where the container is run as a non-root user.
|
||||
if err := configFile.Chmod(0644); err != nil {
|
||||
return fmt.Errorf("failed to chmod config file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// extractMajorVersion parses a version string and returns the major version as an int.
// The major version is everything before the first "." (or the whole string
// if there is no "."). An error is returned if that component is not an
// integer or if it is negative.
func extractMajorVersion(version string) (int, error) {
	majorString, _, _ := strings.Cut(version, ".")

	major, err := strconv.Atoi(majorString)
	if err != nil {
		return 0, err
	}
	if major < 0 {
		return 0, fmt.Errorf("invalid negative major version %d", major)
	}
	return major, nil
}
|
||||
@@ -1,182 +0,0 @@
|
||||
/*
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package cudacompat
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCompatLibs(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
contents map[string]string
|
||||
hostDriverVersion string
|
||||
expectedContainerForwardCompatDir string
|
||||
}{
|
||||
{
|
||||
description: "empty root",
|
||||
hostDriverVersion: "222.55.66",
|
||||
},
|
||||
{
|
||||
description: "compat lib is newer; no ldcache",
|
||||
contents: map[string]string{
|
||||
"/usr/local/cuda/compat/libcuda.so.333.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
},
|
||||
{
|
||||
description: "compat lib is newer; ldcache",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/usr/local/cuda/compat/libcuda.so.333.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
expectedContainerForwardCompatDir: "/usr/local/cuda/compat",
|
||||
},
|
||||
{
|
||||
description: "compat lib is older; ldcache",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/usr/local/cuda/compat/libcuda.so.111.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
expectedContainerForwardCompatDir: "",
|
||||
},
|
||||
{
|
||||
description: "compat lib has same major version; ldcache",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
expectedContainerForwardCompatDir: "",
|
||||
},
|
||||
{
|
||||
description: "numeric comparison is used; ldcache",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "99.55.66",
|
||||
expectedContainerForwardCompatDir: "/usr/local/cuda/compat",
|
||||
},
|
||||
{
|
||||
description: "driver version empty; ldcache",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
|
||||
},
|
||||
hostDriverVersion: "",
|
||||
},
|
||||
{
|
||||
description: "symlinks are followed",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/etc/alternatives/cuda/compat/libcuda.so.333.88.99": "",
|
||||
"/usr/local/cuda": "symlink=/etc/alternatives/cuda",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
expectedContainerForwardCompatDir: "/etc/alternatives/cuda/compat",
|
||||
},
|
||||
{
|
||||
description: "symlinks stay in container",
|
||||
contents: map[string]string{
|
||||
"/etc/ld.so.cache": "",
|
||||
"/compat/libcuda.so.333.88.99": "",
|
||||
"/usr/local/cuda": "symlink=../../../../../../",
|
||||
},
|
||||
hostDriverVersion: "222.55.66",
|
||||
expectedContainerForwardCompatDir: "/compat",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
containerRootDir := t.TempDir()
|
||||
for name, contents := range tc.contents {
|
||||
target := filepath.Join(containerRootDir, name)
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(target), 0755))
|
||||
|
||||
if strings.HasPrefix(contents, "symlink=") {
|
||||
require.NoError(t, os.Symlink(strings.TrimPrefix(contents, "symlink="), target))
|
||||
continue
|
||||
}
|
||||
|
||||
require.NoError(t, os.WriteFile(target, []byte(contents), 0600))
|
||||
}
|
||||
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
containerForwardCompatDir, err := c.getContainerForwardCompatDir(containerRoot(containerRootDir), tc.hostDriverVersion)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, tc.expectedContainerForwardCompatDir, containerForwardCompatDir)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateLdconfig(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
testCases := []struct {
|
||||
description string
|
||||
folders []string
|
||||
expectedContents string
|
||||
}{
|
||||
{
|
||||
description: "no folders; have no contents",
|
||||
},
|
||||
{
|
||||
description: "single folder is added",
|
||||
folders: []string{"/usr/local/cuda/compat"},
|
||||
expectedContents: "/usr/local/cuda/compat\n",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
containerRootDir := t.TempDir()
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
err := c.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, tc.folders...)
|
||||
require.NoError(t, err)
|
||||
|
||||
matches, err := filepath.Glob(filepath.Join(containerRootDir, "/etc/ld.so.conf.d/00-compat-*.conf"))
|
||||
require.NoError(t, err)
|
||||
|
||||
if tc.expectedContents == "" {
|
||||
require.Empty(t, matches)
|
||||
return
|
||||
}
|
||||
|
||||
require.Len(t, matches, 1)
|
||||
contents, err := os.ReadFile(matches[0])
|
||||
require.NoError(t, err)
|
||||
|
||||
require.EqualValues(t, tc.expectedContents, string(contents))
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,144 +0,0 @@
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package disabledevicenodemodification
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaDriverParamsPath = "/proc/driver/nvidia/params"
|
||||
)
|
||||
|
||||
// options collects the values of the disable-device-node-modification
// command line flags.
type options struct {
	// containerSpec is the value of the hidden --container-spec flag: the
	// path to the OCI container spec (empty or "-" means STDIN).
	containerSpec string
}
|
||||
|
||||
// NewCommand constructs an disable-device-node-modification subcommand with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
cfg := options{}
|
||||
|
||||
c := cli.Command{
|
||||
Name: "disable-device-node-modification",
|
||||
Usage: "Ensure that the /proc/driver/nvidia/params file present in the container does not allow device node modifications.",
|
||||
Before: func(c *cli.Context) error {
|
||||
return validateFlags(c, &cfg)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Hidden: true,
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func validateFlags(c *cli.Context, cfg *options) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func run(_ *cli.Context, cfg *options) error {
|
||||
modifiedParamsFileContents, err := getModifiedNVIDIAParamsContents()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get modified params file contents: %w", err)
|
||||
}
|
||||
if len(modifiedParamsFileContents) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %w", err)
|
||||
}
|
||||
|
||||
containerRootDirPath, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %w", err)
|
||||
}
|
||||
|
||||
return createParamsFileInContainer(containerRootDirPath, modifiedParamsFileContents)
|
||||
}
|
||||
|
||||
func getModifiedNVIDIAParamsContents() ([]byte, error) {
|
||||
hostNvidiaParamsFile, err := os.Open(nvidiaDriverParamsPath)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load params file: %w", err)
|
||||
}
|
||||
defer hostNvidiaParamsFile.Close()
|
||||
|
||||
modifiedContents, err := getModifiedParamsFileContentsFromReader(hostNvidiaParamsFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get modfied params file contents: %w", err)
|
||||
}
|
||||
|
||||
return modifiedContents, nil
|
||||
}
|
||||
|
||||
// getModifiedParamsFileContentsFromReader returns the contents of a modified params file from the specified reader.
//
// The input is processed line by line. A line that is exactly
// "ModifyDeviceFiles: 1" is replaced by "ModifyDeviceFiles: 0"; all other
// lines are copied unchanged. Every output line is terminated by a newline.
//
// Nil contents (and a nil error) are returned when no modification is
// required: either a line that is exactly "ModifyDeviceFiles: 0" is
// encountered, or no line was replaced. Matching is exact, so lines with
// leading or trailing whitespace, or other values, are not modified.
func getModifiedParamsFileContentsFromReader(r io.Reader) ([]byte, error) {
	const modifyDeviceFilesPrefix = "ModifyDeviceFiles: "

	var (
		modified             bytes.Buffer
		requiresModification bool
	)

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, modifyDeviceFilesPrefix) {
			switch strings.TrimPrefix(line, modifyDeviceFilesPrefix) {
			case "0":
				// Device file modification is already disabled.
				return nil, nil
			case "1":
				line = modifyDeviceFilesPrefix + "0"
				requiresModification = true
			}
		}
		// Writes to a bytes.Buffer never return an error.
		modified.WriteString(line)
		modified.WriteByte('\n')
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed to read params file: %w", err)
	}

	if !requiresModification {
		return nil, nil
	}

	return modified.Bytes(), nil
}
|
||||
@@ -1,91 +0,0 @@
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package disabledevicenodemodification
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetModifiedParamsFileContentsFromReader(t *testing.T) {
|
||||
testCases := map[string]struct {
|
||||
contents []byte
|
||||
expectedError error
|
||||
expectedContents []byte
|
||||
}{
|
||||
"no contents": {
|
||||
contents: nil,
|
||||
expectedError: nil,
|
||||
expectedContents: nil,
|
||||
},
|
||||
"other contents are ignored": {
|
||||
contents: []byte(`# Some other content
|
||||
that we don't care about
|
||||
`),
|
||||
expectedError: nil,
|
||||
expectedContents: nil,
|
||||
},
|
||||
"already zero requires no modification": {
|
||||
contents: []byte("ModifyDeviceFiles: 0"),
|
||||
expectedError: nil,
|
||||
expectedContents: nil,
|
||||
},
|
||||
"leading spaces require no modification": {
|
||||
contents: []byte(" ModifyDeviceFiles: 1"),
|
||||
},
|
||||
"Trailing spaces require no modification": {
|
||||
contents: []byte("ModifyDeviceFiles: 1 "),
|
||||
},
|
||||
"Not 1 require no modification": {
|
||||
contents: []byte("ModifyDeviceFiles: 11"),
|
||||
},
|
||||
"single line requires modification": {
|
||||
contents: []byte("ModifyDeviceFiles: 1"),
|
||||
expectedError: nil,
|
||||
expectedContents: []byte("ModifyDeviceFiles: 0\n"),
|
||||
},
|
||||
"single line with trailing newline requires modification": {
|
||||
contents: []byte("ModifyDeviceFiles: 1\n"),
|
||||
expectedError: nil,
|
||||
expectedContents: []byte("ModifyDeviceFiles: 0\n"),
|
||||
},
|
||||
"other content is maintained": {
|
||||
contents: []byte(`ModifyDeviceFiles: 1
|
||||
other content
|
||||
that
|
||||
is maintained`),
|
||||
expectedError: nil,
|
||||
expectedContents: []byte(`ModifyDeviceFiles: 0
|
||||
other content
|
||||
that
|
||||
is maintained
|
||||
`),
|
||||
},
|
||||
}
|
||||
|
||||
for description, tc := range testCases {
|
||||
t.Run(description, func(t *testing.T) {
|
||||
contents, err := getModifiedParamsFileContentsFromReader(bytes.NewReader(tc.contents))
|
||||
require.EqualValues(t, tc.expectedError, err)
|
||||
require.EqualValues(t, string(tc.expectedContents), string(contents))
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
//go:build linux
|
||||
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package disabledevicenodemodification
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func createParamsFileInContainer(containerRootDirPath string, contents []byte) error {
|
||||
tmpRoot, err := os.MkdirTemp("", "nvct-empty-dir*")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create temp root: %w", err)
|
||||
}
|
||||
|
||||
if err := createTmpFs(tmpRoot, len(contents)); err != nil {
|
||||
return fmt.Errorf("failed to create tmpfs mount for params file: %w", err)
|
||||
}
|
||||
|
||||
modifiedParamsFile, err := os.OpenFile(filepath.Join(tmpRoot, "nvct-params"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0444)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open modified params file: %w", err)
|
||||
}
|
||||
defer modifiedParamsFile.Close()
|
||||
|
||||
if _, err := modifiedParamsFile.Write(contents); err != nil {
|
||||
return fmt.Errorf("failed to write temporary params file: %w", err)
|
||||
}
|
||||
|
||||
err = utils.WithProcfd(containerRootDirPath, nvidiaDriverParamsPath, func(nvidiaDriverParamsFdPath string) error {
|
||||
return unix.Mount(modifiedParamsFile.Name(), nvidiaDriverParamsFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "")
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to mount modified params file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func createTmpFs(target string, size int) error {
|
||||
return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size))
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package disabledevicenodemodification
|
||||
|
||||
import "fmt"
|
||||
|
||||
// createParamsFileInContainer is the stub used on non-Linux platforms (see
// the build constraint of this file); it always fails with "not supported".
func createParamsFileInContainer(containerRootDirPath string, contents []byte) error {
	err := fmt.Errorf("not supported")
	return err
}
|
||||
@@ -1,107 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
|
||||
cli "github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/commands"
|
||||
)
|
||||
|
||||
// options holds the top-level settings of the CLI. Each field is populated
// from a command line flag or from the environment variables associated with
// that flag.
type options struct {
	// Debug enables debug-level logging.
	Debug bool
	// Quiet restricts logging to errors; it takes precedence over Debug.
	Quiet bool
}
|
||||
|
||||
func main() {
|
||||
logger := logrus.New()
|
||||
|
||||
// Create a options struct to hold the parsed environment variables or command line flags
|
||||
opts := options{}
|
||||
|
||||
// Create the top-level CLI
|
||||
c := cli.NewApp()
|
||||
c.Name = "NVIDIA CDI Hook"
|
||||
c.UseShortOptionHandling = true
|
||||
c.EnableBashCompletion = true
|
||||
c.Usage = "Command to structure files for usage inside a container, called as hooks from a container runtime, defined in a CDI yaml file"
|
||||
c.Version = info.GetVersionString()
|
||||
|
||||
// We set the default action for the `nvidia-cdi-hook` command to issue a
|
||||
// warning and exit with no error.
|
||||
// This means that if an unsupported hook is run, a container will not fail
|
||||
// to launch. An unsupported hook could be the result of a CDI specification
|
||||
// referring to a new hook that is not yet supported by an older NVIDIA
|
||||
// Container Toolkit version or a hook that has been removed in newer
|
||||
// version.
|
||||
c.Action = func(ctx *cli.Context) error {
|
||||
commands.IssueUnsupportedHookWarning(logger, ctx)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Setup the flags for this command
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "debug",
|
||||
Aliases: []string{"d"},
|
||||
Usage: "Enable debug-level logging",
|
||||
Destination: &opts.Debug,
|
||||
// TODO: Support for NVIDIA_CDI_DEBUG is deprecated and NVIDIA_CTK_DEBUG should be used instead.
|
||||
EnvVars: []string{"NVIDIA_CTK_DEBUG", "NVIDIA_CDI_DEBUG"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "quiet",
|
||||
Usage: "Suppress all output except for errors; overrides --debug",
|
||||
Destination: &opts.Quiet,
|
||||
// TODO: Support for NVIDIA_CDI_QUIET is deprecated and NVIDIA_CTK_QUIET should be used instead.
|
||||
EnvVars: []string{"NVDIA_CTK_QUIET", "NVIDIA_CDI_QUIET"},
|
||||
},
|
||||
}
|
||||
|
||||
// Set log-level for all subcommands
|
||||
c.Before = func(c *cli.Context) error {
|
||||
logLevel := logrus.InfoLevel
|
||||
if opts.Debug {
|
||||
logLevel = logrus.DebugLevel
|
||||
}
|
||||
if opts.Quiet {
|
||||
logLevel = logrus.ErrorLevel
|
||||
}
|
||||
logger.SetLevel(logLevel)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Define the subcommands
|
||||
c.Commands = commands.New(logger)
|
||||
|
||||
// Run the CLI
|
||||
err := c.Run(os.Args)
|
||||
if err != nil {
|
||||
logger.Errorf("%v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/moby/sys/symlink"
|
||||
)
|
||||
|
||||
// A containerRoot represents the root filesystem of a container.
|
||||
type containerRoot string
|
||||
|
||||
// hasPath checks whether the specified path exists in the root.
|
||||
func (r containerRoot) hasPath(path string) bool {
|
||||
resolved, err := r.resolve(path)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// resolve returns the absolute path including root path.
|
||||
// Symlinks are resolved, but are guaranteed to resolve in the root.
|
||||
func (r containerRoot) resolve(path string) (string, error) {
|
||||
absolute := filepath.Clean(filepath.Join(string(r), path))
|
||||
return symlink.FollowSymlinkInScope(absolute, string(r))
|
||||
}
|
||||
@@ -1,200 +0,0 @@
|
||||
//go:build linux
|
||||
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
|
||||
"github.com/moby/sys/reexec"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// pivotRoot will call pivot_root such that rootfs becomes the new root
|
||||
// filesystem, and everything else is cleaned up.
|
||||
// This is adapted from the implementation here:
|
||||
//
|
||||
// https://github.com/opencontainers/runc/blob/e89a29929c775025419ab0d218a43588b4c12b9a/libcontainer/rootfs_linux.go#L1056-L1113
|
||||
//
|
||||
// With the `mount` and `unmount` calls changed to direct unix.Mount and unix.Unmount calls.
|
||||
func pivotRoot(rootfs string) error {
|
||||
// While the documentation may claim otherwise, pivot_root(".", ".") is
|
||||
// actually valid. What this results in is / being the new root but
|
||||
// /proc/self/cwd being the old root. Since we can play around with the cwd
|
||||
// with pivot_root this allows us to pivot without creating directories in
|
||||
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
|
||||
|
||||
oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return &os.PathError{Op: "open", Path: "/", Err: err}
|
||||
}
|
||||
defer unix.Close(oldroot) //nolint: errcheck
|
||||
|
||||
newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return &os.PathError{Op: "open", Path: rootfs, Err: err}
|
||||
}
|
||||
defer unix.Close(newroot) //nolint: errcheck
|
||||
|
||||
// Change to the new root so that the pivot_root actually acts on it.
|
||||
if err := unix.Fchdir(newroot); err != nil {
|
||||
return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(newroot), Err: err}
|
||||
}
|
||||
|
||||
if err := unix.PivotRoot(".", "."); err != nil {
|
||||
return &os.PathError{Op: "pivot_root", Path: ".", Err: err}
|
||||
}
|
||||
|
||||
// Currently our "." is oldroot (according to the current kernel code).
|
||||
// However, purely for safety, we will fchdir(oldroot) since there isn't
|
||||
// really any guarantee from the kernel what /proc/self/cwd will be after a
|
||||
// pivot_root(2).
|
||||
|
||||
if err := unix.Fchdir(oldroot); err != nil {
|
||||
return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(oldroot), Err: err}
|
||||
}
|
||||
|
||||
// Make oldroot rslave to make sure our unmounts don't propagate to the
|
||||
// host (and thus bork the machine). We don't use rprivate because this is
|
||||
// known to cause issues due to races where we still have a reference to a
|
||||
// mount while a process in the host namespace are trying to operate on
|
||||
// something they think has no mounts (devicemapper in particular).
|
||||
if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
// Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
|
||||
if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Switch back to our shiny new root.
|
||||
if err := unix.Chdir("/"); err != nil {
|
||||
return &os.PathError{Op: "chdir", Path: "/", Err: err}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// mountLdConfig mounts the host ldconfig to the mount namespace of the hook.
|
||||
// We use WithProcfd to perform the mount operations to ensure that the changes
|
||||
// are persisted across the pivot root.
|
||||
func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) {
|
||||
hostLdconfigInfo, err := os.Stat(hostLdconfigPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error reading host ldconfig: %w", err)
|
||||
}
|
||||
|
||||
hookScratchDirPath := "/var/run/nvidia-ctk-hook"
|
||||
ldconfigPath := filepath.Join(hookScratchDirPath, "ldconfig")
|
||||
if err := utils.MkdirAllInRoot(containerRootDirPath, hookScratchDirPath, 0755); err != nil {
|
||||
return "", fmt.Errorf("error creating hook scratch folder: %w", err)
|
||||
}
|
||||
|
||||
err = utils.WithProcfd(containerRootDirPath, hookScratchDirPath, func(hookScratchDirFdPath string) error {
|
||||
return createTmpFs(hookScratchDirFdPath, int(hostLdconfigInfo.Size()))
|
||||
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error creating tmpfs: %w", err)
|
||||
}
|
||||
|
||||
if _, err := createFileInRoot(containerRootDirPath, ldconfigPath, hostLdconfigInfo.Mode()); err != nil {
|
||||
return "", fmt.Errorf("error creating ldconfig: %w", err)
|
||||
}
|
||||
|
||||
err = utils.WithProcfd(containerRootDirPath, ldconfigPath, func(ldconfigFdPath string) error {
|
||||
return unix.Mount(hostLdconfigPath, ldconfigFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "")
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error bind mounting host ldconfig: %w", err)
|
||||
}
|
||||
|
||||
return ldconfigPath, nil
|
||||
}
|
||||
|
||||
func createFileInRoot(containerRootDirPath string, destinationPath string, mode os.FileMode) (string, error) {
|
||||
dest, err := securejoin.SecureJoin(containerRootDirPath, destinationPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// Make the parent directory.
|
||||
destDir, destBase := filepath.Split(dest)
|
||||
destDirFd, err := utils.MkdirAllInRootOpen(containerRootDirPath, destDir, 0755)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error creating parent dir: %w", err)
|
||||
}
|
||||
defer destDirFd.Close()
|
||||
// Make the target file. We want to avoid opening any file that is
|
||||
// already there because it could be a "bad" file like an invalid
|
||||
// device or hung tty that might cause a DoS, so we use mknodat.
|
||||
// destBase does not contain any "/" components, and mknodat does
|
||||
// not follow trailing symlinks, so we can safely just call mknodat
|
||||
// here.
|
||||
if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|uint32(mode), 0); err != nil {
|
||||
// If we get EEXIST, there was already an inode there and
|
||||
// we can consider that a success.
|
||||
if !errors.Is(err, unix.EEXIST) {
|
||||
return "", fmt.Errorf("error creating empty file: %w", err)
|
||||
}
|
||||
}
|
||||
return dest, nil
|
||||
}
|
||||
|
||||
// mountProc mounts a clean proc filesystem in the new root.
|
||||
func mountProc(newroot string) error {
|
||||
target := filepath.Join(newroot, "/proc")
|
||||
|
||||
if err := os.MkdirAll(target, 0755); err != nil {
|
||||
return fmt.Errorf("error creating directory: %w", err)
|
||||
}
|
||||
return unix.Mount("proc", target, "proc", 0, "")
|
||||
}
|
||||
|
||||
// createTmpFs creates a tmpfs at the specified location with the specified size.
|
||||
func createTmpFs(target string, size int) error {
|
||||
return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size))
|
||||
}
|
||||
|
||||
// createReexecCommand creates a command that can be used to trigger the reexec
|
||||
// initializer.
|
||||
// On linux this command runs in new namespaces.
|
||||
func createReexecCommand(args []string) *exec.Cmd {
|
||||
cmd := reexec.Command(args...)
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Cloneflags: syscall.CLONE_NEWNS |
|
||||
syscall.CLONE_NEWUTS |
|
||||
syscall.CLONE_NEWIPC |
|
||||
syscall.CLONE_NEWPID |
|
||||
syscall.CLONE_NEWNET,
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -1,51 +0,0 @@
|
||||
//go:build !linux
|
||||
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
"github.com/moby/sys/reexec"
|
||||
)
|
||||
|
||||
// pivotRoot is not available on non-linux platforms and always returns a
// "not supported" error.
func pivotRoot(newroot string) error {
	err := fmt.Errorf("not supported")
	return err
}
|
||||
|
||||
// mountLdConfig is not available on non-linux platforms and always returns
// an empty path together with a "not supported" error.
func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) {
	err := fmt.Errorf("not supported")
	return "", err
}
|
||||
|
||||
// mountProc is not available on non-linux platforms and always returns a
// "not supported" error.
func mountProc(newroot string) error {
	err := fmt.Errorf("not supported")
	return err
}
|
||||
|
||||
// createReexecCommand creates a command that can be used ot trigger the reexec
|
||||
// initializer.
|
||||
func createReexecCommand(args []string) *exec.Cmd {
|
||||
cmd := reexec.Command(args...)
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
//go:build linux
|
||||
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/exeseal"
|
||||
)
|
||||
|
||||
// SafeExec attempts to clone the specified binary (as an memfd, for example) before executing it.
|
||||
func SafeExec(path string, args []string, envv []string) error {
|
||||
safeExe, err := cloneBinary(path)
|
||||
if err != nil {
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(path, args, envv)
|
||||
}
|
||||
defer safeExe.Close()
|
||||
|
||||
exePath := "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(exePath, args, envv)
|
||||
}
|
||||
|
||||
func cloneBinary(path string) (*os.File, error) {
|
||||
exe, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening current binary: %w", err)
|
||||
}
|
||||
defer exe.Close()
|
||||
|
||||
stat, err := exe.Stat()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("checking %v size: %w", path, err)
|
||||
}
|
||||
size := stat.Size()
|
||||
|
||||
return exeseal.CloneBinary(exe, size, path, os.TempDir())
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
//go:build !linux
|
||||
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import "syscall"
|
||||
|
||||
// SafeExec has no hardened implementation on non-linux systems; it passes
// path, args and envv straight to the Exec syscall.
func SafeExec(path string, args []string, envv []string) error {
	//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
	err := syscall.Exec(path, args, envv)
	return err
}
|
||||
@@ -1,255 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/moby/sys/reexec"
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
const (
|
||||
// ldsoconfdFilenamePattern specifies the pattern for the filename
|
||||
// in ld.so.conf.d that includes references to the specified directories.
|
||||
// The 00-nvcr prefix is chosen to ensure that these libraries have a
|
||||
// higher precedence than other libraries on the system, but lower than
|
||||
// the 00-cuda-compat that is included in some containers.
|
||||
ldsoconfdFilenamePattern = "00-nvcr-*.conf"
|
||||
|
||||
reexecUpdateLdCacheCommandName = "reexec-update-ldcache"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
type options struct {
|
||||
folders cli.StringSlice
|
||||
ldconfigPath string
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
func init() {
|
||||
reexec.Register(reexecUpdateLdCacheCommandName, updateLdCacheHandler)
|
||||
if reexec.Init() {
|
||||
os.Exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
// NewCommand constructs an update-ldcache command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build the update-ldcache command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := options{}
|
||||
|
||||
// Create the 'update-ldcache' command
|
||||
c := cli.Command{
|
||||
Name: "update-ldcache",
|
||||
Usage: "Update ldcache in a container by running ldconfig",
|
||||
Before: func(c *cli.Context) error {
|
||||
return m.validateFlags(c, &cfg)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "folder",
|
||||
Usage: "Specify a folder to add to /etc/ld.so.conf before updating the ld cache",
|
||||
Destination: &cfg.folders,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "ldconfig-path",
|
||||
Usage: "Specify the path to the ldconfig program",
|
||||
Destination: &cfg.ldconfigPath,
|
||||
Value: "/sbin/ldconfig",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(c *cli.Context, cfg *options) error {
|
||||
if cfg.ldconfigPath == "" {
|
||||
return errors.New("ldconfig-path must be specified")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *options) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRootDir, err := s.GetContainerRoot()
|
||||
if err != nil || containerRootDir == "" || containerRootDir == "/" {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
args := []string{
|
||||
reexecUpdateLdCacheCommandName,
|
||||
strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"),
|
||||
containerRootDir,
|
||||
}
|
||||
args = append(args, cfg.folders.Value()...)
|
||||
|
||||
cmd := createReexecCommand(args)
|
||||
|
||||
return cmd.Run()
|
||||
}
|
||||
|
||||
// updateLdCacheHandler wraps updateLdCache with error handling.
|
||||
func updateLdCacheHandler() {
|
||||
if err := updateLdCache(os.Args); err != nil {
|
||||
log.Printf("Error updating ldcache: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// updateLdCache is invoked from a reexec'd handler and provides namespace
|
||||
// isolation for the operations performed by this hook.
|
||||
// At the point where this is invoked, we are in a new mount namespace that is
|
||||
// cloned from the parent.
|
||||
//
|
||||
// args[0] is the reexec initializer function name
|
||||
// args[1] is the path of the ldconfig binary on the host
|
||||
// args[2] is the container root directory
|
||||
// The remaining args are folders that need to be added to the ldcache.
|
||||
func updateLdCache(args []string) error {
|
||||
if len(args) < 3 {
|
||||
return fmt.Errorf("incorrect arguments: %v", args)
|
||||
}
|
||||
hostLdconfigPath := args[1]
|
||||
containerRootDirPath := args[2]
|
||||
|
||||
// To prevent leaking the parent proc filesystem, we create a new proc mount
|
||||
// in the container root.
|
||||
if err := mountProc(containerRootDirPath); err != nil {
|
||||
return fmt.Errorf("error mounting /proc: %w", err)
|
||||
}
|
||||
|
||||
// We mount the host ldconfig before we pivot root since host paths are not
|
||||
// visible after the pivot root operation.
|
||||
ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error mounting host ldconfig: %w", err)
|
||||
}
|
||||
|
||||
// We pivot to the container root for the new process, this further limits
|
||||
// access to the host.
|
||||
if err := pivotRoot(containerRootDirPath); err != nil {
|
||||
return fmt.Errorf("error running pivot_root: %w", err)
|
||||
}
|
||||
|
||||
return runLdconfig(ldconfigPath, args[3:]...)
|
||||
}
|
||||
|
||||
// runLdconfig runs the ldconfig binary and ensures that the specified directories
|
||||
// are processed for the ldcache.
|
||||
func runLdconfig(ldconfigPath string, directories ...string) error {
|
||||
args := []string{
|
||||
"ldconfig",
|
||||
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
|
||||
// be configured to use a different config file by default.
|
||||
// Note that since we apply the `-r {{ .containerRootDir }}` argument, /etc/ld.so.conf is
|
||||
// in the container.
|
||||
"-f", "/etc/ld.so.conf",
|
||||
}
|
||||
|
||||
containerRoot := containerRoot("/")
|
||||
|
||||
if containerRoot.hasPath("/etc/ld.so.cache") {
|
||||
args = append(args, "-C", "/etc/ld.so.cache")
|
||||
} else {
|
||||
args = append(args, "-N")
|
||||
}
|
||||
|
||||
if containerRoot.hasPath("/etc/ld.so.conf.d") {
|
||||
err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf.d: %w", err)
|
||||
}
|
||||
} else {
|
||||
args = append(args, directories...)
|
||||
}
|
||||
|
||||
return SafeExec(ldconfigPath, args, nil)
|
||||
}
|
||||
|
||||
// createLdsoconfdFile writes the specified directories, one per line, to a
// new file in /etc/ld.so.conf.d/.
//
// The file is created with os.CreateTemp using pattern as the name pattern.
// Duplicate directories are written only once, in first-seen order. Nothing
// is created when dirs is empty.
func createLdsoconfdFile(pattern string, dirs ...string) error {
	const ldsoconfdDir = "/etc/ld.so.conf.d"

	if len(dirs) == 0 {
		return nil
	}

	if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
		return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
	}

	configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
	if err != nil {
		return fmt.Errorf("failed to create config file: %w", err)
	}
	defer func() {
		_ = configFile.Close()
	}()

	seen := make(map[string]struct{}, len(dirs))
	for _, dir := range dirs {
		if _, duplicate := seen[dir]; duplicate {
			continue
		}
		if _, err := fmt.Fprintf(configFile, "%s\n", dir); err != nil {
			return fmt.Errorf("failed to update config file: %w", err)
		}
		seen[dir] = struct{}{}
	}

	// Make the file world readable so that it can also be used when the
	// container runs as a non-root user.
	if err := configFile.Chmod(0644); err != nil {
		return fmt.Errorf("failed to chmod config file: %w", err)
	}

	return nil
}
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/mod/semver"
|
||||
@@ -13,11 +15,31 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
)
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
|
||||
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
|
||||
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
|
||||
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
|
||||
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
||||
)
|
||||
|
||||
const (
|
||||
capSysAdmin = "CAP_SYS_ADMIN"
|
||||
)
|
||||
|
||||
const (
|
||||
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
||||
)
|
||||
|
||||
type nvidiaConfig struct {
|
||||
Devices []string
|
||||
Devices string
|
||||
MigConfigDevices string
|
||||
MigMonitorDevices string
|
||||
ImexChannels []string
|
||||
ImexChannels string
|
||||
DriverCapabilities string
|
||||
// Requirements defines the requirements DSL for the container to run.
|
||||
// This is empty if no specific requirements are needed, or if requirements are
|
||||
@@ -55,14 +77,23 @@ type LinuxCapabilities struct {
|
||||
Ambient []string `json:"ambient,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
// Mount from OCI runtime spec
|
||||
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
|
||||
type Mount struct {
|
||||
Destination string `json:"destination"`
|
||||
Type string `json:"type,omitempty" platform:"linux,solaris"`
|
||||
Source string `json:"source,omitempty"`
|
||||
Options []string `json:"options,omitempty"`
|
||||
}
|
||||
|
||||
// Spec from OCI runtime spec
|
||||
// We use pointers to structs, similarly to the latest version of runtime-spec:
|
||||
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
|
||||
type Spec struct {
|
||||
Version *string `json:"ociVersion"`
|
||||
Process *Process `json:"process,omitempty"`
|
||||
Root *Root `json:"root,omitempty"`
|
||||
Mounts []specs.Mount `json:"mounts,omitempty"`
|
||||
Version *string `json:"ociVersion"`
|
||||
Process *Process `json:"process,omitempty"`
|
||||
Root *Root `json:"root,omitempty"`
|
||||
Mounts []Mount `json:"mounts,omitempty"`
|
||||
}
|
||||
|
||||
// HookState holds state information about the hook
|
||||
@@ -99,9 +130,9 @@ func loadSpec(path string) (spec *Spec) {
|
||||
return
|
||||
}
|
||||
|
||||
func (s *Spec) GetCapabilities() []string {
|
||||
if s == nil || s.Process == nil || s.Process.Capabilities == nil {
|
||||
return nil
|
||||
func isPrivileged(s *Spec) bool {
|
||||
if s.Process.Capabilities == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
var caps []string
|
||||
@@ -114,72 +145,150 @@ func (s *Spec) GetCapabilities() []string {
|
||||
if err != nil {
|
||||
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
||||
}
|
||||
return caps
|
||||
for _, c := range caps {
|
||||
if c == capSysAdmin {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Otherwise, parse s.Process.Capabilities as:
|
||||
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
|
||||
capabilities := specs.LinuxCapabilities{}
|
||||
err := json.Unmarshal(*s.Process.Capabilities, &capabilities)
|
||||
process := specs.Process{
|
||||
Env: s.Process.Env,
|
||||
}
|
||||
|
||||
err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
|
||||
if err != nil {
|
||||
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
||||
}
|
||||
|
||||
return image.OCISpecCapabilities(capabilities).GetCapabilities()
|
||||
}
|
||||
|
||||
func isPrivileged(s *Spec) bool {
|
||||
return image.IsPrivileged(s)
|
||||
}
|
||||
|
||||
func getMigConfigDevices(i image.CUDA) *string {
|
||||
return getMigDevices(i, image.EnvVarNvidiaMigConfigDevices)
|
||||
}
|
||||
|
||||
func getMigMonitorDevices(i image.CUDA) *string {
|
||||
return getMigDevices(i, image.EnvVarNvidiaMigMonitorDevices)
|
||||
}
|
||||
|
||||
func getMigDevices(image image.CUDA, envvar string) *string {
|
||||
if !image.HasEnvvar(envvar) {
|
||||
return nil
|
||||
}
|
||||
devices := image.Getenv(envvar)
|
||||
return &devices
|
||||
}
|
||||
|
||||
func (hookConfig *hookConfig) getImexChannels(image image.CUDA, privileged bool) []string {
|
||||
if hookConfig.Features.IgnoreImexChannelRequests.IsEnabled() {
|
||||
return nil
|
||||
fullSpec := specs.Spec{
|
||||
Version: *s.Version,
|
||||
Process: &process,
|
||||
}
|
||||
|
||||
// If enabled, try and get the device list from volume mounts first
|
||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||
devices := image.ImexChannelsFromMounts()
|
||||
if len(devices) > 0 {
|
||||
return devices
|
||||
return image.IsPrivileged(&fullSpec)
|
||||
}
|
||||
|
||||
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
|
||||
// We check if the image has at least one of the Swarm resource envvars defined and use this
|
||||
// if specified.
|
||||
var hasSwarmEnvvar bool
|
||||
for _, envvar := range swarmResourceEnvvars {
|
||||
if _, exists := image[envvar]; exists {
|
||||
hasSwarmEnvvar = true
|
||||
break
|
||||
}
|
||||
}
|
||||
devices := image.ImexChannelsFromEnvVar()
|
||||
|
||||
var devices []string
|
||||
if hasSwarmEnvvar {
|
||||
devices = image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
|
||||
} else {
|
||||
devices = image.DevicesFromEnvvars(envNVVisibleDevices).List()
|
||||
}
|
||||
|
||||
if len(devices) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
devicesString := strings.Join(devices, ",")
|
||||
|
||||
return &devicesString
|
||||
}
|
||||
|
||||
func getDevicesFromMounts(mounts []Mount) *string {
|
||||
var devices []string
|
||||
for _, m := range mounts {
|
||||
root := filepath.Clean(deviceListAsVolumeMountsRoot)
|
||||
source := filepath.Clean(m.Source)
|
||||
destination := filepath.Clean(m.Destination)
|
||||
|
||||
// Only consider mounts who's host volume is /dev/null
|
||||
if source != "/dev/null" {
|
||||
continue
|
||||
}
|
||||
// Only consider container mount points that begin with 'root'
|
||||
if len(destination) < len(root) {
|
||||
continue
|
||||
}
|
||||
if destination[:len(root)] != root {
|
||||
continue
|
||||
}
|
||||
// Grab the full path beyond 'root' and add it to the list of devices
|
||||
device := destination[len(root):]
|
||||
if len(device) > 0 && device[0] == '/' {
|
||||
device = device[1:]
|
||||
}
|
||||
if len(device) == 0 {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, device)
|
||||
}
|
||||
|
||||
if devices == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
ret := strings.Join(devices, ",")
|
||||
return &ret
|
||||
}
|
||||
|
||||
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *string {
|
||||
// If enabled, try and get the device list from volume mounts first
|
||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||
devices := getDevicesFromMounts(mounts)
|
||||
if devices != nil {
|
||||
return devices
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to reading from the environment variable if privileges are correct
|
||||
devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
|
||||
if devices == nil {
|
||||
return nil
|
||||
}
|
||||
if privileged || hookConfig.AcceptEnvvarUnprivileged {
|
||||
return devices
|
||||
}
|
||||
|
||||
configName := hookConfig.getConfigOption("AcceptEnvvarUnprivileged")
|
||||
log.Printf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES (privileged=%v, %v=%v) ", privileged, configName, hookConfig.AcceptEnvvarUnprivileged)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
|
||||
func getMigConfigDevices(env map[string]string) *string {
|
||||
if devices, ok := env[envNVMigConfigDevices]; ok {
|
||||
return &devices
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMigMonitorDevices(env map[string]string) *string {
|
||||
if devices, ok := env[envNVMigMonitorDevices]; ok {
|
||||
return &devices
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getImexChannels(env map[string]string) *string {
|
||||
if chans, ok := env[envNVImexChannels]; ok {
|
||||
return &chans
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities {
|
||||
// We use the default driver capabilities by default. This is filtered to only include the
|
||||
// supported capabilities
|
||||
supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities)
|
||||
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
|
||||
|
||||
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
||||
|
||||
capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
|
||||
capsEnv := cudaImage.Getenv(image.EnvVarNvidiaDriverCapabilities)
|
||||
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
|
||||
|
||||
if !capsEnvSpecified && legacyImage {
|
||||
// Environment variable unset with legacy image: set all capabilities.
|
||||
@@ -198,12 +307,14 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
|
||||
return capabilities
|
||||
}
|
||||
|
||||
func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig {
|
||||
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||
legacyImage := image.IsLegacy()
|
||||
|
||||
devices := image.VisibleDevices()
|
||||
if len(devices) == 0 {
|
||||
// empty devices means this is not a GPU container.
|
||||
var devices string
|
||||
if d := getDevices(hookConfig, image, mounts, privileged); d != nil {
|
||||
devices = *d
|
||||
} else {
|
||||
// 'nil' devices means this is not a GPU container.
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -223,7 +334,10 @@ func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool)
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
imexChannels := hookConfig.getImexChannels(image, privileged)
|
||||
var imexChannels string
|
||||
if c := getImexChannels(image); c != nil {
|
||||
imexChannels = *c
|
||||
}
|
||||
|
||||
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
|
||||
|
||||
@@ -242,7 +356,7 @@ func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool)
|
||||
}
|
||||
}
|
||||
|
||||
func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) {
|
||||
func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
var h HookState
|
||||
d := json.NewDecoder(os.Stdin)
|
||||
if err := d.Decode(&h); err != nil {
|
||||
@@ -256,25 +370,19 @@ func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) {
|
||||
|
||||
s := loadSpec(path.Join(b, "config.json"))
|
||||
|
||||
privileged := isPrivileged(s)
|
||||
|
||||
i, err := image.New(
|
||||
image, err := image.New(
|
||||
image.WithEnv(s.Process.Env),
|
||||
image.WithMounts(s.Mounts),
|
||||
image.WithPrivileged(privileged),
|
||||
image.WithDisableRequire(hookConfig.DisableRequire),
|
||||
image.WithAcceptDeviceListAsVolumeMounts(hookConfig.AcceptDeviceListAsVolumeMounts),
|
||||
image.WithAcceptEnvvarUnprivileged(hookConfig.AcceptEnvvarUnprivileged),
|
||||
image.WithPreferredVisibleDevicesEnvVars(hookConfig.getSwarmResourceEnvvars()...),
|
||||
image.WithDisableRequire(hook.DisableRequire),
|
||||
)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
|
||||
privileged := isPrivileged(s)
|
||||
return containerConfig{
|
||||
Pid: h.Pid,
|
||||
Rootfs: s.Root.Path,
|
||||
Image: i,
|
||||
Nvidia: hookConfig.getNvidiaConfig(i, privileged),
|
||||
Image: image,
|
||||
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -17,10 +17,18 @@ const (
|
||||
driverPath = "/run/nvidia/driver"
|
||||
)
|
||||
|
||||
// hookConfig wraps the toolkit config.
|
||||
// This allows for functions to be defined on the local type.
|
||||
type hookConfig struct {
|
||||
*config.Config
|
||||
var defaultPaths = [...]string{}
|
||||
|
||||
// HookConfig : options for the nvidia-container-runtime-hook.
|
||||
type HookConfig config.Config
|
||||
|
||||
func getDefaultHookConfig() (HookConfig, error) {
|
||||
defaultCfg, err := config.GetDefault()
|
||||
if err != nil {
|
||||
return HookConfig{}, err
|
||||
}
|
||||
|
||||
return *(*HookConfig)(defaultCfg), nil
|
||||
}
|
||||
|
||||
// loadConfig loads the required paths for the hook config.
|
||||
@@ -50,12 +58,12 @@ func loadConfig() (*config.Config, error) {
|
||||
return config.GetDefault()
|
||||
}
|
||||
|
||||
func getHookConfig() (*hookConfig, error) {
|
||||
func getHookConfig() (*HookConfig, error) {
|
||||
cfg, err := loadConfig()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load config: %v", err)
|
||||
}
|
||||
config := &hookConfig{cfg}
|
||||
config := (*HookConfig)(cfg)
|
||||
|
||||
allSupportedDriverCapabilities := image.SupportedDriverCapabilities
|
||||
if config.SupportedDriverCapabilities == "all" {
|
||||
@@ -73,7 +81,7 @@ func getHookConfig() (*hookConfig, error) {
|
||||
|
||||
// getConfigOption returns the toml config option associated with the
|
||||
// specified struct field.
|
||||
func (c hookConfig) getConfigOption(fieldName string) string {
|
||||
func (c HookConfig) getConfigOption(fieldName string) string {
|
||||
t := reflect.TypeOf(c)
|
||||
f, ok := t.FieldByName(fieldName)
|
||||
if !ok {
|
||||
@@ -87,8 +95,8 @@ func (c hookConfig) getConfigOption(fieldName string) string {
|
||||
}
|
||||
|
||||
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
|
||||
func (c *hookConfig) getSwarmResourceEnvvars() []string {
|
||||
if c == nil || c.SwarmResource == "" {
|
||||
func (c *HookConfig) getSwarmResourceEnvvars() []string {
|
||||
if c.SwarmResource == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -104,26 +112,3 @@ func (c *hookConfig) getSwarmResourceEnvvars() []string {
|
||||
|
||||
return envvars
|
||||
}
|
||||
|
||||
// nvidiaContainerCliCUDACompatModeFlags returns required --cuda-compat-mode
|
||||
// flag(s) depending on the hook and runtime configurations.
|
||||
func (c *hookConfig) nvidiaContainerCliCUDACompatModeFlags() []string {
|
||||
var flag string
|
||||
switch c.NVIDIAContainerRuntimeConfig.Modes.Legacy.CUDACompatMode {
|
||||
case config.CUDACompatModeLdconfig:
|
||||
flag = "--cuda-compat-mode=ldconfig"
|
||||
case config.CUDACompatModeMount:
|
||||
flag = "--cuda-compat-mode=mount"
|
||||
case config.CUDACompatModeDisabled, config.CUDACompatModeHook:
|
||||
flag = "--cuda-compat-mode=disabled"
|
||||
default:
|
||||
if !c.Features.AllowCUDACompatLibsFromContainer.IsEnabled() {
|
||||
flag = "--cuda-compat-mode=disabled"
|
||||
}
|
||||
}
|
||||
|
||||
if flag == "" {
|
||||
return nil
|
||||
}
|
||||
return []string{flag}
|
||||
}
|
||||
|
||||
@@ -21,10 +21,8 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetHookConfig(t *testing.T) {
|
||||
@@ -85,15 +83,15 @@ func TestGetHookConfig(t *testing.T) {
|
||||
configflag = &filename
|
||||
|
||||
for _, line := range tc.lines {
|
||||
_, err := fmt.Fprintf(configFile, "%s\n", line)
|
||||
_, err := configFile.WriteString(fmt.Sprintf("%s\n", line))
|
||||
require.NoError(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
var cfg hookConfig
|
||||
var config HookConfig
|
||||
getHookConfig := func() {
|
||||
c, _ := getHookConfig()
|
||||
cfg = *c
|
||||
config = *c
|
||||
}
|
||||
|
||||
if tc.expectedPanic {
|
||||
@@ -103,7 +101,7 @@ func TestGetHookConfig(t *testing.T) {
|
||||
|
||||
getHookConfig()
|
||||
|
||||
require.EqualValues(t, tc.expectedDriverCapabilities, cfg.SupportedDriverCapabilities)
|
||||
require.EqualValues(t, tc.expectedDriverCapabilities, config.SupportedDriverCapabilities)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -145,10 +143,8 @@ func TestGetSwarmResourceEnvvars(t *testing.T) {
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
c := &hookConfig{
|
||||
Config: &config.Config{
|
||||
SwarmResource: tc.value,
|
||||
},
|
||||
c := &HookConfig{
|
||||
SwarmResource: tc.value,
|
||||
}
|
||||
|
||||
envvars := c.getSwarmResourceEnvvars()
|
||||
|
||||
@@ -75,7 +75,7 @@ func doPrestart() {
|
||||
}
|
||||
cli := hook.NVIDIAContainerCLIConfig
|
||||
|
||||
container := hook.getContainerConfig()
|
||||
container := getContainerConfig(*hook)
|
||||
nvidia := container.Nvidia
|
||||
if nvidia == nil {
|
||||
// Not a GPU container, nothing to do.
|
||||
@@ -95,9 +95,6 @@ func doPrestart() {
|
||||
if cli.LoadKmods {
|
||||
args = append(args, "--load-kmods")
|
||||
}
|
||||
if hook.Features.DisableImexChannelCreation.IsEnabled() {
|
||||
args = append(args, "--no-create-imex-channels")
|
||||
}
|
||||
if cli.NoPivot {
|
||||
args = append(args, "--no-pivot")
|
||||
}
|
||||
@@ -114,16 +111,14 @@ func doPrestart() {
|
||||
}
|
||||
args = append(args, "configure")
|
||||
|
||||
args = append(args, hook.nvidiaContainerCliCUDACompatModeFlags()...)
|
||||
|
||||
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
|
||||
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
|
||||
if cli.Ldconfig != "" {
|
||||
args = append(args, fmt.Sprintf("--ldconfig=%s", cli.Ldconfig))
|
||||
}
|
||||
if cli.NoCgroups {
|
||||
args = append(args, "--no-cgroups")
|
||||
}
|
||||
if devicesString := strings.Join(nvidia.Devices, ","); len(devicesString) > 0 {
|
||||
args = append(args, fmt.Sprintf("--device=%s", devicesString))
|
||||
if len(nvidia.Devices) > 0 {
|
||||
args = append(args, fmt.Sprintf("--device=%s", nvidia.Devices))
|
||||
}
|
||||
if len(nvidia.MigConfigDevices) > 0 {
|
||||
args = append(args, fmt.Sprintf("--mig-config=%s", nvidia.MigConfigDevices))
|
||||
@@ -131,8 +126,8 @@ func doPrestart() {
|
||||
if len(nvidia.MigMonitorDevices) > 0 {
|
||||
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
|
||||
}
|
||||
if imexString := strings.Join(nvidia.ImexChannels, ","); len(imexString) > 0 {
|
||||
args = append(args, fmt.Sprintf("--imex-channel=%s", imexString))
|
||||
if len(nvidia.ImexChannels) > 0 {
|
||||
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
|
||||
}
|
||||
|
||||
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
|
||||
@@ -150,7 +145,6 @@ func doPrestart() {
|
||||
args = append(args, rootfs)
|
||||
|
||||
env := append(os.Environ(), cli.Environment...)
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection?
|
||||
err = syscall.Exec(args[0], args, env)
|
||||
log.Panicln("exec failed:", err)
|
||||
}
|
||||
|
||||
@@ -21,8 +21,8 @@ The `runtimes` config option allows for the low-level runtime to be specified. T
|
||||
The default value for this setting is:
|
||||
```toml
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
"crun",
|
||||
]
|
||||
```
|
||||
|
||||
@@ -85,126 +85,3 @@ Alternatively the NVIDIA Container Runtime can be set as the default runtime for
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Environment variables (OCI spec)
|
||||
|
||||
Each environment variable maps to a command-line argument for `nvidia-container-cli` from [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
|
||||
These variables are already set in our [official CUDA images](https://hub.docker.com/r/nvidia/cuda/).
|
||||
|
||||
### `NVIDIA_VISIBLE_DEVICES`
|
||||
This variable controls which GPUs will be made accessible inside the container.
|
||||
|
||||
#### Possible values
|
||||
* `0,1,2`, `GPU-fef8089b` …: a comma-separated list of GPU UUID(s) or index(es).
|
||||
* `all`: all GPUs will be accessible, this is the default value in our container images.
|
||||
* `none`: no GPU will be accessible, but driver capabilities will be enabled.
|
||||
* `void` or *empty* or *unset*: `nvidia-container-runtime` will have the same behavior as `runc`.
|
||||
|
||||
**Note**: When running on a MIG capable device, the following values will also be available:
|
||||
* `0:0,0:1,1:0`, `MIG-GPU-fef8089b/0/1` …: a comma-separated list of MIG Device UUID(s) or index(es).
|
||||
|
||||
Where the MIG device indices have the form `<GPU Device Index>:<MIG Device Index>` as seen in the example output:
|
||||
```
|
||||
$ nvidia-smi -L
|
||||
GPU 0: Graphics Device (UUID: GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5)
|
||||
MIG Device 0: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/0)
|
||||
MIG Device 1: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/1)
|
||||
MIG Device 2: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/11/0)
|
||||
```
|
||||
|
||||
### `NVIDIA_MIG_CONFIG_DEVICES`
|
||||
This variable controls which of the visible GPUs can have their MIG
|
||||
configuration managed from within the container. This includes enabling and
|
||||
disabling MIG mode, creating and destroying GPU Instances and Compute
|
||||
Instances, etc.
|
||||
|
||||
#### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG configurations managed.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/config` file on the host.
|
||||
|
||||
### `NVIDIA_MIG_MONITOR_DEVICES`
|
||||
This variable controls which of the visible GPUs can have aggregate information
|
||||
about all of their MIG devices monitored from within the container. This
|
||||
includes inspecting the aggregate memory usage, listing the aggregate running
|
||||
processes, etc.
|
||||
|
||||
#### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG devices monitored.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/monitor` file on the host.
|
||||
|
||||
### `NVIDIA_DRIVER_CAPABILITIES`
|
||||
This option controls which driver libraries/binaries will be mounted inside the container.
|
||||
|
||||
#### Possible values
|
||||
* `compute,video`, `graphics,utility` …: a comma-separated list of driver features the container needs.
|
||||
* `all`: enable all available driver capabilities.
|
||||
* *empty* or *unset*: use default driver capability: `utility,compute`.
|
||||
|
||||
#### Supported driver capabilities
|
||||
* `compute`: required for CUDA and OpenCL applications.
|
||||
* `compat32`: required for running 32-bit applications.
|
||||
* `graphics`: required for running OpenGL and Vulkan applications.
|
||||
* `utility`: required for using `nvidia-smi` and NVML.
|
||||
* `video`: required for using the Video Codec SDK.
|
||||
* `display`: required for leveraging X11 display.
|
||||
|
||||
### `NVIDIA_REQUIRE_*`
|
||||
A logical expression to define constraints on the configurations supported by the container.
|
||||
|
||||
#### Supported constraints
|
||||
* `cuda`: constraint on the CUDA driver version.
|
||||
* `driver`: constraint on the driver version.
|
||||
* `arch`: constraint on the compute architectures of the selected GPUs.
|
||||
* `brand`: constraint on the brand of the selected GPUs (e.g. GeForce, Tesla, GRID).
|
||||
|
||||
#### Expressions
|
||||
Multiple constraints can be expressed in a single environment variable: space-separated constraints are ORed, comma-separated constraints are ANDed.
|
||||
Multiple environment variables of the form `NVIDIA_REQUIRE_*` are ANDed together.
|
||||
|
||||
### `NVIDIA_DISABLE_REQUIRE`
|
||||
Single switch to disable all the constraints of the form `NVIDIA_REQUIRE_*`.
|
||||
|
||||
### `NVIDIA_REQUIRE_CUDA`
|
||||
|
||||
The version of the CUDA toolkit used by the container. It is an instance of the generic `NVIDIA_REQUIRE_*` case and it is set by official CUDA images.
|
||||
If the version of the NVIDIA driver is insufficient to run this version of CUDA, the container will not be started.
|
||||
|
||||
#### Possible values
|
||||
* `cuda>=7.5`, `cuda>=8.0`, `cuda>=9.0` …: any valid CUDA version in the form `major.minor`.
|
||||
|
||||
### `CUDA_VERSION`
|
||||
Similar to `NVIDIA_REQUIRE_CUDA`, for legacy CUDA images.
|
||||
In addition, if `NVIDIA_REQUIRE_CUDA` is not set, `NVIDIA_VISIBLE_DEVICES` and `NVIDIA_DRIVER_CAPABILITIES` will default to `all`.
|
||||
|
||||
## Usage example
|
||||
|
||||
**NOTE:** The use of the `nvidia-container-runtime` as a CLI replacement for `runc` is uncommon and is only provided for completeness.
|
||||
|
||||
Although the `nvidia-container-runtime` is typically configured as a replacement for `runc` or `crun` in various container engines, it can also be
|
||||
invoked from the command line as `runc` would. For example:
|
||||
|
||||
```sh
|
||||
# Setup a rootfs based on Ubuntu 16.04
|
||||
cd $(mktemp -d) && mkdir rootfs
|
||||
curl -sS http://cdimage.ubuntu.com/ubuntu-base/releases/16.04/release/ubuntu-base-16.04.6-base-amd64.tar.gz | tar --exclude 'dev/*' -C rootfs -xz
|
||||
|
||||
# Create an OCI runtime spec
|
||||
nvidia-container-runtime spec
|
||||
sed -i 's;"sh";"nvidia-smi";' config.json
|
||||
sed -i 's;\("TERM=xterm"\);\1, "NVIDIA_VISIBLE_DEVICES=0";' config.json
|
||||
|
||||
# Run the container
|
||||
sudo nvidia-container-runtime run nvidia_smi
|
||||
```
|
||||
|
||||
@@ -3,7 +3,7 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
@@ -11,20 +11,19 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaRuntime = "nvidia-container-runtime"
|
||||
nvidiaHook = "nvidia-container-runtime-hook"
|
||||
bundlePathSuffix = "tests/output/bundle/"
|
||||
bundlePathSuffix = "test/output/bundle/"
|
||||
specFile = "config.json"
|
||||
unmodifiedSpecFileSuffix = "tests/input/test_spec.json"
|
||||
unmodifiedSpecFileSuffix = "test/input/test_spec.json"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -46,8 +45,8 @@ func TestMain(m *testing.M) {
|
||||
if err != nil {
|
||||
log.Fatalf("error in test setup: could not get module root: %v", err)
|
||||
}
|
||||
testBinPath := filepath.Join(moduleRoot, "tests", "bin")
|
||||
testInputPath := filepath.Join(moduleRoot, "tests", "input")
|
||||
testBinPath := filepath.Join(moduleRoot, "test", "bin")
|
||||
testInputPath := filepath.Join(moduleRoot, "test", "input")
|
||||
|
||||
// Set the environment variables for the test
|
||||
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
|
||||
@@ -87,7 +86,6 @@ func TestBadInput(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
err = cmdCreate.Run()
|
||||
@@ -105,7 +103,6 @@ func TestGoodInput(t *testing.T) {
|
||||
t.Fatalf("error generating runtime spec: %v", err)
|
||||
}
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle", cfg.bundlePath(), "testcontainer")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
|
||||
output, err := cmdRun.CombinedOutput()
|
||||
@@ -116,7 +113,6 @@ func TestGoodInput(t *testing.T) {
|
||||
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
|
||||
require.Empty(t, spec.Hooks, "there should be no hooks in config.json")
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
err = cmdCreate.Run()
|
||||
@@ -162,7 +158,6 @@ func TestDuplicateHook(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test how runtime handles already existing prestart hook in config.json
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
output, err := cmdCreate.CombinedOutput()
|
||||
@@ -193,16 +188,15 @@ func (c testConfig) getRuntimeSpec() (specs.Spec, error) {
|
||||
}
|
||||
defer jsonFile.Close()
|
||||
|
||||
jsonContent, err := io.ReadAll(jsonFile)
|
||||
switch {
|
||||
case err != nil:
|
||||
jsonContent, err := ioutil.ReadAll(jsonFile)
|
||||
if err != nil {
|
||||
return spec, err
|
||||
case json.Valid(jsonContent):
|
||||
} else if json.Valid(jsonContent) {
|
||||
err = json.Unmarshal(jsonContent, &spec)
|
||||
if err != nil {
|
||||
return spec, err
|
||||
}
|
||||
default:
|
||||
} else {
|
||||
err = json.NewDecoder(bytes.NewReader(jsonContent)).Decode(&spec)
|
||||
if err != nil {
|
||||
return spec, err
|
||||
@@ -232,7 +226,6 @@ func (c testConfig) generateNewRuntimeSpec() error {
|
||||
return err
|
||||
}
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmd := exec.Command("cp", c.unmodifiedSpecFile(), c.specFilePath())
|
||||
err = cmd.Run()
|
||||
if err != nil {
|
||||
|
||||
@@ -1,184 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
cli "github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
|
||||
)
|
||||
|
||||
// Exported defaults for the containerd engine.
const (
	// Name is the name used to identify this engine.
	Name = "containerd"

	// DefaultConfig is the default containerd config file path.
	DefaultConfig = "/etc/containerd/config.toml"
	// DefaultSocket is the default containerd socket path.
	DefaultSocket = "/run/containerd/containerd.sock"
	// DefaultRestartMode is the default restart mode for containerd.
	DefaultRestartMode = "signal"
)

// defaultRuntmeType is the default value of the runtime-type flag.
const defaultRuntmeType = "io.containerd.runc.v2"
|
||||
|
||||
// Options stores the containerd-specific options
|
||||
type Options struct {
|
||||
useLegacyConfig bool
|
||||
runtimeType string
|
||||
|
||||
ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice
|
||||
|
||||
runtimeConfigOverrideJSON string
|
||||
}
|
||||
|
||||
func Flags(opts *Options) []cli.Flag {
|
||||
flags := []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "use-legacy-config",
|
||||
Usage: "Specify whether a legacy (pre v1.3) config should be used. " +
|
||||
"This ensures that a version 1 container config is created by default and that the " +
|
||||
"containerd.runtimes.default_runtime config section is used to define the default " +
|
||||
"runtime instead of container.default_runtime_name.",
|
||||
Destination: &opts.useLegacyConfig,
|
||||
EnvVars: []string{"CONTAINERD_USE_LEGACY_CONFIG"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime-type",
|
||||
Usage: "The runtime_type to use for the configured runtime classes",
|
||||
Value: defaultRuntmeType,
|
||||
Destination: &opts.runtimeType,
|
||||
EnvVars: []string{"CONTAINERD_RUNTIME_TYPE"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "nvidia-container-runtime-modes.cdi.annotation-prefixes",
|
||||
Destination: &opts.ContainerRuntimeModesCDIAnnotationPrefixes,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime-config-override",
|
||||
Destination: &opts.runtimeConfigOverrideJSON,
|
||||
Usage: "specify additional runtime options as a JSON string. The paths are relative to the runtime config.",
|
||||
Value: "{}",
|
||||
EnvVars: []string{"RUNTIME_CONFIG_OVERRIDE", "CONTAINERD_RUNTIME_CONFIG_OVERRIDE"},
|
||||
},
|
||||
}
|
||||
|
||||
return flags
|
||||
}
|
||||
|
||||
// Setup updates a containerd configuration to include the nvidia-containerd-runtime and reloads it
|
||||
func Setup(c *cli.Context, o *container.Options, co *Options) error {
|
||||
log.Infof("Starting 'setup' for %v", c.App.Name)
|
||||
|
||||
cfg, err := getRuntimeConfig(o, co)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = o.Configure(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to configure containerd: %v", err)
|
||||
}
|
||||
|
||||
err = RestartContainerd(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to restart containerd: %v", err)
|
||||
}
|
||||
|
||||
log.Infof("Completed 'setup' for %v", c.App.Name)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cleanup reverts a containerd configuration to remove the nvidia-containerd-runtime and reloads it
|
||||
func Cleanup(c *cli.Context, o *container.Options, co *Options) error {
|
||||
log.Infof("Starting 'cleanup' for %v", c.App.Name)
|
||||
|
||||
cfg, err := getRuntimeConfig(o, co)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = o.Unconfigure(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to unconfigure containerd: %v", err)
|
||||
}
|
||||
|
||||
err = RestartContainerd(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to restart containerd: %v", err)
|
||||
}
|
||||
|
||||
log.Infof("Completed 'cleanup' for %v", c.App.Name)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RestartContainerd restarts containerd depending on the value of restartModeFlag
|
||||
func RestartContainerd(o *container.Options) error {
|
||||
return o.Restart("containerd", SignalContainerd)
|
||||
}
|
||||
|
||||
// containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes.
|
||||
func (o *Options) containerAnnotationsFromCDIPrefixes() []string {
|
||||
var annotations []string
|
||||
for _, prefix := range o.ContainerRuntimeModesCDIAnnotationPrefixes.Value() {
|
||||
annotations = append(annotations, prefix+"*")
|
||||
}
|
||||
|
||||
return annotations
|
||||
}
|
||||
|
||||
func (o *Options) runtimeConfigOverride() (map[string]interface{}, error) {
|
||||
if o.runtimeConfigOverrideJSON == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
runtimeOptions := make(map[string]interface{})
|
||||
if err := json.Unmarshal([]byte(o.runtimeConfigOverrideJSON), &runtimeOptions); err != nil {
|
||||
return nil, fmt.Errorf("failed to read %v as JSON: %w", o.runtimeConfigOverrideJSON, err)
|
||||
}
|
||||
|
||||
return runtimeOptions, nil
|
||||
}
|
||||
|
||||
func GetLowlevelRuntimePaths(o *container.Options, co *Options) ([]string, error) {
|
||||
cfg, err := getRuntimeConfig(o, co)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to load containerd config: %w", err)
|
||||
}
|
||||
return engine.GetBinaryPathsForRuntimes(cfg), nil
|
||||
}
|
||||
|
||||
func getRuntimeConfig(o *container.Options, co *Options) (engine.Interface, error) {
|
||||
return containerd.New(
|
||||
containerd.WithPath(o.Config),
|
||||
containerd.WithConfigSource(
|
||||
toml.LoadFirst(
|
||||
containerd.CommandLineSource(o.HostRootMount, o.ExecutablePath),
|
||||
toml.FromFile(o.Config),
|
||||
),
|
||||
),
|
||||
containerd.WithRuntimeType(co.runtimeType),
|
||||
containerd.WithUseLegacyConfig(co.useLegacyConfig),
|
||||
containerd.WithContainerAnnotations(co.containerAnnotationsFromCDIPrefixes()...),
|
||||
)
|
||||
}
|
||||
@@ -1,113 +0,0 @@
|
||||
/**
|
||||
# Copyright 2020-2023 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Retry parameters used when signalling containerd.
const (
	// reloadBackoff is the pause between two consecutive signal attempts.
	reloadBackoff = 5 * time.Second
	// maxReloadAttempts is the total number of signal attempts made.
	maxReloadAttempts = 6
)

// socketMessageToGetPID is the (empty) payload written to the containerd
// socket before reading back the peer credentials.
const socketMessageToGetPID = ""
|
||||
|
||||
// SignalContainerd sends a SIGHUP signal to the containerd daemon
|
||||
func SignalContainerd(socket string) error {
|
||||
log.Infof("Sending SIGHUP signal to containerd")
|
||||
|
||||
// Wrap the logic to perform the SIGHUP in a function so we can retry it on failure
|
||||
retriable := func() error {
|
||||
conn, err := net.Dial("unix", socket)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to dial: %v", err)
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
sconn, err := conn.(*net.UnixConn).SyscallConn()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to get syscall connection: %v", err)
|
||||
}
|
||||
|
||||
err1 := sconn.Control(func(fd uintptr) {
|
||||
err = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1)
|
||||
})
|
||||
if err1 != nil {
|
||||
return fmt.Errorf("unable to issue call on socket fd: %v", err1)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to SetsockoptInt on socket fd: %v", err)
|
||||
}
|
||||
|
||||
_, _, err = conn.(*net.UnixConn).WriteMsgUnix([]byte(socketMessageToGetPID), nil, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to WriteMsgUnix on socket fd: %v", err)
|
||||
}
|
||||
|
||||
oob := make([]byte, 1024)
|
||||
_, oobn, _, _, err := conn.(*net.UnixConn).ReadMsgUnix(nil, oob)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to ReadMsgUnix on socket fd: %v", err)
|
||||
}
|
||||
|
||||
oob = oob[:oobn]
|
||||
scm, err := syscall.ParseSocketControlMessage(oob)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to ParseSocketControlMessage from message received on socket fd: %v", err)
|
||||
}
|
||||
|
||||
ucred, err := syscall.ParseUnixCredentials(&scm[0])
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to ParseUnixCredentials from message received on socket fd: %v", err)
|
||||
}
|
||||
|
||||
err = syscall.Kill(int(ucred.Pid), syscall.SIGHUP)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to send SIGHUP to 'containerd' process: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Try to send a SIGHUP up to maxReloadAttempts times
|
||||
var err error
|
||||
for i := 0; i < maxReloadAttempts; i++ {
|
||||
err = retriable()
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
if i == maxReloadAttempts-1 {
|
||||
break
|
||||
}
|
||||
log.Warningf("Error signaling containerd, attempt %v/%v: %v", i+1, maxReloadAttempts, err)
|
||||
time.Sleep(reloadBackoff)
|
||||
}
|
||||
if err != nil {
|
||||
log.Warningf("Max retries reached %v/%v, aborting", maxReloadAttempts, maxReloadAttempts)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Infof("Successfully signaled containerd")
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
/**
|
||||
# Copyright 2023 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
// SignalContainerd always fails on non-linux platforms: signalling containerd
// is only implemented in the linux build of this package. The socket argument
// is ignored.
func SignalContainerd(socket string) error {
	const unsupported = "SignalContainerd is unsupported on non-linux platforms"
	return errors.New(unsupported)
}
|
||||
@@ -1,72 +0,0 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestRuntimeOptions(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
options Options
|
||||
expected map[string]interface{}
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
description: "empty is nil",
|
||||
},
|
||||
{
|
||||
description: "empty json",
|
||||
options: Options{
|
||||
runtimeConfigOverrideJSON: "{}",
|
||||
},
|
||||
expected: map[string]interface{}{},
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
description: "SystemdCgroup is true",
|
||||
options: Options{
|
||||
runtimeConfigOverrideJSON: "{\"SystemdCgroup\": true}",
|
||||
},
|
||||
expected: map[string]interface{}{
|
||||
"SystemdCgroup": true,
|
||||
},
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
description: "SystemdCgroup is false",
|
||||
options: Options{
|
||||
runtimeConfigOverrideJSON: "{\"SystemdCgroup\": false}",
|
||||
},
|
||||
expected: map[string]interface{}{
|
||||
"SystemdCgroup": false,
|
||||
},
|
||||
expectedError: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
runtimeOptions, err := tc.options.runtimeConfigOverride()
|
||||
require.ErrorIs(t, tc.expectedError, err)
|
||||
require.EqualValues(t, tc.expected, runtimeOptions)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,210 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package crio
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
cli "github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
|
||||
)
|
||||
|
||||
const (
	// Name is the name of the cri-o runtime.
	Name = "crio"

	// defaultConfigMode is the default value of the config-mode flag.
	defaultConfigMode = "hook"

	// Hook-based settings
	// defaultHooksDir is the default value of the hooks-dir flag.
	defaultHooksDir = "/usr/share/containers/oci/hooks.d"
	// defaultHookFilename is the default value of the hook-filename flag.
	defaultHookFilename = "oci-nvidia-hook.json"

	// Config-based settings
	// DefaultConfig is the default path to the cri-o config file.
	DefaultConfig = "/etc/crio/crio.conf"
	// DefaultSocket is the default path to the cri-o socket.
	DefaultSocket = "/var/run/crio/crio.sock"
	// DefaultRestartMode is the default restart mode for cri-o.
	DefaultRestartMode = "systemd"
)
|
||||
|
||||
// Options defines the cri-o specific options.
// The fields are populated through the flags returned by Flags.
type Options struct {
	// configMode selects how cri-o is configured; Setup and Cleanup accept
	// "hook" or "config".
	configMode string

	// hook-specific options
	// hooksDir is the directory in which the hook file is created / removed.
	hooksDir string
	// hookFilename is the name of the hook file inside hooksDir.
	hookFilename string
}
|
||||
|
||||
func Flags(opts *Options) []cli.Flag {
|
||||
flags := []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "hooks-dir",
|
||||
Usage: "path to the cri-o hooks directory",
|
||||
Value: defaultHooksDir,
|
||||
Destination: &opts.hooksDir,
|
||||
EnvVars: []string{"CRIO_HOOKS_DIR"},
|
||||
DefaultText: defaultHooksDir,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "hook-filename",
|
||||
Usage: "filename of the cri-o hook that will be created / removed in the hooks directory",
|
||||
Value: defaultHookFilename,
|
||||
Destination: &opts.hookFilename,
|
||||
EnvVars: []string{"CRIO_HOOK_FILENAME"},
|
||||
DefaultText: defaultHookFilename,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config-mode",
|
||||
Usage: "the configuration mode to use. One of [hook | config]",
|
||||
Value: defaultConfigMode,
|
||||
Destination: &opts.configMode,
|
||||
EnvVars: []string{"CRIO_CONFIG_MODE"},
|
||||
},
|
||||
}
|
||||
|
||||
return flags
|
||||
}
|
||||
|
||||
// Setup installs the prestart hook required to launch GPU-enabled containers
|
||||
func Setup(c *cli.Context, o *container.Options, co *Options) error {
|
||||
log.Infof("Starting 'setup' for %v", c.App.Name)
|
||||
|
||||
switch co.configMode {
|
||||
case "hook":
|
||||
return setupHook(o, co)
|
||||
case "config":
|
||||
return setupConfig(o)
|
||||
default:
|
||||
return fmt.Errorf("invalid config-mode '%v'", co.configMode)
|
||||
}
|
||||
}
|
||||
|
||||
// setupHook installs the prestart hook required to launch GPU-enabled containers
|
||||
func setupHook(o *container.Options, co *Options) error {
|
||||
log.Infof("Installing prestart hook")
|
||||
|
||||
hookPath := filepath.Join(co.hooksDir, co.hookFilename)
|
||||
err := ocihook.CreateHook(hookPath, filepath.Join(o.RuntimeDir, config.NVIDIAContainerRuntimeHookExecutable))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating hook: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupConfig updates the cri-o config for the NVIDIA container runtime
|
||||
func setupConfig(o *container.Options) error {
|
||||
log.Infof("Updating config file")
|
||||
|
||||
cfg, err := getRuntimeConfig(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = o.Configure(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to configure cri-o: %v", err)
|
||||
}
|
||||
|
||||
err = RestartCrio(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to restart crio: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cleanup removes the specified prestart hook
|
||||
func Cleanup(c *cli.Context, o *container.Options, co *Options) error {
|
||||
log.Infof("Starting 'cleanup' for %v", c.App.Name)
|
||||
|
||||
switch co.configMode {
|
||||
case "hook":
|
||||
return cleanupHook(co)
|
||||
case "config":
|
||||
return cleanupConfig(o)
|
||||
default:
|
||||
return fmt.Errorf("invalid config-mode '%v'", co.configMode)
|
||||
}
|
||||
}
|
||||
|
||||
// cleanupHook removes the prestart hook
|
||||
func cleanupHook(co *Options) error {
|
||||
log.Infof("Removing prestart hook")
|
||||
|
||||
hookPath := filepath.Join(co.hooksDir, co.hookFilename)
|
||||
err := os.Remove(hookPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error removing hook '%v': %v", hookPath, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupConfig removes the NVIDIA container runtime from the cri-o config
|
||||
func cleanupConfig(o *container.Options) error {
|
||||
log.Infof("Reverting config file modifications")
|
||||
|
||||
cfg, err := getRuntimeConfig(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = o.Unconfigure(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to unconfigure cri-o: %v", err)
|
||||
}
|
||||
|
||||
err = RestartCrio(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to restart crio: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RestartCrio restarts crio depending on the value of restartModeFlag
|
||||
func RestartCrio(o *container.Options) error {
|
||||
return o.Restart("crio", func(string) error { return fmt.Errorf("supporting crio via signal is unsupported") })
|
||||
}
|
||||
|
||||
func GetLowlevelRuntimePaths(o *container.Options) ([]string, error) {
|
||||
cfg, err := getRuntimeConfig(o)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to load crio config: %w", err)
|
||||
}
|
||||
return engine.GetBinaryPathsForRuntimes(cfg), nil
|
||||
}
|
||||
|
||||
func getRuntimeConfig(o *container.Options) (engine.Interface, error) {
|
||||
return crio.New(
|
||||
crio.WithPath(o.Config),
|
||||
crio.WithConfigSource(
|
||||
toml.LoadFirst(
|
||||
crio.CommandLineSource(o.HostRootMount, o.ExecutablePath),
|
||||
toml.FromFile(o.Config),
|
||||
),
|
||||
),
|
||||
)
|
||||
}
|
||||
@@ -1,111 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package docker
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
cli "github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
|
||||
)
|
||||
|
||||
const (
	// Name is the name of the docker runtime.
	Name = "docker"

	// DefaultConfig is the default path to the docker daemon config file.
	DefaultConfig = "/etc/docker/daemon.json"
	// DefaultSocket is the default path to the docker socket.
	DefaultSocket = "/var/run/docker.sock"
	// DefaultRestartMode is the default restart mode for docker.
	DefaultRestartMode = "signal"
)
|
||||
|
||||
// Options defines the docker specific options. It currently has no fields.
type Options struct{}
|
||||
|
||||
func Flags(opts *Options) []cli.Flag {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Setup updates docker configuration to include the nvidia runtime and reloads it
|
||||
func Setup(c *cli.Context, o *container.Options) error {
|
||||
log.Infof("Starting 'setup' for %v", c.App.Name)
|
||||
|
||||
cfg, err := getRuntimeConfig(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = o.Configure(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to configure docker: %v", err)
|
||||
}
|
||||
|
||||
err = RestartDocker(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to restart docker: %v", err)
|
||||
}
|
||||
|
||||
log.Infof("Completed 'setup' for %v", c.App.Name)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cleanup reverts docker configuration to remove the nvidia runtime and reloads it
|
||||
func Cleanup(c *cli.Context, o *container.Options) error {
|
||||
log.Infof("Starting 'cleanup' for %v", c.App.Name)
|
||||
|
||||
cfg, err := getRuntimeConfig(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = o.Unconfigure(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to unconfigure docker: %v", err)
|
||||
}
|
||||
|
||||
err = RestartDocker(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to signal docker: %v", err)
|
||||
}
|
||||
|
||||
log.Infof("Completed 'cleanup' for %v", c.App.Name)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RestartDocker restarts docker depending on the value of restartModeFlag
|
||||
func RestartDocker(o *container.Options) error {
|
||||
return o.Restart("docker", SignalDocker)
|
||||
}
|
||||
|
||||
func GetLowlevelRuntimePaths(o *container.Options) ([]string, error) {
|
||||
cfg, err := docker.New(
|
||||
docker.WithPath(o.Config),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to load docker config: %w", err)
|
||||
}
|
||||
return engine.GetBinaryPathsForRuntimes(cfg), nil
|
||||
}
|
||||
|
||||
func getRuntimeConfig(o *container.Options) (engine.Interface, error) {
|
||||
return docker.New(
|
||||
docker.WithPath(o.Config),
|
||||
)
|
||||
}
|
||||
@@ -1,113 +0,0 @@
|
||||
/**
|
||||
# Copyright 2021-2023 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package docker
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
	// reloadBackoff is the time SignalDocker sleeps between failed attempts.
	reloadBackoff = 5 * time.Second
	// maxReloadAttempts is the maximum number of attempts SignalDocker makes.
	maxReloadAttempts = 6

	// socketMessageToGetPID is the request written to the docker socket so
	// that a reply (with the peer credentials attached) is received.
	socketMessageToGetPID = "GET /info HTTP/1.0\r\n\r\n"
)
|
||||
|
||||
// SignalDocker sends a SIGHUP signal to docker daemon
|
||||
func SignalDocker(socket string) error {
|
||||
log.Infof("Sending SIGHUP signal to docker")
|
||||
|
||||
// Wrap the logic to perform the SIGHUP in a function so we can retry it on failure
|
||||
retriable := func() error {
|
||||
conn, err := net.Dial("unix", socket)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to dial: %v", err)
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
sconn, err := conn.(*net.UnixConn).SyscallConn()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to get syscall connection: %v", err)
|
||||
}
|
||||
|
||||
err1 := sconn.Control(func(fd uintptr) {
|
||||
err = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1)
|
||||
})
|
||||
if err1 != nil {
|
||||
return fmt.Errorf("unable to issue call on socket fd: %v", err1)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to SetsockoptInt on socket fd: %v", err)
|
||||
}
|
||||
|
||||
_, _, err = conn.(*net.UnixConn).WriteMsgUnix([]byte(socketMessageToGetPID), nil, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to WriteMsgUnix on socket fd: %v", err)
|
||||
}
|
||||
|
||||
oob := make([]byte, 1024)
|
||||
_, oobn, _, _, err := conn.(*net.UnixConn).ReadMsgUnix(nil, oob)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to ReadMsgUnix on socket fd: %v", err)
|
||||
}
|
||||
|
||||
oob = oob[:oobn]
|
||||
scm, err := syscall.ParseSocketControlMessage(oob)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to ParseSocketControlMessage from message received on socket fd: %v", err)
|
||||
}
|
||||
|
||||
ucred, err := syscall.ParseUnixCredentials(&scm[0])
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to ParseUnixCredentials from message received on socket fd: %v", err)
|
||||
}
|
||||
|
||||
err = syscall.Kill(int(ucred.Pid), syscall.SIGHUP)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to send SIGHUP to 'docker' process: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Try to send a SIGHUP up to maxReloadAttempts times
|
||||
var err error
|
||||
for i := 0; i < maxReloadAttempts; i++ {
|
||||
err = retriable()
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
if i == maxReloadAttempts-1 {
|
||||
break
|
||||
}
|
||||
log.Warningf("Error signaling docker, attempt %v/%v: %v", i+1, maxReloadAttempts, err)
|
||||
time.Sleep(reloadBackoff)
|
||||
}
|
||||
if err != nil {
|
||||
log.Warningf("Max retries reached %v/%v, aborting", maxReloadAttempts, maxReloadAttempts)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Infof("Successfully signaled docker")
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,204 +0,0 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/containerd"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/crio"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/docker"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/toolkit"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
)
|
||||
|
||||
const (
	// defaultSetAsDefault is the default value of the set-as-default flag.
	defaultSetAsDefault = true
	// defaultRuntimeName specifies the NVIDIA runtime to be use as the default runtime if setting the default runtime is enabled
	defaultRuntimeName = "nvidia"
	// defaultHostRootMount is the default value of the host-root flag.
	defaultHostRootMount = "/host"

	// runtimeSpecificDefault is a placeholder flag value that Validate
	// replaces with the default of the selected runtime.
	runtimeSpecificDefault = "RUNTIME_SPECIFIC_DEFAULT"
)
|
||||
|
||||
// Options combines the options common to all runtimes (the embedded
// container.Options) with the containerd- and cri-o-specific options.
type Options struct {
	container.Options

	// containerdOptions holds the flags defined by containerd.Flags.
	containerdOptions containerd.Options
	// crioOptions holds the flags defined by crio.Flags.
	crioOptions crio.Options
}
|
||||
|
||||
func Flags(opts *Options) []cli.Flag {
|
||||
flags := []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "config",
|
||||
Usage: "Path to the runtime config file",
|
||||
Value: runtimeSpecificDefault,
|
||||
Destination: &opts.Config,
|
||||
EnvVars: []string{"RUNTIME_CONFIG", "CONTAINERD_CONFIG", "DOCKER_CONFIG"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "executable-path",
|
||||
Usage: "The path to the runtime executable. This is used to extract the current config",
|
||||
Destination: &opts.ExecutablePath,
|
||||
EnvVars: []string{"RUNTIME_EXECUTABLE_PATH"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "socket",
|
||||
Usage: "Path to the runtime socket file",
|
||||
Value: runtimeSpecificDefault,
|
||||
Destination: &opts.Socket,
|
||||
EnvVars: []string{"RUNTIME_SOCKET", "CONTAINERD_SOCKET", "DOCKER_SOCKET"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "restart-mode",
|
||||
Usage: "Specify how the runtime should be restarted; If 'none' is selected it will not be restarted [signal | systemd | none ]",
|
||||
Value: runtimeSpecificDefault,
|
||||
Destination: &opts.RestartMode,
|
||||
EnvVars: []string{"RUNTIME_RESTART_MODE"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "enable-cdi-in-runtime",
|
||||
Usage: "Enable CDI in the configured runt ime",
|
||||
Destination: &opts.EnableCDI,
|
||||
EnvVars: []string{"RUNTIME_ENABLE_CDI"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "host-root",
|
||||
Usage: "Specify the path to the host root to be used when restarting the runtime using systemd",
|
||||
Value: defaultHostRootMount,
|
||||
Destination: &opts.HostRootMount,
|
||||
EnvVars: []string{"HOST_ROOT_MOUNT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime-name",
|
||||
Aliases: []string{"nvidia-runtime-name", "runtime-class"},
|
||||
Usage: "Specify the name of the `nvidia` runtime. If set-as-default is selected, the runtime is used as the default runtime.",
|
||||
Value: defaultRuntimeName,
|
||||
Destination: &opts.RuntimeName,
|
||||
EnvVars: []string{"NVIDIA_RUNTIME_NAME", "CONTAINERD_RUNTIME_CLASS", "DOCKER_RUNTIME_NAME"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "set-as-default",
|
||||
Usage: "Set the `nvidia` runtime as the default runtime.",
|
||||
Value: defaultSetAsDefault,
|
||||
Destination: &opts.SetAsDefault,
|
||||
EnvVars: []string{"NVIDIA_RUNTIME_SET_AS_DEFAULT", "CONTAINERD_SET_AS_DEFAULT", "DOCKER_SET_AS_DEFAULT"},
|
||||
Hidden: true,
|
||||
},
|
||||
}
|
||||
|
||||
flags = append(flags, containerd.Flags(&opts.containerdOptions)...)
|
||||
flags = append(flags, crio.Flags(&opts.crioOptions)...)
|
||||
|
||||
return flags
|
||||
}
|
||||
|
||||
// Validate checks whether the specified options are valid
|
||||
func (opts *Options) Validate(logger logger.Interface, c *cli.Context, runtime string, toolkitRoot string, to *toolkit.Options) error {
|
||||
// We set this option here to ensure that it is available in future calls.
|
||||
opts.RuntimeDir = toolkitRoot
|
||||
|
||||
if !c.IsSet("enable-cdi-in-runtime") {
|
||||
opts.EnableCDI = to.CDI.Enabled
|
||||
}
|
||||
|
||||
if opts.ExecutablePath != "" && opts.RuntimeName == docker.Name {
|
||||
logger.Warningf("Ignoring executable-path=%q flag for %v", opts.ExecutablePath, opts.RuntimeName)
|
||||
opts.ExecutablePath = ""
|
||||
}
|
||||
|
||||
// Apply the runtime-specific config changes.
|
||||
switch runtime {
|
||||
case containerd.Name:
|
||||
if opts.Config == runtimeSpecificDefault {
|
||||
opts.Config = containerd.DefaultConfig
|
||||
}
|
||||
if opts.Socket == runtimeSpecificDefault {
|
||||
opts.Socket = containerd.DefaultSocket
|
||||
}
|
||||
if opts.RestartMode == runtimeSpecificDefault {
|
||||
opts.RestartMode = containerd.DefaultRestartMode
|
||||
}
|
||||
case crio.Name:
|
||||
if opts.Config == runtimeSpecificDefault {
|
||||
opts.Config = crio.DefaultConfig
|
||||
}
|
||||
if opts.Socket == runtimeSpecificDefault {
|
||||
opts.Socket = crio.DefaultSocket
|
||||
}
|
||||
if opts.RestartMode == runtimeSpecificDefault {
|
||||
opts.RestartMode = crio.DefaultRestartMode
|
||||
}
|
||||
case docker.Name:
|
||||
if opts.Config == runtimeSpecificDefault {
|
||||
opts.Config = docker.DefaultConfig
|
||||
}
|
||||
if opts.Socket == runtimeSpecificDefault {
|
||||
opts.Socket = docker.DefaultSocket
|
||||
}
|
||||
if opts.RestartMode == runtimeSpecificDefault {
|
||||
opts.RestartMode = docker.DefaultRestartMode
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("undefined runtime %v", runtime)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func Setup(c *cli.Context, opts *Options, runtime string) error {
|
||||
switch runtime {
|
||||
case containerd.Name:
|
||||
return containerd.Setup(c, &opts.Options, &opts.containerdOptions)
|
||||
case crio.Name:
|
||||
return crio.Setup(c, &opts.Options, &opts.crioOptions)
|
||||
case docker.Name:
|
||||
return docker.Setup(c, &opts.Options)
|
||||
default:
|
||||
return fmt.Errorf("undefined runtime %v", runtime)
|
||||
}
|
||||
}
|
||||
|
||||
func Cleanup(c *cli.Context, opts *Options, runtime string) error {
|
||||
switch runtime {
|
||||
case containerd.Name:
|
||||
return containerd.Cleanup(c, &opts.Options, &opts.containerdOptions)
|
||||
case crio.Name:
|
||||
return crio.Cleanup(c, &opts.Options, &opts.crioOptions)
|
||||
case docker.Name:
|
||||
return docker.Cleanup(c, &opts.Options)
|
||||
default:
|
||||
return fmt.Errorf("undefined runtime %v", runtime)
|
||||
}
|
||||
}
|
||||
|
||||
func GetLowlevelRuntimePaths(opts *Options, runtime string) ([]string, error) {
|
||||
switch runtime {
|
||||
case containerd.Name:
|
||||
return containerd.GetLowlevelRuntimePaths(&opts.Options, &opts.containerdOptions)
|
||||
case crio.Name:
|
||||
return crio.GetLowlevelRuntimePaths(&opts.Options)
|
||||
case docker.Name:
|
||||
return docker.GetLowlevelRuntimePaths(&opts.Options)
|
||||
default:
|
||||
return nil, fmt.Errorf("undefined runtime %v", runtime)
|
||||
}
|
||||
}
|
||||
@@ -1,279 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/toolkit"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
)
|
||||
|
||||
const (
	// toolkitPidFilename is the base name that validateFlags requires for
	// the pid file.
	toolkitPidFilename = "toolkit.pid"
	// defaultPidFile is the default value of the pid-file flag.
	defaultPidFile = "/run/nvidia/toolkit/" + toolkitPidFilename

	// defaultToolkitInstallDir is the default value of the
	// toolkit-install-dir flag.
	defaultToolkitInstallDir = "/usr/local/nvidia"
	// toolkitSubDir is the sub-directory of the install dir that forms the
	// toolkit root (see toolkitRoot).
	toolkitSubDir = "toolkit"

	// defaultRuntime is the default value of the runtime flag.
	defaultRuntime = "docker"
)
|
||||
|
||||
// availableRuntimes is the set of runtime names accepted by validateFlags.
var availableRuntimes = map[string]struct{}{"docker": {}, "crio": {}, "containerd": {}}

// defaultLowLevelRuntimes is appended to the low-level runtime paths detected
// from the runtime config in Run.
var defaultLowLevelRuntimes = []string{"runc", "crun"}

// waitingForSignal and signalReceived are buffered channels of capacity one.
// NOTE(review): presumably used to coordinate signal handling; their use is
// not visible in this part of the file.
var waitingForSignal = make(chan bool, 1)
var signalReceived = make(chan bool, 1)
|
||||
|
||||
// options stores the command line arguments
type options struct {
	// toolkitInstallDir is the directory under which the toolkit is installed.
	toolkitInstallDir string

	// noDaemon makes Run return right after setting up the runtime.
	noDaemon bool
	// runtime is the name of the runtime to configure.
	runtime string
	// pidFile is the path to the toolkit.pid file.
	pidFile string
	// sourceRoot is the folder where the toolkit artifacts are found.
	sourceRoot string

	// toolkitOptions holds the flags defined by toolkit.Flags.
	toolkitOptions toolkit.Options
	// runtimeOptions holds the flags defined by runtime.Flags.
	runtimeOptions runtime.Options
}
|
||||
|
||||
func (o options) toolkitRoot() string {
|
||||
return filepath.Join(o.toolkitInstallDir, toolkitSubDir)
|
||||
}
|
||||
|
||||
func main() {
|
||||
logger := logger.New()
|
||||
c := NewApp(logger)
|
||||
|
||||
// Run the CLI
|
||||
logger.Infof("Starting %v", c.Name)
|
||||
if err := c.Run(os.Args); err != nil {
|
||||
logger.Errorf("error running %v: %v", c.Name, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
logger.Infof("Completed %v", c.Name)
|
||||
}
|
||||
|
||||
// An app represents the nvidia-ctk-installer.
type app struct {
	// logger is the logger handed to the toolkit installer and to the
	// runtime option validation.
	logger logger.Interface

	// toolkit is the toolkit installer; it is created in Before.
	toolkit *toolkit.Installer
}
|
||||
|
||||
// NewApp creates the CLI app fro the specified options.
|
||||
func NewApp(logger logger.Interface) *cli.App {
|
||||
a := app{
|
||||
logger: logger,
|
||||
}
|
||||
return a.build()
|
||||
}
|
||||
|
||||
func (a app) build() *cli.App {
|
||||
options := options{
|
||||
toolkitOptions: toolkit.Options{},
|
||||
}
|
||||
// Create the top-level CLI
|
||||
c := cli.NewApp()
|
||||
c.Name = "nvidia-ctk-installer"
|
||||
c.Usage = "Install the NVIDIA Container Toolkit and configure the specified runtime to use the `nvidia` runtime."
|
||||
c.Version = info.GetVersionString()
|
||||
c.Before = func(ctx *cli.Context) error {
|
||||
return a.Before(ctx, &options)
|
||||
}
|
||||
c.Action = func(ctx *cli.Context) error {
|
||||
return a.Run(ctx, &options)
|
||||
}
|
||||
|
||||
// Setup flags for the CLI
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "no-daemon",
|
||||
Aliases: []string{"n"},
|
||||
Usage: "terminate immediately after setting up the runtime. Note that no cleanup will be performed",
|
||||
Destination: &options.noDaemon,
|
||||
EnvVars: []string{"NO_DAEMON"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime",
|
||||
Aliases: []string{"r"},
|
||||
Usage: "the runtime to setup on this node. One of {'docker', 'crio', 'containerd'}",
|
||||
Value: defaultRuntime,
|
||||
Destination: &options.runtime,
|
||||
EnvVars: []string{"RUNTIME"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "toolkit-install-dir",
|
||||
Aliases: []string{"root"},
|
||||
Usage: "The directory where the NVIDIA Container Toolkit is to be installed. " +
|
||||
"The components of the toolkit will be installed to `ROOT`/toolkit. " +
|
||||
"Note that in the case of a containerized installer, this is the path in the container and it is " +
|
||||
"recommended that this match the path on the host.",
|
||||
Value: defaultToolkitInstallDir,
|
||||
Destination: &options.toolkitInstallDir,
|
||||
EnvVars: []string{"TOOLKIT_INSTALL_DIR", "ROOT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "source-root",
|
||||
Value: "/",
|
||||
Usage: "The folder where the required toolkit artifacts can be found",
|
||||
Destination: &options.sourceRoot,
|
||||
EnvVars: []string{"SOURCE_ROOT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "pid-file",
|
||||
Value: defaultPidFile,
|
||||
Usage: "the path to a toolkit.pid file to ensure that only a single configuration instance is running",
|
||||
Destination: &options.pidFile,
|
||||
EnvVars: []string{"TOOLKIT_PID_FILE", "PID_FILE"},
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = append(c.Flags, toolkit.Flags(&options.toolkitOptions)...)
|
||||
c.Flags = append(c.Flags, runtime.Flags(&options.runtimeOptions)...)
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
func (a *app) Before(c *cli.Context, o *options) error {
|
||||
a.toolkit = toolkit.NewInstaller(
|
||||
toolkit.WithLogger(a.logger),
|
||||
toolkit.WithSourceRoot(o.sourceRoot),
|
||||
toolkit.WithToolkitRoot(o.toolkitRoot()),
|
||||
)
|
||||
return a.validateFlags(c, o)
|
||||
}
|
||||
|
||||
func (a *app) validateFlags(c *cli.Context, o *options) error {
|
||||
if o.toolkitInstallDir == "" {
|
||||
return fmt.Errorf("the install root must be specified")
|
||||
}
|
||||
if _, exists := availableRuntimes[o.runtime]; !exists {
|
||||
return fmt.Errorf("unknown runtime: %v", o.runtime)
|
||||
}
|
||||
if filepath.Base(o.pidFile) != toolkitPidFilename {
|
||||
return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile)
|
||||
}
|
||||
|
||||
if err := a.toolkit.ValidateOptions(&o.toolkitOptions); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := o.runtimeOptions.Validate(a.logger, c, o.runtime, o.toolkitRoot(), &o.toolkitOptions); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Run installs the NVIDIA Container Toolkit and updates the requested runtime.
|
||||
// If the application is run as a daemon, the application waits and unconfigures
|
||||
// the runtime on termination.
|
||||
func (a *app) Run(c *cli.Context, o *options) error {
|
||||
err := a.initialize(o.pidFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to initialize: %v", err)
|
||||
}
|
||||
defer a.shutdown(o.pidFile)
|
||||
|
||||
if len(o.toolkitOptions.ContainerRuntimeRuntimes.Value()) == 0 {
|
||||
lowlevelRuntimePaths, err := runtime.GetLowlevelRuntimePaths(&o.runtimeOptions, o.runtime)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to determine runtime options: %w", err)
|
||||
}
|
||||
lowlevelRuntimePaths = append(lowlevelRuntimePaths, defaultLowLevelRuntimes...)
|
||||
|
||||
o.toolkitOptions.ContainerRuntimeRuntimes = *cli.NewStringSlice(lowlevelRuntimePaths...)
|
||||
}
|
||||
|
||||
err = a.toolkit.Install(c, &o.toolkitOptions)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to install toolkit: %v", err)
|
||||
}
|
||||
|
||||
err = runtime.Setup(c, &o.runtimeOptions, o.runtime)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to setup runtime: %v", err)
|
||||
}
|
||||
|
||||
if !o.noDaemon {
|
||||
err = a.waitForSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to wait for signal: %v", err)
|
||||
}
|
||||
|
||||
err = runtime.Cleanup(c, &o.runtimeOptions, o.runtime)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to cleanup runtime: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *app) initialize(pidFile string) error {
|
||||
a.logger.Infof("Initializing")
|
||||
|
||||
if dir := filepath.Dir(pidFile); dir != "" {
|
||||
err := os.MkdirAll(dir, 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create folder for pidfile: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
f, err := os.Create(pidFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create pidfile: %v", err)
|
||||
}
|
||||
|
||||
err = unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB)
|
||||
if err != nil {
|
||||
a.logger.Warningf("Unable to get exclusive lock on '%v'", pidFile)
|
||||
a.logger.Warningf("This normally means an instance of the NVIDIA toolkit Container is already running, aborting")
|
||||
return fmt.Errorf("unable to get flock on pidfile: %v", err)
|
||||
}
|
||||
|
||||
_, err = fmt.Fprintf(f, "%v\n", os.Getpid())
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to write PID to pidfile: %v", err)
|
||||
}
|
||||
|
||||
sigs := make(chan os.Signal, 1)
|
||||
signal.Notify(sigs, syscall.SIGHUP, syscall.SIGINT, syscall.SIGQUIT, syscall.SIGPIPE, syscall.SIGTERM)
|
||||
go func() {
|
||||
<-sigs
|
||||
select {
|
||||
case <-waitingForSignal:
|
||||
signalReceived <- true
|
||||
default:
|
||||
a.logger.Infof("Signal received, exiting early")
|
||||
a.shutdown(pidFile)
|
||||
os.Exit(0)
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *app) waitForSignal() error {
|
||||
a.logger.Infof("Waiting for signal")
|
||||
waitingForSignal <- true
|
||||
<-signalReceived
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *app) shutdown(pidFile string) {
|
||||
a.logger.Infof("Shutting Down")
|
||||
|
||||
err := os.Remove(pidFile)
|
||||
if err != nil {
|
||||
a.logger.Warningf("Unable to remove pidfile: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -1,455 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
)
|
||||
|
||||
func TestApp(t *testing.T) {
|
||||
t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true")
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
moduleRoot, err := test.GetModuleRoot()
|
||||
require.NoError(t, err)
|
||||
|
||||
artifactRoot := filepath.Join(moduleRoot, "testdata", "installer", "artifacts")
|
||||
hostRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1")
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
args []string
|
||||
expectedToolkitConfig string
|
||||
expectedRuntimeConfig string
|
||||
}{
|
||||
{
|
||||
description: "no args",
|
||||
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
|
||||
load-kmods = true
|
||||
no-cgroups = false
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
|
||||
root = "/run/nvidia/driver"
|
||||
user = ""
|
||||
|
||||
[nvidia-container-runtime]
|
||||
debug = "/dev/null"
|
||||
log-level = "info"
|
||||
mode = "auto"
|
||||
runtimes = ["runc", "crun"]
|
||||
|
||||
[nvidia-container-runtime.modes]
|
||||
|
||||
[nvidia-container-runtime.modes.cdi]
|
||||
annotation-prefixes = ["cdi.k8s.io/"]
|
||||
default-kind = "nvidia.com/gpu"
|
||||
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime.modes.legacy]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
|
||||
skip-mode-detection = true
|
||||
|
||||
[nvidia-ctk]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
|
||||
`,
|
||||
expectedRuntimeConfig: `{
|
||||
"default-runtime": "nvidia",
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
|
||||
},
|
||||
"nvidia-cdi": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
|
||||
},
|
||||
"nvidia-legacy": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
|
||||
}
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
description: "CDI enabled enables CDI in docker",
|
||||
args: []string{"--cdi-enabled", "--create-device-nodes=none"},
|
||||
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
|
||||
load-kmods = true
|
||||
no-cgroups = false
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
|
||||
root = "/run/nvidia/driver"
|
||||
user = ""
|
||||
|
||||
[nvidia-container-runtime]
|
||||
debug = "/dev/null"
|
||||
log-level = "info"
|
||||
mode = "auto"
|
||||
runtimes = ["runc", "crun"]
|
||||
|
||||
[nvidia-container-runtime.modes]
|
||||
|
||||
[nvidia-container-runtime.modes.cdi]
|
||||
annotation-prefixes = ["cdi.k8s.io/"]
|
||||
default-kind = "nvidia.com/gpu"
|
||||
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime.modes.legacy]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
|
||||
skip-mode-detection = true
|
||||
|
||||
[nvidia-ctk]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
|
||||
`,
|
||||
expectedRuntimeConfig: `{
|
||||
"default-runtime": "nvidia",
|
||||
"features": {
|
||||
"cdi": true
|
||||
},
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
|
||||
},
|
||||
"nvidia-cdi": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
|
||||
},
|
||||
"nvidia-legacy": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
|
||||
}
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
description: "--enable-cdi-in-runtime=false overrides --cdi-enabled in Docker",
|
||||
args: []string{"--cdi-enabled", "--create-device-nodes=none", "--enable-cdi-in-runtime=false"},
|
||||
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
|
||||
load-kmods = true
|
||||
no-cgroups = false
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
|
||||
root = "/run/nvidia/driver"
|
||||
user = ""
|
||||
|
||||
[nvidia-container-runtime]
|
||||
debug = "/dev/null"
|
||||
log-level = "info"
|
||||
mode = "auto"
|
||||
runtimes = ["runc", "crun"]
|
||||
|
||||
[nvidia-container-runtime.modes]
|
||||
|
||||
[nvidia-container-runtime.modes.cdi]
|
||||
annotation-prefixes = ["cdi.k8s.io/"]
|
||||
default-kind = "nvidia.com/gpu"
|
||||
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime.modes.legacy]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
|
||||
skip-mode-detection = true
|
||||
|
||||
[nvidia-ctk]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
|
||||
`,
|
||||
expectedRuntimeConfig: `{
|
||||
"default-runtime": "nvidia",
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
|
||||
},
|
||||
"nvidia-cdi": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
|
||||
},
|
||||
"nvidia-legacy": {
|
||||
"args": [],
|
||||
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
|
||||
}
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
description: "CDI enabled enables CDI in containerd",
|
||||
args: []string{"--cdi-enabled", "--runtime=containerd"},
|
||||
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
|
||||
load-kmods = true
|
||||
no-cgroups = false
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
|
||||
root = "/run/nvidia/driver"
|
||||
user = ""
|
||||
|
||||
[nvidia-container-runtime]
|
||||
debug = "/dev/null"
|
||||
log-level = "info"
|
||||
mode = "auto"
|
||||
runtimes = ["runc", "crun"]
|
||||
|
||||
[nvidia-container-runtime.modes]
|
||||
|
||||
[nvidia-container-runtime.modes.cdi]
|
||||
annotation-prefixes = ["cdi.k8s.io/"]
|
||||
default-kind = "nvidia.com/gpu"
|
||||
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime.modes.legacy]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
|
||||
skip-mode-detection = true
|
||||
|
||||
[nvidia-ctk]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
|
||||
`,
|
||||
expectedRuntimeConfig: `version = 2
|
||||
|
||||
[plugins]
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri"]
|
||||
enable_cdi = true
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd]
|
||||
default_runtime_name = "nvidia"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "--enable-cdi-in-runtime=false overrides --cdi-enabled in containerd",
|
||||
args: []string{"--cdi-enabled", "--create-device-nodes=none", "--enable-cdi-in-runtime=false", "--runtime=containerd"},
|
||||
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
|
||||
load-kmods = true
|
||||
no-cgroups = false
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
|
||||
root = "/run/nvidia/driver"
|
||||
user = ""
|
||||
|
||||
[nvidia-container-runtime]
|
||||
debug = "/dev/null"
|
||||
log-level = "info"
|
||||
mode = "auto"
|
||||
runtimes = ["runc", "crun"]
|
||||
|
||||
[nvidia-container-runtime.modes]
|
||||
|
||||
[nvidia-container-runtime.modes.cdi]
|
||||
annotation-prefixes = ["cdi.k8s.io/"]
|
||||
default-kind = "nvidia.com/gpu"
|
||||
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
[nvidia-container-runtime.modes.legacy]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
|
||||
[nvidia-container-runtime-hook]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
|
||||
skip-mode-detection = true
|
||||
|
||||
[nvidia-ctk]
|
||||
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
|
||||
`,
|
||||
expectedRuntimeConfig: `version = 2
|
||||
|
||||
[plugins]
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri"]
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd]
|
||||
default_runtime_name = "nvidia"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy]
|
||||
privileged_without_host_devices = false
|
||||
runtime_engine = ""
|
||||
runtime_root = ""
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options]
|
||||
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
testRoot := t.TempDir()
|
||||
|
||||
cdiOutputDir := filepath.Join(testRoot, "/var/run/cdi")
|
||||
runtimeConfigFile := filepath.Join(testRoot, "config.file")
|
||||
|
||||
toolkitRoot := filepath.Join(testRoot, "toolkit-test")
|
||||
toolkitConfigFile := filepath.Join(toolkitRoot, "toolkit/.config/nvidia-container-runtime/config.toml")
|
||||
|
||||
app := NewApp(logger)
|
||||
|
||||
testArgs := []string{
|
||||
"nvidia-ctk-installer",
|
||||
"--toolkit-install-dir=" + toolkitRoot,
|
||||
"--no-daemon",
|
||||
"--cdi-output-dir=" + cdiOutputDir,
|
||||
"--config=" + runtimeConfigFile,
|
||||
"--create-device-nodes=none",
|
||||
"--driver-root-ctr-path=" + hostRoot,
|
||||
"--pid-file=" + filepath.Join(testRoot, "toolkit.pid"),
|
||||
"--restart-mode=none",
|
||||
"--source-root=" + filepath.Join(artifactRoot, "deb"),
|
||||
}
|
||||
|
||||
err := app.Run(append(testArgs, tc.args...))
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
require.FileExists(t, toolkitConfigFile)
|
||||
toolkitConfigFileContents, err := os.ReadFile(toolkitConfigFile)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, strings.ReplaceAll(tc.expectedToolkitConfig, "{{ .toolkitRoot }}", toolkitRoot), string(toolkitConfigFileContents))
|
||||
|
||||
require.FileExists(t, runtimeConfigFile)
|
||||
runtimeConfigFileContents, err := os.ReadFile(runtimeConfigFile)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, strings.ReplaceAll(tc.expectedRuntimeConfig, "{{ .toolkitRoot }}", toolkitRoot), string(runtimeConfigFileContents))
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,85 +0,0 @@
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package installer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
// An artifactRoot is used as a source for installed artifacts.
|
||||
// It is refined by a directory path, a library locator, and an executable locator.
|
||||
type artifactRoot struct {
|
||||
path string
|
||||
libraries lookup.Locator
|
||||
executables lookup.Locator
|
||||
}
|
||||
|
||||
func newArtifactRoot(logger logger.Interface, rootDirectoryPath string) (*artifactRoot, error) {
|
||||
relativeLibrarySearchPaths := []string{
|
||||
"/usr/lib64",
|
||||
"/usr/lib/x86_64-linux-gnu",
|
||||
"/usr/lib/aarch64-linux-gnu",
|
||||
}
|
||||
var librarySearchPaths []string
|
||||
for _, l := range relativeLibrarySearchPaths {
|
||||
librarySearchPaths = append(librarySearchPaths, filepath.Join(rootDirectoryPath, l))
|
||||
}
|
||||
|
||||
a := artifactRoot{
|
||||
path: rootDirectoryPath,
|
||||
libraries: lookup.NewLibraryLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithCount(1),
|
||||
lookup.WithSearchPaths(librarySearchPaths...),
|
||||
),
|
||||
executables: lookup.NewExecutableLocator(
|
||||
logger,
|
||||
rootDirectoryPath,
|
||||
),
|
||||
}
|
||||
|
||||
return &a, nil
|
||||
}
|
||||
|
||||
func (r *artifactRoot) findLibrary(name string) (string, error) {
|
||||
candidates, err := r.libraries.Locate(name)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error locating library: %w", err)
|
||||
}
|
||||
if len(candidates) == 0 {
|
||||
return "", fmt.Errorf("library %v not found", name)
|
||||
}
|
||||
|
||||
return candidates[0], nil
|
||||
}
|
||||
|
||||
func (r *artifactRoot) findExecutable(name string) (string, error) {
|
||||
candidates, err := r.executables.Locate(name)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error locating executable: %w", err)
|
||||
}
|
||||
if len(candidates) == 0 {
|
||||
return "", fmt.Errorf("executable %v not found", name)
|
||||
}
|
||||
|
||||
return candidates[0], nil
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package installer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
)
|
||||
|
||||
type createDirectory struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
func (t *toolkitInstaller) createDirectory() Installer {
|
||||
return &createDirectory{
|
||||
logger: t.logger,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *createDirectory) Install(dir string) error {
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
d.logger.Infof("Creating directory '%v'", dir)
|
||||
err := os.MkdirAll(dir, 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating directory: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -1,184 +0,0 @@
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package installer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"html/template"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/operator"
|
||||
)
|
||||
|
||||
// executable describes a toolkit binary that is to be installed behind a
// wrapper script.
type executable struct {
	// requiresKernelModule requests the kernel-module check in the wrapper.
	requiresKernelModule bool
	// path is the name used to locate the executable in the artifact root;
	// symlink, if non-empty, is the name of an additional link to create.
	path, symlink string
	// args are passed to the wrapped executable ahead of the caller's
	// arguments.
	args []string
	// env holds additional environment variables set by the wrapper.
	env map[string]string
}
|
||||
|
||||
func (t *toolkitInstaller) collectExecutables(destDir string) ([]Installer, error) {
|
||||
configHome := filepath.Join(destDir, ".config")
|
||||
configDir := filepath.Join(configHome, "nvidia-container-runtime")
|
||||
configPath := filepath.Join(configDir, "config.toml")
|
||||
|
||||
executables := []executable{
|
||||
{
|
||||
path: "nvidia-ctk",
|
||||
},
|
||||
{
|
||||
path: "nvidia-cdi-hook",
|
||||
},
|
||||
}
|
||||
for _, runtime := range operator.GetRuntimes() {
|
||||
e := executable{
|
||||
path: runtime.Path,
|
||||
requiresKernelModule: true,
|
||||
env: map[string]string{
|
||||
"XDG_CONFIG_HOME": configHome,
|
||||
},
|
||||
}
|
||||
executables = append(executables, e)
|
||||
}
|
||||
executables = append(executables,
|
||||
executable{
|
||||
path: "nvidia-container-cli",
|
||||
env: map[string]string{"LD_LIBRARY_PATH": destDir + ":$LD_LIBRARY_PATH"},
|
||||
},
|
||||
)
|
||||
|
||||
executables = append(executables,
|
||||
executable{
|
||||
path: "nvidia-container-runtime-hook",
|
||||
symlink: "nvidia-container-toolkit",
|
||||
args: []string{fmt.Sprintf("-config %s", configPath)},
|
||||
},
|
||||
)
|
||||
|
||||
var installers []Installer
|
||||
for _, executable := range executables {
|
||||
executablePath, err := t.artifactRoot.findExecutable(executable.path)
|
||||
if err != nil {
|
||||
if t.ignoreErrors {
|
||||
log.Errorf("Ignoring error: %v", err)
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
wrappedExecutableFilename := filepath.Base(executablePath)
|
||||
dotRealFilename := wrappedExecutableFilename + ".real"
|
||||
|
||||
w := &wrapper{
|
||||
Source: executablePath,
|
||||
WrappedExecutable: dotRealFilename,
|
||||
CheckModules: executable.requiresKernelModule,
|
||||
Args: executable.args,
|
||||
Envvars: map[string]string{
|
||||
"PATH": strings.Join([]string{destDir, "$PATH"}, ":"),
|
||||
},
|
||||
}
|
||||
for k, v := range executable.env {
|
||||
w.Envvars[k] = v
|
||||
}
|
||||
|
||||
installers = append(installers, w)
|
||||
|
||||
if executable.symlink == "" {
|
||||
continue
|
||||
}
|
||||
link := symlink{
|
||||
linkname: executable.symlink,
|
||||
target: filepath.Base(executablePath),
|
||||
}
|
||||
installers = append(installers, link)
|
||||
}
|
||||
|
||||
return installers, nil
|
||||
|
||||
}
|
||||
|
||||
// wrapper describes a shell wrapper script that is installed in place of an
// executable.
type wrapper struct {
	// Source is the path of the executable that is wrapped.
	Source string
	// Envvars are set in the wrapper for the wrapped executable.
	Envvars map[string]string
	// WrappedExecutable is the file name under which the copy of the
	// executable is installed and which the wrapper invokes.
	WrappedExecutable string
	// CheckModules adds the check for loaded nvidia kernel modules.
	CheckModules bool
	// Args are passed to the wrapped executable before the caller's
	// arguments.
	Args []string
}
|
||||
|
||||
type render struct {
|
||||
*wrapper
|
||||
DestDir string
|
||||
}
|
||||
|
||||
func (w *wrapper) Install(destDir string) error {
|
||||
// Copy the executable with a .real extension.
|
||||
mode, err := installFile(w.Source, filepath.Join(destDir, w.WrappedExecutable))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create a wrapper file.
|
||||
r := render{
|
||||
wrapper: w,
|
||||
DestDir: destDir,
|
||||
}
|
||||
content, err := r.render()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to render wrapper: %w", err)
|
||||
}
|
||||
wrapperFile := filepath.Join(destDir, filepath.Base(w.Source))
|
||||
return installContent(content, wrapperFile, mode|0111)
|
||||
}
|
||||
|
||||
func (w *render) render() (io.Reader, error) {
|
||||
wrapperTemplate := `#! /bin/sh
|
||||
{{- if (.CheckModules) }}
|
||||
cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
|
||||
if [ "${?}" != "0" ]; then
|
||||
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
||||
exec runc "$@"
|
||||
fi
|
||||
{{- end }}
|
||||
{{- range $key, $value := .Envvars }}
|
||||
{{$key}}={{$value}} \
|
||||
{{- end }}
|
||||
{{ .DestDir }}/{{ .WrappedExecutable }} \
|
||||
{{- range $arg := .Args }}
|
||||
{{$arg}} \
|
||||
{{- end }}
|
||||
"$@"
|
||||
`
|
||||
|
||||
var content bytes.Buffer
|
||||
tmpl, err := template.New("wrapper").Parse(wrapperTemplate)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := tmpl.Execute(&content, w); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &content, nil
|
||||
}
|
||||
@@ -1,104 +0,0 @@
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package installer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestWrapperRender(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
w *wrapper
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
description: "executable is added",
|
||||
w: &wrapper{
|
||||
WrappedExecutable: "some-runtime",
|
||||
},
|
||||
expected: `#! /bin/sh
|
||||
/dest-dir/some-runtime \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "module check is added",
|
||||
w: &wrapper{
|
||||
WrappedExecutable: "some-runtime",
|
||||
CheckModules: true,
|
||||
},
|
||||
expected: `#! /bin/sh
|
||||
cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
|
||||
if [ "${?}" != "0" ]; then
|
||||
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
||||
exec runc "$@"
|
||||
fi
|
||||
/dest-dir/some-runtime \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "environment is added",
|
||||
w: &wrapper{
|
||||
WrappedExecutable: "some-runtime",
|
||||
Envvars: map[string]string{
|
||||
"PATH": "/foo/bar/baz",
|
||||
},
|
||||
},
|
||||
expected: `#! /bin/sh
|
||||
PATH=/foo/bar/baz \
|
||||
/dest-dir/some-runtime \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "args are added",
|
||||
w: &wrapper{
|
||||
WrappedExecutable: "some-runtime",
|
||||
Args: []string{"--config foo", "bar"},
|
||||
},
|
||||
expected: `#! /bin/sh
|
||||
/dest-dir/some-runtime \
|
||||
--config foo \
|
||||
bar \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
r := render{
|
||||
wrapper: tc.w,
|
||||
DestDir: "/dest-dir",
|
||||
}
|
||||
reader, err := r.render()
|
||||
require.NoError(t, err)
|
||||
|
||||
var content bytes.Buffer
|
||||
_, err = content.ReadFrom(reader)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, tc.expected, content.String())
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,188 +0,0 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package installer
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that fileInstallerMock does implement fileInstaller.
// If this is not the case, regenerate this file with moq.
var _ fileInstaller = &fileInstallerMock{}

// fileInstallerMock is a mock implementation of fileInstaller.
//
//	func TestSomethingThatUsesfileInstaller(t *testing.T) {
//
//		// make and configure a mocked fileInstaller
//		mockedfileInstaller := &fileInstallerMock{
//			installContentFunc: func(reader io.Reader, s string, v os.FileMode) error {
//				panic("mock out the installContent method")
//			},
//			installFileFunc: func(s1 string, s2 string) (os.FileMode, error) {
//				panic("mock out the installFile method")
//			},
//			installSymlinkFunc: func(s1 string, s2 string) error {
//				panic("mock out the installSymlink method")
//			},
//		}
//
//		// use mockedfileInstaller in code that requires fileInstaller
//		// and then make assertions.
//
//	}
type fileInstallerMock struct {
	// installContentFunc mocks the installContent method.
	installContentFunc func(reader io.Reader, s string, v os.FileMode) error

	// installFileFunc mocks the installFile method.
	installFileFunc func(s1 string, s2 string) (os.FileMode, error)

	// installSymlinkFunc mocks the installSymlink method.
	installSymlinkFunc func(s1 string, s2 string) error

	// calls tracks calls to the methods.
	calls struct {
		// installContent holds details about calls to the installContent method.
		installContent []struct {
			// Reader is the reader argument value.
			Reader io.Reader
			// S is the s argument value.
			S string
			// V is the v argument value.
			V os.FileMode
		}
		// installFile holds details about calls to the installFile method.
		installFile []struct {
			// S1 is the s1 argument value.
			S1 string
			// S2 is the s2 argument value.
			S2 string
		}
		// installSymlink holds details about calls to the installSymlink method.
		installSymlink []struct {
			// S1 is the s1 argument value.
			S1 string
			// S2 is the s2 argument value.
			S2 string
		}
	}
	// lockinstallContent guards calls.installContent.
	lockinstallContent sync.RWMutex
	// lockinstallFile guards calls.installFile.
	lockinstallFile sync.RWMutex
	// lockinstallSymlink guards calls.installSymlink.
	lockinstallSymlink sync.RWMutex
}
|
||||
|
||||
// installContent calls installContentFunc.
|
||||
func (mock *fileInstallerMock) installContent(reader io.Reader, s string, v os.FileMode) error {
|
||||
if mock.installContentFunc == nil {
|
||||
panic("fileInstallerMock.installContentFunc: method is nil but fileInstaller.installContent was just called")
|
||||
}
|
||||
callInfo := struct {
|
||||
Reader io.Reader
|
||||
S string
|
||||
V os.FileMode
|
||||
}{
|
||||
Reader: reader,
|
||||
S: s,
|
||||
V: v,
|
||||
}
|
||||
mock.lockinstallContent.Lock()
|
||||
mock.calls.installContent = append(mock.calls.installContent, callInfo)
|
||||
mock.lockinstallContent.Unlock()
|
||||
return mock.installContentFunc(reader, s, v)
|
||||
}
|
||||
|
||||
// installContentCalls gets all the calls that were made to installContent.
|
||||
// Check the length with:
|
||||
//
|
||||
// len(mockedfileInstaller.installContentCalls())
|
||||
func (mock *fileInstallerMock) installContentCalls() []struct {
|
||||
Reader io.Reader
|
||||
S string
|
||||
V os.FileMode
|
||||
} {
|
||||
var calls []struct {
|
||||
Reader io.Reader
|
||||
S string
|
||||
V os.FileMode
|
||||
}
|
||||
mock.lockinstallContent.RLock()
|
||||
calls = mock.calls.installContent
|
||||
mock.lockinstallContent.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// installFile calls installFileFunc.
|
||||
func (mock *fileInstallerMock) installFile(s1 string, s2 string) (os.FileMode, error) {
|
||||
if mock.installFileFunc == nil {
|
||||
panic("fileInstallerMock.installFileFunc: method is nil but fileInstaller.installFile was just called")
|
||||
}
|
||||
callInfo := struct {
|
||||
S1 string
|
||||
S2 string
|
||||
}{
|
||||
S1: s1,
|
||||
S2: s2,
|
||||
}
|
||||
mock.lockinstallFile.Lock()
|
||||
mock.calls.installFile = append(mock.calls.installFile, callInfo)
|
||||
mock.lockinstallFile.Unlock()
|
||||
return mock.installFileFunc(s1, s2)
|
||||
}
|
||||
|
||||
// installFileCalls gets all the calls that were made to installFile.
|
||||
// Check the length with:
|
||||
//
|
||||
// len(mockedfileInstaller.installFileCalls())
|
||||
func (mock *fileInstallerMock) installFileCalls() []struct {
|
||||
S1 string
|
||||
S2 string
|
||||
} {
|
||||
var calls []struct {
|
||||
S1 string
|
||||
S2 string
|
||||
}
|
||||
mock.lockinstallFile.RLock()
|
||||
calls = mock.calls.installFile
|
||||
mock.lockinstallFile.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// installSymlink calls installSymlinkFunc.
|
||||
func (mock *fileInstallerMock) installSymlink(s1 string, s2 string) error {
|
||||
if mock.installSymlinkFunc == nil {
|
||||
panic("fileInstallerMock.installSymlinkFunc: method is nil but fileInstaller.installSymlink was just called")
|
||||
}
|
||||
callInfo := struct {
|
||||
S1 string
|
||||
S2 string
|
||||
}{
|
||||
S1: s1,
|
||||
S2: s2,
|
||||
}
|
||||
mock.lockinstallSymlink.Lock()
|
||||
mock.calls.installSymlink = append(mock.calls.installSymlink, callInfo)
|
||||
mock.lockinstallSymlink.Unlock()
|
||||
return mock.installSymlinkFunc(s1, s2)
|
||||
}
|
||||
|
||||
// installSymlinkCalls gets all the calls that were made to installSymlink.
|
||||
// Check the length with:
|
||||
//
|
||||
// len(mockedfileInstaller.installSymlinkCalls())
|
||||
func (mock *fileInstallerMock) installSymlinkCalls() []struct {
|
||||
S1 string
|
||||
S2 string
|
||||
} {
|
||||
var calls []struct {
|
||||
S1 string
|
||||
S2 string
|
||||
}
|
||||
mock.lockinstallSymlink.RLock()
|
||||
calls = mock.calls.installSymlink
|
||||
mock.lockinstallSymlink.RUnlock()
|
||||
return calls
|
||||
}
|
||||
@@ -1,168 +0,0 @@
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package installer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
)
|
||||
|
||||
//go:generate moq -rm -fmt=goimports -out installer_mock.go . Installer

// Installer is implemented by every unit that can install itself into a
// destination directory.
type Installer interface {
	// Install performs the installation; the string argument is the
	// destination directory.
	Install(string) error
}
|
||||
|
||||
// toolkitInstaller is the Installer returned by New. It gathers the library
// and executable installers and runs them against a destination directory.
type toolkitInstaller struct {
	// logger defaults to logger.New() when not supplied through an option.
	logger logger.Interface
	// ignoreErrors makes library lookup failures non-fatal (they are logged
	// and the library is skipped).
	ignoreErrors bool
	// sourceRoot is the root passed to newArtifactRoot; it defaults to "/".
	sourceRoot string

	// artifactRoot is used to locate the artifacts to install.
	artifactRoot *artifactRoot

	// ensureTargetDirectory is always the first installer that Install runs.
	ensureTargetDirectory Installer
}

// Compile-time check that toolkitInstaller implements Installer.
var _ Installer = (*toolkitInstaller)(nil)
|
||||
|
||||
// New creates a toolkit installer with the specified options.
|
||||
func New(opts ...Option) (Installer, error) {
|
||||
t := &toolkitInstaller{}
|
||||
for _, opt := range opts {
|
||||
opt(t)
|
||||
}
|
||||
|
||||
if t.logger == nil {
|
||||
t.logger = logger.New()
|
||||
}
|
||||
if t.sourceRoot == "" {
|
||||
t.sourceRoot = "/"
|
||||
}
|
||||
if t.artifactRoot == nil {
|
||||
artifactRoot, err := newArtifactRoot(t.logger, t.sourceRoot)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t.artifactRoot = artifactRoot
|
||||
}
|
||||
|
||||
if t.ensureTargetDirectory == nil {
|
||||
t.ensureTargetDirectory = t.createDirectory()
|
||||
}
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// Install ensures that the required toolkit files are installed in the specified directory.
|
||||
func (t *toolkitInstaller) Install(destDir string) error {
|
||||
var installers []Installer
|
||||
|
||||
installers = append(installers, t.ensureTargetDirectory)
|
||||
|
||||
libraries, err := t.collectLibraries()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to collect libraries: %w", err)
|
||||
}
|
||||
installers = append(installers, libraries...)
|
||||
|
||||
executables, err := t.collectExecutables(destDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to collect executables: %w", err)
|
||||
}
|
||||
installers = append(installers, executables...)
|
||||
|
||||
var errs error
|
||||
for _, i := range installers {
|
||||
errs = errors.Join(errs, i.Install(destDir))
|
||||
}
|
||||
|
||||
return errs
|
||||
}
|
||||
|
||||
type symlink struct {
|
||||
linkname string
|
||||
target string
|
||||
}
|
||||
|
||||
func (s symlink) Install(destDir string) error {
|
||||
symlinkPath := filepath.Join(destDir, s.linkname)
|
||||
return installSymlink(s.target, symlinkPath)
|
||||
}
|
||||
|
||||
//go:generate moq -rm -fmt=goimports -out file-installer_mock.go . fileInstaller

// fileInstaller groups the low-level file operations used by the installers.
// Its method set mirrors the package-level installContent, installFile and
// installSymlink function variables.
type fileInstaller interface {
	// installContent writes the reader's content to the given path and
	// applies the given mode.
	installContent(io.Reader, string, os.FileMode) error
	// installFile copies a source file to a destination path and returns
	// the file mode of the source.
	installFile(string, string) (os.FileMode, error)
	// installSymlink creates a symlink; the arguments are (target, link).
	installSymlink(string, string) error
}
|
||||
|
||||
// installSymlink is the symlink implementation used by the installers. It is
// a variable so that it can be replaced in tests.
var installSymlink = installSymlinkStub

// installSymlinkStub creates a symbolic link at link that points to target.
//
// The underlying error is wrapped with %w so that callers can inspect it with
// errors.Is / errors.As (for example to detect fs.ErrExist).
func installSymlinkStub(target string, link string) error {
	if err := os.Symlink(target, link); err != nil {
		return fmt.Errorf("error creating symlink '%v' => '%v': %w", link, target, err)
	}
	return nil
}
|
||||
|
||||
var installFile = installFileStub
|
||||
|
||||
func installFileStub(src string, dest string) (os.FileMode, error) {
|
||||
sourceInfo, err := os.Stat(src)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error getting file info for '%v': %v", src, err)
|
||||
}
|
||||
|
||||
source, err := os.Open(src)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error opening source: %w", err)
|
||||
}
|
||||
defer source.Close()
|
||||
|
||||
mode := sourceInfo.Mode()
|
||||
if err := installContent(source, dest, mode); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return mode, nil
|
||||
}
|
||||
|
||||
// installContent is the content-writing implementation used by the
// installers. It is a variable so that it can be replaced in tests.
var installContent = installContentStub

// installContentStub writes everything read from content to the file at dest
// (created or truncated) and then sets the file's mode to mode.
//
// The error from closing the destination is reported when no earlier error
// occurred, since a failed close can mean that written data was lost. All
// errors are wrapped with %w.
func installContentStub(content io.Reader, dest string, mode fs.FileMode) (rerr error) {
	destination, err := os.Create(dest)
	if err != nil {
		return fmt.Errorf("error creating destination: %w", err)
	}
	defer func() {
		// Keep the first error; only surface the close error on success.
		if cerr := destination.Close(); cerr != nil && rerr == nil {
			rerr = fmt.Errorf("error closing destination: %w", cerr)
		}
	}()

	if _, err := io.Copy(destination, content); err != nil {
		return fmt.Errorf("error copying file: %w", err)
	}
	if err := os.Chmod(dest, mode); err != nil {
		return fmt.Errorf("error setting mode for '%v': %w", dest, err)
	}
	return nil
}
|
||||
@@ -1,74 +0,0 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package installer
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that InstallerMock does implement Installer.
// If this is not the case, regenerate this file with moq.
var _ Installer = &InstallerMock{}

// InstallerMock is a mock implementation of Installer.
//
//	func TestSomethingThatUsesInstaller(t *testing.T) {
//
//		// make and configure a mocked Installer
//		mockedInstaller := &InstallerMock{
//			InstallFunc: func(s string) error {
//				panic("mock out the Install method")
//			},
//		}
//
//		// use mockedInstaller in code that requires Installer
//		// and then make assertions.
//
//	}
type InstallerMock struct {
	// InstallFunc mocks the Install method.
	InstallFunc func(s string) error

	// calls tracks calls to the methods.
	calls struct {
		// Install holds details about calls to the Install method.
		Install []struct {
			// S is the s argument value.
			S string
		}
	}
	// lockInstall guards calls.Install.
	lockInstall sync.RWMutex
}
|
||||
|
||||
// Install calls InstallFunc.
|
||||
func (mock *InstallerMock) Install(s string) error {
|
||||
if mock.InstallFunc == nil {
|
||||
panic("InstallerMock.InstallFunc: method is nil but Installer.Install was just called")
|
||||
}
|
||||
callInfo := struct {
|
||||
S string
|
||||
}{
|
||||
S: s,
|
||||
}
|
||||
mock.lockInstall.Lock()
|
||||
mock.calls.Install = append(mock.calls.Install, callInfo)
|
||||
mock.lockInstall.Unlock()
|
||||
return mock.InstallFunc(s)
|
||||
}
|
||||
|
||||
// InstallCalls gets all the calls that were made to Install.
|
||||
// Check the length with:
|
||||
//
|
||||
// len(mockedInstaller.InstallCalls())
|
||||
func (mock *InstallerMock) InstallCalls() []struct {
|
||||
S string
|
||||
} {
|
||||
var calls []struct {
|
||||
S string
|
||||
}
|
||||
mock.lockInstall.RLock()
|
||||
calls = mock.calls.Install
|
||||
mock.lockInstall.RUnlock()
|
||||
return calls
|
||||
}
|
||||
@@ -1,251 +0,0 @@
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package installer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
func TestToolkitInstaller(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
type contentCall struct {
|
||||
wrapper string
|
||||
path string
|
||||
mode fs.FileMode
|
||||
}
|
||||
var contentCalls []contentCall
|
||||
|
||||
installer := &fileInstallerMock{
|
||||
installFileFunc: func(s1, s2 string) (os.FileMode, error) {
|
||||
return 0666, nil
|
||||
},
|
||||
installContentFunc: func(reader io.Reader, s string, fileMode fs.FileMode) error {
|
||||
var b bytes.Buffer
|
||||
if _, err := b.ReadFrom(reader); err != nil {
|
||||
return err
|
||||
}
|
||||
contents := contentCall{
|
||||
wrapper: b.String(),
|
||||
path: s,
|
||||
mode: fileMode,
|
||||
}
|
||||
|
||||
contentCalls = append(contentCalls, contents)
|
||||
return nil
|
||||
},
|
||||
installSymlinkFunc: func(s1, s2 string) error {
|
||||
return nil
|
||||
},
|
||||
}
|
||||
installFile = installer.installFile
|
||||
installContent = installer.installContent
|
||||
installSymlink = installer.installSymlink
|
||||
|
||||
root := "/artifacts/test"
|
||||
libraries := &lookup.LocatorMock{
|
||||
LocateFunc: func(s string) ([]string, error) {
|
||||
switch s {
|
||||
case "libnvidia-container.so.1":
|
||||
return []string{filepath.Join(root, "libnvidia-container.so.987.65.43")}, nil
|
||||
case "libnvidia-container-go.so.1":
|
||||
return []string{filepath.Join(root, "libnvidia-container-go.so.1.23.4")}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("%v not found", s)
|
||||
},
|
||||
}
|
||||
executables := &lookup.LocatorMock{
|
||||
LocateFunc: func(s string) ([]string, error) {
|
||||
switch s {
|
||||
case "nvidia-container-runtime.cdi":
|
||||
fallthrough
|
||||
case "nvidia-container-runtime.legacy":
|
||||
fallthrough
|
||||
case "nvidia-container-runtime":
|
||||
fallthrough
|
||||
case "nvidia-ctk":
|
||||
fallthrough
|
||||
case "nvidia-container-cli":
|
||||
fallthrough
|
||||
case "nvidia-container-runtime-hook":
|
||||
fallthrough
|
||||
case "nvidia-cdi-hook":
|
||||
return []string{filepath.Join(root, "usr/bin", s)}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("%v not found", s)
|
||||
},
|
||||
}
|
||||
|
||||
r := &artifactRoot{
|
||||
libraries: libraries,
|
||||
executables: executables,
|
||||
}
|
||||
|
||||
createDirectory := &InstallerMock{
|
||||
InstallFunc: func(c string) error {
|
||||
return nil
|
||||
},
|
||||
}
|
||||
i := toolkitInstaller{
|
||||
logger: logger,
|
||||
artifactRoot: r,
|
||||
ensureTargetDirectory: createDirectory,
|
||||
}
|
||||
|
||||
err := i.Install("/foo/bar/baz")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.ElementsMatch(t,
|
||||
[]struct {
|
||||
S string
|
||||
}{
|
||||
{"/foo/bar/baz"},
|
||||
},
|
||||
createDirectory.InstallCalls(),
|
||||
)
|
||||
|
||||
require.ElementsMatch(t,
|
||||
installer.installFileCalls(),
|
||||
[]struct {
|
||||
S1 string
|
||||
S2 string
|
||||
}{
|
||||
{"/artifacts/test/libnvidia-container-go.so.1.23.4", "/foo/bar/baz/libnvidia-container-go.so.1.23.4"},
|
||||
{"/artifacts/test/libnvidia-container.so.987.65.43", "/foo/bar/baz/libnvidia-container.so.987.65.43"},
|
||||
{"/artifacts/test/usr/bin/nvidia-container-runtime.cdi", "/foo/bar/baz/nvidia-container-runtime.cdi.real"},
|
||||
{"/artifacts/test/usr/bin/nvidia-container-runtime.legacy", "/foo/bar/baz/nvidia-container-runtime.legacy.real"},
|
||||
{"/artifacts/test/usr/bin/nvidia-container-runtime", "/foo/bar/baz/nvidia-container-runtime.real"},
|
||||
{"/artifacts/test/usr/bin/nvidia-ctk", "/foo/bar/baz/nvidia-ctk.real"},
|
||||
{"/artifacts/test/usr/bin/nvidia-cdi-hook", "/foo/bar/baz/nvidia-cdi-hook.real"},
|
||||
{"/artifacts/test/usr/bin/nvidia-container-cli", "/foo/bar/baz/nvidia-container-cli.real"},
|
||||
{"/artifacts/test/usr/bin/nvidia-container-runtime-hook", "/foo/bar/baz/nvidia-container-runtime-hook.real"},
|
||||
},
|
||||
)
|
||||
|
||||
require.ElementsMatch(t,
|
||||
installer.installSymlinkCalls(),
|
||||
[]struct {
|
||||
S1 string
|
||||
S2 string
|
||||
}{
|
||||
{"libnvidia-container-go.so.1.23.4", "/foo/bar/baz/libnvidia-container-go.so.1"},
|
||||
{"libnvidia-container.so.987.65.43", "/foo/bar/baz/libnvidia-container.so.1"},
|
||||
{"nvidia-container-runtime-hook", "/foo/bar/baz/nvidia-container-toolkit"},
|
||||
},
|
||||
)
|
||||
|
||||
require.ElementsMatch(t,
|
||||
contentCalls,
|
||||
[]contentCall{
|
||||
{
|
||||
path: "/foo/bar/baz/nvidia-container-runtime",
|
||||
mode: 0777,
|
||||
wrapper: `#! /bin/sh
|
||||
cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
|
||||
if [ "${?}" != "0" ]; then
|
||||
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
||||
exec runc "$@"
|
||||
fi
|
||||
PATH=/foo/bar/baz:$PATH \
|
||||
XDG_CONFIG_HOME=/foo/bar/baz/.config \
|
||||
/foo/bar/baz/nvidia-container-runtime.real \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: "/foo/bar/baz/nvidia-container-runtime.cdi",
|
||||
mode: 0777,
|
||||
wrapper: `#! /bin/sh
|
||||
cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
|
||||
if [ "${?}" != "0" ]; then
|
||||
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
||||
exec runc "$@"
|
||||
fi
|
||||
PATH=/foo/bar/baz:$PATH \
|
||||
XDG_CONFIG_HOME=/foo/bar/baz/.config \
|
||||
/foo/bar/baz/nvidia-container-runtime.cdi.real \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: "/foo/bar/baz/nvidia-container-runtime.legacy",
|
||||
mode: 0777,
|
||||
wrapper: `#! /bin/sh
|
||||
cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
|
||||
if [ "${?}" != "0" ]; then
|
||||
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
||||
exec runc "$@"
|
||||
fi
|
||||
PATH=/foo/bar/baz:$PATH \
|
||||
XDG_CONFIG_HOME=/foo/bar/baz/.config \
|
||||
/foo/bar/baz/nvidia-container-runtime.legacy.real \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: "/foo/bar/baz/nvidia-ctk",
|
||||
mode: 0777,
|
||||
wrapper: `#! /bin/sh
|
||||
PATH=/foo/bar/baz:$PATH \
|
||||
/foo/bar/baz/nvidia-ctk.real \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: "/foo/bar/baz/nvidia-cdi-hook",
|
||||
mode: 0777,
|
||||
wrapper: `#! /bin/sh
|
||||
PATH=/foo/bar/baz:$PATH \
|
||||
/foo/bar/baz/nvidia-cdi-hook.real \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: "/foo/bar/baz/nvidia-container-cli",
|
||||
mode: 0777,
|
||||
wrapper: `#! /bin/sh
|
||||
LD_LIBRARY_PATH=/foo/bar/baz:$LD_LIBRARY_PATH \
|
||||
PATH=/foo/bar/baz:$PATH \
|
||||
/foo/bar/baz/nvidia-container-cli.real \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: "/foo/bar/baz/nvidia-container-runtime-hook",
|
||||
mode: 0777,
|
||||
wrapper: `#! /bin/sh
|
||||
PATH=/foo/bar/baz:$PATH \
|
||||
/foo/bar/baz/nvidia-container-runtime-hook.real \
|
||||
-config /foo/bar/baz/.config/nvidia-container-runtime/config.toml \
|
||||
"$@"
|
||||
`,
|
||||
},
|
||||
},
|
||||
)
|
||||
}
|
||||
@@ -1,73 +0,0 @@
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package installer
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// collectLibraries locates and installs the libraries that are part of
|
||||
// the nvidia-container-toolkit.
|
||||
// A predefined set of library candidates are considered, with the first one
|
||||
// resulting in success being installed to the toolkit folder. The install process
|
||||
// resolves the symlink for the library and copies the versioned library itself.
|
||||
func (t *toolkitInstaller) collectLibraries() ([]Installer, error) {
|
||||
requiredLibraries := []string{
|
||||
"libnvidia-container.so.1",
|
||||
"libnvidia-container-go.so.1",
|
||||
}
|
||||
|
||||
var installers []Installer
|
||||
for _, l := range requiredLibraries {
|
||||
libraryPath, err := t.artifactRoot.findLibrary(l)
|
||||
if err != nil {
|
||||
if t.ignoreErrors {
|
||||
log.Errorf("Ignoring error: %v", err)
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
installers = append(installers, library(libraryPath))
|
||||
|
||||
if filepath.Base(libraryPath) == l {
|
||||
continue
|
||||
}
|
||||
|
||||
link := symlink{
|
||||
linkname: l,
|
||||
target: filepath.Base(libraryPath),
|
||||
}
|
||||
installers = append(installers, link)
|
||||
}
|
||||
|
||||
return installers, nil
|
||||
}
|
||||
|
||||
type library string
|
||||
|
||||
// Install copies the library l to the destination folder.
|
||||
// The same basename is used in the destination folder.
|
||||
func (l library) Install(destinationDir string) error {
|
||||
dest := filepath.Join(destinationDir, filepath.Base(string(l)))
|
||||
|
||||
_, err := installFile(string(l), dest)
|
||||
return err
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
/**
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package installer
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
type Option func(*toolkitInstaller)
|
||||
|
||||
func WithLogger(logger logger.Interface) Option {
|
||||
return func(ti *toolkitInstaller) {
|
||||
ti.logger = logger
|
||||
}
|
||||
}
|
||||
|
||||
func WithArtifactRoot(artifactRoot *artifactRoot) Option {
|
||||
return func(ti *toolkitInstaller) {
|
||||
ti.artifactRoot = artifactRoot
|
||||
}
|
||||
}
|
||||
|
||||
func WithIgnoreErrors(ignoreErrors bool) Option {
|
||||
return func(ti *toolkitInstaller) {
|
||||
ti.ignoreErrors = ignoreErrors
|
||||
}
|
||||
}
|
||||
|
||||
// WithSourceRoot sets the root directory for locating artifacts to be installed.
|
||||
func WithSourceRoot(sourceRoot string) Option {
|
||||
return func(ti *toolkitInstaller) {
|
||||
ti.sourceRoot = sourceRoot
|
||||
}
|
||||
}
|
||||
@@ -1,40 +0,0 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package toolkit
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
// An Option provides a mechanism to configure an Installer.
|
||||
type Option func(*Installer)
|
||||
|
||||
func WithLogger(logger logger.Interface) Option {
|
||||
return func(i *Installer) {
|
||||
i.logger = logger
|
||||
}
|
||||
}
|
||||
|
||||
func WithToolkitRoot(toolkitRoot string) Option {
|
||||
return func(i *Installer) {
|
||||
i.toolkitRoot = toolkitRoot
|
||||
}
|
||||
}
|
||||
|
||||
func WithSourceRoot(sourceRoot string) Option {
|
||||
return func(i *Installer) {
|
||||
i.sourceRoot = sourceRoot
|
||||
}
|
||||
}
|
||||
@@ -1,537 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package toolkit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
"tags.cncf.io/container-device-interface/pkg/parser"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/toolkit/installer"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
||||
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
|
||||
)
|
||||
|
||||
const (
	// DefaultNvidiaDriverRoot specifies the default NVIDIA driver run directory
	DefaultNvidiaDriverRoot = "/run/nvidia/driver"

	// configFilename is presumably the basename of the toolkit config file;
	// its use is outside this view — confirm.
	configFilename = "config.toml"
)
|
||||
|
||||
// cdiOptions holds the CDI-related settings; it is embedded in Options as
// the CDI field.
// NOTE(review): the meaning of the individual fields is not visible here —
// presumably kind is composed of vendor and class; confirm against the code
// that populates them.
type cdiOptions struct {
	Enabled   bool
	outputDir string
	kind      string
	vendor    string
	class     string
}
|
||||
|
||||
// Options stores the values of the toolkit command line flags; Flags binds
// the individual flags to these fields.
type Options struct {
	// DriverRoot is set by the "driver-root" flag and defaults to
	// DefaultNvidiaDriverRoot.
	DriverRoot string
	// DevRoot is set by the "dev-root" flag: the root where /dev is located.
	// Per the flag usage, the driver root is assumed when it is not specified.
	DevRoot string
	// DriverRootCtrPath is set by the "driver-root-ctr-path" flag and defaults
	// to DefaultNvidiaDriverRoot.
	DriverRootCtrPath string
	// DevRootCtrPath is set by the "dev-root-ctr-path" flag: the root where
	// /dev is located in the container. Per the flag usage, the
	// driver-root-ctr-path is assumed when it is not specified.
	DevRootCtrPath string

	// ContainerRuntimeMode is set by the "nvidia-container-runtime.mode" flag.
	ContainerRuntimeMode string
	// ContainerRuntimeDebug is set by the "nvidia-container-runtime.debug"
	// flag: the location of the debug log file for the NVIDIA Container Runtime.
	ContainerRuntimeDebug string
	// ContainerRuntimeLogLevel is set by the
	// "nvidia-container-runtime.log-level" flag.
	ContainerRuntimeLogLevel string

	ContainerRuntimeModesCdiDefaultKind        string
	ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice

	ContainerRuntimeRuntimes cli.StringSlice

	ContainerRuntimeHookSkipModeDetection bool

	ContainerCLIDebug string

	// CDI stores the CDI options for the toolkit.
	CDI cdiOptions

	createDeviceNodes cli.StringSlice

	acceptNVIDIAVisibleDevicesWhenUnprivileged bool
	acceptNVIDIAVisibleDevicesAsVolumeMounts   bool

	ignoreErrors bool

	optInFeatures cli.StringSlice
}
|
||||
|
||||
func Flags(opts *Options) []cli.Flag {
|
||||
flags := []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "driver-root",
|
||||
Aliases: []string{"nvidia-driver-root"},
|
||||
Value: DefaultNvidiaDriverRoot,
|
||||
Destination: &opts.DriverRoot,
|
||||
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "driver-root-ctr-path",
|
||||
Value: DefaultNvidiaDriverRoot,
|
||||
Destination: &opts.DriverRootCtrPath,
|
||||
EnvVars: []string{"DRIVER_ROOT_CTR_PATH"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "dev-root",
|
||||
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
|
||||
Destination: &opts.DevRoot,
|
||||
EnvVars: []string{"NVIDIA_DEV_ROOT", "DEV_ROOT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "dev-root-ctr-path",
|
||||
Usage: "Specify the root where `/dev` is located in the container. If this is not specified, the driver-root-ctr-path is assumed.",
|
||||
Destination: &opts.DevRootCtrPath,
|
||||
EnvVars: []string{"DEV_ROOT_CTR_PATH"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime.debug",
|
||||
Aliases: []string{"nvidia-container-runtime-debug"},
|
||||
Usage: "Specify the location of the debug log file for the NVIDIA Container Runtime",
|
||||
Destination: &opts.ContainerRuntimeDebug,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_DEBUG"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime.log-level",
|
||||
Aliases: []string{"nvidia-container-runtime-debug-log-level"},
|
||||
Destination: &opts.ContainerRuntimeLogLevel,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime.mode",
|
||||
Aliases: []string{"nvidia-container-runtime-mode"},
|
||||
Destination: &opts.ContainerRuntimeMode,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODE"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime.modes.cdi.default-kind",
|
||||
Destination: &opts.ContainerRuntimeModesCdiDefaultKind,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "nvidia-container-runtime.modes.cdi.annotation-prefixes",
|
||||
Destination: &opts.ContainerRuntimeModesCDIAnnotationPrefixes,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "nvidia-container-runtime.runtimes",
|
||||
Destination: &opts.ContainerRuntimeRuntimes,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_RUNTIMES"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "nvidia-container-runtime-hook.skip-mode-detection",
|
||||
Value: true,
|
||||
Destination: &opts.ContainerRuntimeHookSkipModeDetection,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_HOOK_SKIP_MODE_DETECTION"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-cli.debug",
|
||||
Aliases: []string{"nvidia-container-cli-debug"},
|
||||
Usage: "Specify the location of the debug log file for the NVIDIA Container CLI",
|
||||
Destination: &opts.ContainerCLIDebug,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_CLI_DEBUG"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "accept-nvidia-visible-devices-envvar-when-unprivileged",
|
||||
Usage: "Set the accept-nvidia-visible-devices-envvar-when-unprivileged config option",
|
||||
Value: true,
|
||||
Destination: &opts.acceptNVIDIAVisibleDevicesWhenUnprivileged,
|
||||
EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_ENVVAR_WHEN_UNPRIVILEGED"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "accept-nvidia-visible-devices-as-volume-mounts",
|
||||
Usage: "Set the accept-nvidia-visible-devices-as-volume-mounts config option",
|
||||
Destination: &opts.acceptNVIDIAVisibleDevicesAsVolumeMounts,
|
||||
EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "cdi-enabled",
|
||||
Aliases: []string{"enable-cdi"},
|
||||
Usage: "enable the generation of a CDI specification",
|
||||
Destination: &opts.CDI.Enabled,
|
||||
EnvVars: []string{"CDI_ENABLED", "ENABLE_CDI"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "cdi-output-dir",
|
||||
Usage: "the directory where the CDI output files are to be written. If this is set to '', no CDI specification is generated.",
|
||||
Value: "/var/run/cdi",
|
||||
Destination: &opts.CDI.outputDir,
|
||||
EnvVars: []string{"CDI_OUTPUT_DIR"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "cdi-kind",
|
||||
Usage: "the vendor string to use for the generated CDI specification",
|
||||
Value: "management.nvidia.com/gpu",
|
||||
Destination: &opts.CDI.kind,
|
||||
EnvVars: []string{"CDI_KIND"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "ignore-errors",
|
||||
Usage: "ignore errors when installing the NVIDIA Container toolkit. This is used for testing purposes only.",
|
||||
Hidden: true,
|
||||
Destination: &opts.ignoreErrors,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "create-device-nodes",
|
||||
Usage: "(Only applicable with --cdi-enabled) specifies which device nodes should be created. If any one of the options is set to '' or 'none', no device nodes will be created.",
|
||||
Value: cli.NewStringSlice("control"),
|
||||
Destination: &opts.createDeviceNodes,
|
||||
EnvVars: []string{"CREATE_DEVICE_NODES"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "opt-in-features",
|
||||
Hidden: true,
|
||||
Destination: &opts.optInFeatures,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_TOOLKIT_OPT_IN_FEATURES"},
|
||||
},
|
||||
}
|
||||
|
||||
return flags
|
||||
}
|
||||
|
||||
// An Installer is used to install the NVIDIA Container Toolkit from the toolkit container.
type Installer struct {
	// logger receives progress and error messages; NewInstaller sets a
	// default from logger.New() when none is configured.
	logger logger.Interface
	// sourceRoot is passed to the component installer via
	// installer.WithSourceRoot.
	sourceRoot string
	// toolkitRoot specifies the destination path at which the toolkit is installed.
	toolkitRoot string
}
|
||||
|
||||
// NewInstaller creates an installer for the NVIDIA Container Toolkit.
|
||||
func NewInstaller(opts ...Option) *Installer {
|
||||
i := &Installer{}
|
||||
for _, opt := range opts {
|
||||
opt(i)
|
||||
}
|
||||
|
||||
if i.logger == nil {
|
||||
i.logger = logger.New()
|
||||
}
|
||||
|
||||
return i
|
||||
}
|
||||
|
||||
// ValidateOptions checks whether the specified options are valid
|
||||
func (t *Installer) ValidateOptions(opts *Options) error {
|
||||
if t == nil {
|
||||
return fmt.Errorf("toolkit installer is not initilized")
|
||||
}
|
||||
if t.toolkitRoot == "" {
|
||||
return fmt.Errorf("invalid --toolkit-root option: %v", t.toolkitRoot)
|
||||
}
|
||||
|
||||
vendor, class := parser.ParseQualifier(opts.CDI.kind)
|
||||
if err := parser.ValidateVendorName(vendor); err != nil {
|
||||
return fmt.Errorf("invalid CDI vendor name: %v", err)
|
||||
}
|
||||
if err := parser.ValidateClassName(class); err != nil {
|
||||
return fmt.Errorf("invalid CDI class name: %v", err)
|
||||
}
|
||||
opts.CDI.vendor = vendor
|
||||
opts.CDI.class = class
|
||||
|
||||
if opts.CDI.Enabled && opts.CDI.outputDir == "" {
|
||||
t.logger.Warning("Skipping CDI spec generation (no output directory specified)")
|
||||
opts.CDI.Enabled = false
|
||||
}
|
||||
|
||||
isDisabled := false
|
||||
for _, mode := range opts.createDeviceNodes.Value() {
|
||||
if mode != "" && mode != "none" && mode != "control" {
|
||||
return fmt.Errorf("invalid --create-device-nodes value: %v", mode)
|
||||
}
|
||||
if mode == "" || mode == "none" {
|
||||
isDisabled = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !opts.CDI.Enabled && !isDisabled {
|
||||
t.logger.Info("disabling device node creation since --cdi-enabled=false")
|
||||
isDisabled = true
|
||||
}
|
||||
if isDisabled {
|
||||
opts.createDeviceNodes = *cli.NewStringSlice()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Install installs the components of the NVIDIA container toolkit.
|
||||
// Any existing installation is removed.
|
||||
func (t *Installer) Install(cli *cli.Context, opts *Options) error {
|
||||
if t == nil {
|
||||
return fmt.Errorf("toolkit installer is not initilized")
|
||||
}
|
||||
t.logger.Infof("Installing NVIDIA container toolkit to '%v'", t.toolkitRoot)
|
||||
|
||||
t.logger.Infof("Removing existing NVIDIA container toolkit installation")
|
||||
err := os.RemoveAll(t.toolkitRoot)
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error removing toolkit directory: %v", err)
|
||||
} else if err != nil {
|
||||
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err))
|
||||
}
|
||||
|
||||
// Create a toolkit installer to actually install the toolkit components.
|
||||
toolkit, err := installer.New(
|
||||
installer.WithLogger(t.logger),
|
||||
installer.WithSourceRoot(t.sourceRoot),
|
||||
installer.WithIgnoreErrors(opts.ignoreErrors),
|
||||
)
|
||||
if err != nil {
|
||||
if !opts.ignoreErrors {
|
||||
return fmt.Errorf("could not create toolkit installer: %w", err)
|
||||
}
|
||||
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not create toolkit installer: %w", err))
|
||||
}
|
||||
if err := toolkit.Install(t.toolkitRoot); err != nil {
|
||||
if !opts.ignoreErrors {
|
||||
return fmt.Errorf("could not install toolkit components: %w", err)
|
||||
}
|
||||
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not install toolkit components: %w", err))
|
||||
}
|
||||
|
||||
err = t.installToolkitConfig(cli, opts)
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)
|
||||
} else if err != nil {
|
||||
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err))
|
||||
}
|
||||
|
||||
err = t.createDeviceNodes(opts)
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error creating device nodes: %v", err)
|
||||
} else if err != nil {
|
||||
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error creating device nodes: %v", err))
|
||||
}
|
||||
|
||||
nvidiaCDIHookPath := filepath.Join(t.toolkitRoot, "nvidia-cdi-hook")
|
||||
err = t.generateCDISpec(opts, nvidiaCDIHookPath)
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error generating CDI specification: %v", err)
|
||||
} else if err != nil {
|
||||
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error generating CDI specification: %v", err))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
// that the settings are updated to match the desired install and nvidia driver directories.
//
// The file is written to <toolkitRoot>/.config/nvidia-container-runtime/config.toml
// and its contents are also echoed to STDOUT. The context c is only used to
// determine which of the optional flags were explicitly set.
func (t *Installer) installToolkitConfig(c *cli.Context, opts *Options) error {
	toolkitConfigDir := filepath.Join(t.toolkitRoot, ".config", "nvidia-container-runtime")
	toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename)

	t.logger.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)

	// A directory creation failure is only fatal when errors are not ignored.
	err := t.createDirectories(toolkitConfigDir)
	if err != nil && !opts.ignoreErrors {
		return fmt.Errorf("could not create required directories: %v", err)
	} else if err != nil {
		t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err))
	}
	// Paths of the installed executables that the config must point at.
	nvidiaContainerCliExecutablePath := filepath.Join(t.toolkitRoot, "nvidia-container-cli")
	nvidiaCTKPath := filepath.Join(t.toolkitRoot, "nvidia-ctk")
	nvidiaContainerRuntimeHookPath := filepath.Join(t.toolkitRoot, "nvidia-container-runtime-hook")

	// Start from the config returned by config.New() and override values below.
	cfg, err := config.New()
	if err != nil {
		return fmt.Errorf("could not open source config file: %v", err)
	}

	// NOTE: errors from here on are returned even when opts.ignoreErrors is set;
	// the caller (Install) decides whether to ignore them.
	targetConfig, err := os.Create(toolkitConfigPath)
	if err != nil {
		return fmt.Errorf("could not create target config file: %v", err)
	}
	defer targetConfig.Close()

	// Read the ldconfig path from the config as this may differ per platform
	// On ubuntu-based systems this ends in `.real`
	ldconfigPath := fmt.Sprintf("%s", cfg.GetDefault("nvidia-container-cli.ldconfig", "/sbin/ldconfig"))
	// Use the driver run root as the root:
	// any leading "@/" is stripped before joining, and "@" is prepended again.
	driverLdconfigPath := config.NormalizeLDConfigPath("@" + filepath.Join(opts.DriverRoot, strings.TrimPrefix(ldconfigPath, "@/")))

	// These values are always written, regardless of which flags were set.
	configValues := map[string]interface{}{
		// Set the options in the root toml table
		"accept-nvidia-visible-devices-envvar-when-unprivileged": opts.acceptNVIDIAVisibleDevicesWhenUnprivileged,
		"accept-nvidia-visible-devices-as-volume-mounts":         opts.acceptNVIDIAVisibleDevicesAsVolumeMounts,
		// Set the nvidia-container-cli options
		"nvidia-container-cli.root":     opts.DriverRoot,
		"nvidia-container-cli.path":     nvidiaContainerCliExecutablePath,
		"nvidia-container-cli.ldconfig": driverLdconfigPath,
		// Set nvidia-ctk options
		"nvidia-ctk.path": nvidiaCTKPath,
		// Set the nvidia-container-runtime-hook options
		"nvidia-container-runtime-hook.path":                nvidiaContainerRuntimeHookPath,
		"nvidia-container-runtime-hook.skip-mode-detection": opts.ContainerRuntimeHookSkipModeDetection,
	}

	// The runtimes list is written whenever it is non-empty; the same key also
	// appears in the optional values below.
	toolkitRuntimeList := opts.ContainerRuntimeRuntimes.Value()
	if len(toolkitRuntimeList) > 0 {
		configValues["nvidia-container-runtime.runtimes"] = toolkitRuntimeList
	}

	// Each opt-in feature is enabled as "features.<name> = true".
	for _, optInFeature := range opts.optInFeatures.Value() {
		configValues["features."+optInFeature] = true
	}

	for key, value := range configValues {
		cfg.Set(key, value)
	}

	// Set the optional config options
	// The map keys double as flag names for the c.IsSet lookup below.
	optionalConfigValues := map[string]interface{}{
		"nvidia-container-runtime.debug":                         opts.ContainerRuntimeDebug,
		"nvidia-container-runtime.log-level":                     opts.ContainerRuntimeLogLevel,
		"nvidia-container-runtime.mode":                          opts.ContainerRuntimeMode,
		"nvidia-container-runtime.modes.cdi.annotation-prefixes": opts.ContainerRuntimeModesCDIAnnotationPrefixes,
		"nvidia-container-runtime.modes.cdi.default-kind":        opts.ContainerRuntimeModesCdiDefaultKind,
		"nvidia-container-runtime.runtimes":                      opts.ContainerRuntimeRuntimes,
		"nvidia-container-cli.debug":                             opts.ContainerCLIDebug,
	}

	for key, value := range optionalConfigValues {
		// Only options that were explicitly set on the context are written.
		if !c.IsSet(key) {
			t.logger.Infof("Skipping unset option: %v", key)
			continue
		}
		if value == nil {
			t.logger.Infof("Skipping option with nil value: %v", key)
			continue
		}

		switch v := value.(type) {
		case string:
			// Empty strings are skipped silently.
			if v == "" {
				continue
			}
		case cli.StringSlice:
			// Empty slices are skipped; otherwise the plain []string is stored.
			if len(v.Value()) == 0 {
				continue
			}
			value = v.Value()
		default:
			// Unexpected types are reported but still written below.
			t.logger.Warningf("Unexpected type for option %v=%v: %T", key, value, v)
		}

		cfg.Set(key, value)
	}

	if _, err := cfg.WriteTo(targetConfig); err != nil {
		return fmt.Errorf("error writing config: %v", err)
	}

	// Echo the resulting config to STDOUT; a failure here is only a warning.
	os.Stdout.WriteString("Using config:\n")
	if _, err = cfg.WriteTo(os.Stdout); err != nil {
		t.logger.Warningf("Failed to output config to STDOUT: %v", err)
	}

	return nil
}
|
||||
|
||||
func (t *Installer) createDirectories(dir ...string) error {
|
||||
for _, d := range dir {
|
||||
t.logger.Infof("Creating directory '%v'", d)
|
||||
err := os.MkdirAll(d, 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating directory: %v", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *Installer) createDeviceNodes(opts *Options) error {
|
||||
modes := opts.createDeviceNodes.Value()
|
||||
if len(modes) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
devices, err := nvdevices.New(
|
||||
nvdevices.WithDevRoot(opts.DevRootCtrPath),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create library: %v", err)
|
||||
}
|
||||
|
||||
for _, mode := range modes {
|
||||
t.logger.Infof("Creating %v device nodes at %v", mode, opts.DevRootCtrPath)
|
||||
if mode != "control" {
|
||||
t.logger.Warningf("Unrecognised device mode: %v", mode)
|
||||
continue
|
||||
}
|
||||
if err := devices.CreateNVIDIAControlDevices(); err != nil {
|
||||
return fmt.Errorf("failed to create control device nodes: %v", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateCDISpec generates a CDI spec for use in management containers
|
||||
func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) error {
|
||||
if !opts.CDI.Enabled {
|
||||
return nil
|
||||
}
|
||||
t.logger.Info("Generating CDI spec for management containers")
|
||||
cdilib, err := nvcdi.New(
|
||||
nvcdi.WithLogger(t.logger),
|
||||
nvcdi.WithMode(nvcdi.ModeManagement),
|
||||
nvcdi.WithDriverRoot(opts.DriverRootCtrPath),
|
||||
nvcdi.WithDevRoot(opts.DevRootCtrPath),
|
||||
nvcdi.WithNVIDIACDIHookPath(nvidiaCDIHookPath),
|
||||
nvcdi.WithVendor(opts.CDI.vendor),
|
||||
nvcdi.WithClass(opts.CDI.class),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create CDI library for management containers: %v", err)
|
||||
}
|
||||
|
||||
spec, err := cdilib.GetSpec()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to genereate CDI spec for management containers: %v", err)
|
||||
}
|
||||
|
||||
transformer := transformroot.NewDriverTransformer(
|
||||
transformroot.WithDriverRoot(opts.DriverRootCtrPath),
|
||||
transformroot.WithTargetDriverRoot(opts.DriverRoot),
|
||||
transformroot.WithDevRoot(opts.DevRootCtrPath),
|
||||
transformroot.WithTargetDevRoot(opts.DevRoot),
|
||||
)
|
||||
if err := transformer.Transform(spec.Raw()); err != nil {
|
||||
return fmt.Errorf("failed to transform driver root in CDI spec: %v", err)
|
||||
}
|
||||
|
||||
name, err := cdi.GenerateNameForSpec(spec.Raw())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI name for management containers: %v", err)
|
||||
}
|
||||
err = spec.Save(filepath.Join(opts.CDI.outputDir, name))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to save CDI spec for management containers: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,222 +0,0 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package toolkit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
)
|
||||
|
||||
func TestInstall(t *testing.T) {
|
||||
t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true")
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
moduleRoot, err := test.GetModuleRoot()
|
||||
require.NoError(t, err)
|
||||
|
||||
artifactRoot := filepath.Join(moduleRoot, "testdata", "installer", "artifacts")
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
hostRoot string
|
||||
packageType string
|
||||
cdiEnabled bool
|
||||
expectedError error
|
||||
expectedCdiSpec string
|
||||
}{
|
||||
{
|
||||
hostRoot: "rootfs-empty",
|
||||
packageType: "deb",
|
||||
},
|
||||
{
|
||||
hostRoot: "rootfs-empty",
|
||||
packageType: "rpm",
|
||||
},
|
||||
{
|
||||
hostRoot: "rootfs-empty",
|
||||
packageType: "deb",
|
||||
cdiEnabled: true,
|
||||
expectedError: fmt.Errorf("no NVIDIA device nodes found"),
|
||||
},
|
||||
{
|
||||
hostRoot: "rootfs-1",
|
||||
packageType: "deb",
|
||||
cdiEnabled: true,
|
||||
expectedCdiSpec: `---
|
||||
cdiVersion: 0.5.0
|
||||
kind: example.com/class
|
||||
devices:
|
||||
- name: all
|
||||
containerEdits:
|
||||
deviceNodes:
|
||||
- path: /dev/nvidia0
|
||||
hostPath: /host/driver/root/dev/nvidia0
|
||||
- path: /dev/nvidiactl
|
||||
hostPath: /host/driver/root/dev/nvidiactl
|
||||
- path: /dev/nvidia-caps-imex-channels/channel0
|
||||
hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel0
|
||||
- path: /dev/nvidia-caps-imex-channels/channel1
|
||||
hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel1
|
||||
- path: /dev/nvidia-caps-imex-channels/channel2047
|
||||
hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel2047
|
||||
containerEdits:
|
||||
env:
|
||||
- NVIDIA_VISIBLE_DEVICES=void
|
||||
hooks:
|
||||
- hookName: createContainer
|
||||
path: {{ .toolkitRoot }}/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- create-symlinks
|
||||
- --link
|
||||
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
- hookName: createContainer
|
||||
path: {{ .toolkitRoot }}/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- update-ldcache
|
||||
- --folder
|
||||
- /lib/x86_64-linux-gnu
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
mounts:
|
||||
- hostPath: /host/driver/root/lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||
options:
|
||||
- ro
|
||||
- nosuid
|
||||
- nodev
|
||||
- rbind
|
||||
- rprivate
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
// hostRoot := filepath.Join(moduleRoot, "testdata", "lookup", tc.hostRoot)
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
testRoot := t.TempDir()
|
||||
toolkitRoot := filepath.Join(testRoot, "toolkit-test")
|
||||
cdiOutputDir := filepath.Join(moduleRoot, "toolkit-test", "/var/cdi")
|
||||
sourceRoot := filepath.Join(artifactRoot, tc.packageType)
|
||||
options := Options{
|
||||
DriverRoot: "/host/driver/root",
|
||||
DriverRootCtrPath: filepath.Join(moduleRoot, "testdata", "lookup", tc.hostRoot),
|
||||
CDI: cdiOptions{
|
||||
Enabled: tc.cdiEnabled,
|
||||
outputDir: cdiOutputDir,
|
||||
kind: "example.com/class",
|
||||
},
|
||||
}
|
||||
|
||||
ti := NewInstaller(
|
||||
WithLogger(logger),
|
||||
WithToolkitRoot(toolkitRoot),
|
||||
WithSourceRoot(sourceRoot),
|
||||
)
|
||||
require.NoError(t, ti.ValidateOptions(&options))
|
||||
|
||||
err := ti.Install(&cli.Context{}, &options)
|
||||
if tc.expectedError == nil {
|
||||
require.NoError(t, err)
|
||||
} else {
|
||||
require.Contains(t, err.Error(), tc.expectedError.Error())
|
||||
}
|
||||
|
||||
require.DirExists(t, toolkitRoot)
|
||||
requireSymlink(t, toolkitRoot, "libnvidia-container.so.1", "libnvidia-container.so.99.88.77")
|
||||
requireSymlink(t, toolkitRoot, "libnvidia-container-go.so.1", "libnvidia-container-go.so.99.88.77")
|
||||
|
||||
requireWrappedExecutable(t, toolkitRoot, "nvidia-cdi-hook")
|
||||
requireWrappedExecutable(t, toolkitRoot, "nvidia-container-cli")
|
||||
requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime")
|
||||
requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime-hook")
|
||||
requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime.cdi")
|
||||
requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime.legacy")
|
||||
requireWrappedExecutable(t, toolkitRoot, "nvidia-ctk")
|
||||
|
||||
requireSymlink(t, toolkitRoot, "nvidia-container-toolkit", "nvidia-container-runtime-hook")
|
||||
|
||||
// TODO: Add checks for wrapper contents
|
||||
// grep -q -E "nvidia driver modules are not yet loaded, invoking runc directly" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime"
|
||||
// grep -q -E "exec runc \".@\"" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime"
|
||||
|
||||
require.DirExists(t, filepath.Join(toolkitRoot, ".config"))
|
||||
require.DirExists(t, filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime"))
|
||||
require.FileExists(t, filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime", "config.toml"))
|
||||
|
||||
cfgToml, err := config.New(config.WithConfigFile(filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime", "config.toml")))
|
||||
require.NoError(t, err)
|
||||
|
||||
cfg, err := cfgToml.Config()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Ensure that the config file has the required contents.
|
||||
// TODO: Add checks for additional config options.
|
||||
require.Equal(t, "/host/driver/root", cfg.NVIDIAContainerCLIConfig.Root)
|
||||
require.Equal(t, "@/host/driver/root/sbin/ldconfig", string(cfg.NVIDIAContainerCLIConfig.Ldconfig))
|
||||
require.EqualValues(t, filepath.Join(toolkitRoot, "nvidia-container-cli"), cfg.NVIDIAContainerCLIConfig.Path)
|
||||
require.EqualValues(t, filepath.Join(toolkitRoot, "nvidia-ctk"), cfg.NVIDIACTKConfig.Path)
|
||||
|
||||
if len(tc.expectedCdiSpec) > 0 {
|
||||
cdiSpecFile := filepath.Join(cdiOutputDir, "example.com-class.yaml")
|
||||
require.FileExists(t, cdiSpecFile)
|
||||
info, err := os.Stat(cdiSpecFile)
|
||||
require.NoError(t, err)
|
||||
require.NotZero(t, info.Mode()&0004)
|
||||
contents, err := os.ReadFile(cdiSpecFile)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, strings.ReplaceAll(tc.expectedCdiSpec, "{{ .toolkitRoot }}", toolkitRoot), string(contents))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func requireWrappedExecutable(t *testing.T, toolkitRoot string, expectedExecutable string) {
|
||||
requireExecutable(t, toolkitRoot, expectedExecutable)
|
||||
requireExecutable(t, toolkitRoot, expectedExecutable+".real")
|
||||
}
|
||||
|
||||
func requireExecutable(t *testing.T, toolkitRoot string, expectedExecutable string) {
|
||||
executable := filepath.Join(toolkitRoot, expectedExecutable)
|
||||
require.FileExists(t, executable)
|
||||
info, err := os.Lstat(executable)
|
||||
require.NoError(t, err)
|
||||
require.Zero(t, info.Mode()&os.ModeSymlink)
|
||||
require.NotZero(t, info.Mode()&0111)
|
||||
}
|
||||
|
||||
func requireSymlink(t *testing.T, toolkitRoot string, expectedLink string, expectedTarget string) {
|
||||
link := filepath.Join(toolkitRoot, expectedLink)
|
||||
require.FileExists(t, link)
|
||||
target, err := symlinks.Resolve(link)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, expectedTarget, target)
|
||||
}
|
||||
@@ -43,7 +43,7 @@ By default, all commands output to `STDOUT`, but specifying the `--output` flag
|
||||
|
||||
### Generate CDI specifications
|
||||
|
||||
The [Container Device Interface (CDI)](https://tags.cncf.io/container-device-interface) provides
|
||||
The [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface) provides
|
||||
a vendor-agnostic mechanism to make arbitrary devices accessible in containerized environments. To allow NVIDIA devices to be
|
||||
used in these environments, the NVIDIA Container Toolkit CLI includes functionality to generate a CDI specification for the
|
||||
available NVIDIA GPUs in a system.
|
||||
|
||||
@@ -17,12 +17,11 @@
|
||||
package cdi
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/list"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -23,9 +23,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
cdi "tags.cncf.io/container-device-interface/pkg/parser"
|
||||
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
@@ -44,28 +42,21 @@ type command struct {
|
||||
}
|
||||
|
||||
type options struct {
|
||||
output string
|
||||
format string
|
||||
deviceNameStrategies cli.StringSlice
|
||||
driverRoot string
|
||||
devRoot string
|
||||
nvidiaCDIHookPath string
|
||||
ldconfigPath string
|
||||
mode string
|
||||
vendor string
|
||||
class string
|
||||
output string
|
||||
format string
|
||||
deviceNameStrategy string
|
||||
driverRoot string
|
||||
nvidiaCTKPath string
|
||||
mode string
|
||||
vendor string
|
||||
class string
|
||||
|
||||
configSearchPaths cli.StringSlice
|
||||
librarySearchPaths cli.StringSlice
|
||||
disabledHooks cli.StringSlice
|
||||
|
||||
csv struct {
|
||||
files cli.StringSlice
|
||||
ignorePatterns cli.StringSlice
|
||||
}
|
||||
|
||||
// the following are used for dependency injection during spec generation.
|
||||
nvmllib nvml.Interface
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
@@ -93,74 +84,44 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "config-search-path",
|
||||
Usage: "Specify the path to search for config files when discovering the entities that should be included in the CDI specification.",
|
||||
Destination: &opts.configSearchPaths,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CONFIG_SEARCH_PATHS"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "output",
|
||||
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
|
||||
Destination: &opts.output,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_OUTPUT_FILE_PATH"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "format",
|
||||
Usage: "The output format for the generated spec [json | yaml]. This overrides the format defined by the output file extension (if specified).",
|
||||
Value: spec.FormatYAML,
|
||||
Destination: &opts.format,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_OUTPUT_FORMAT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "mode",
|
||||
Aliases: []string{"discovery-mode"},
|
||||
Usage: "The mode to use when discovering the available entities. " +
|
||||
"One of [" + strings.Join(nvcdi.AllModes[string](), " | ") + "]. " +
|
||||
"If mode is set to 'auto' the mode will be determined based on the system configuration.",
|
||||
Value: string(nvcdi.ModeAuto),
|
||||
Name: "mode",
|
||||
Aliases: []string{"discovery-mode"},
|
||||
Usage: "The mode to use when discovering the available entities. One of [auto | nvml | wsl]. If mode is set to 'auto' the mode will be determined based on the system configuration.",
|
||||
Value: nvcdi.ModeAuto,
|
||||
Destination: &opts.mode,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_MODE"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "dev-root",
|
||||
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
|
||||
Destination: &opts.devRoot,
|
||||
EnvVars: []string{"NVIDIA_CTK_DEV_ROOT"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "device-name-strategy",
|
||||
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
|
||||
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID),
|
||||
Destination: &opts.deviceNameStrategies,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_DEVICE_NAME_STRATEGIES"},
|
||||
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
|
||||
Value: nvcdi.DeviceNameStrategyIndex,
|
||||
Destination: &opts.deviceNameStrategy,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "driver-root",
|
||||
Usage: "Specify the NVIDIA GPU driver root to use when discovering the entities that should be included in the CDI specification.",
|
||||
Destination: &opts.driverRoot,
|
||||
EnvVars: []string{"NVIDIA_CTK_DRIVER_ROOT"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "library-search-path",
|
||||
Usage: "Specify the path to search for libraries when discovering the entities that should be included in the CDI specification.\n\tNote: This option only applies to CSV mode.",
|
||||
Destination: &opts.librarySearchPaths,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_LIBRARY_SEARCH_PATHS"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-cdi-hook-path",
|
||||
Aliases: []string{"nvidia-ctk-path"},
|
||||
Usage: "Specify the path to use for the nvidia-cdi-hook in the generated CDI specification. " +
|
||||
"If not specified, the PATH will be searched for `nvidia-cdi-hook`. " +
|
||||
"NOTE: That if this is specified as `nvidia-ctk`, the PATH will be searched for `nvidia-ctk` instead.",
|
||||
Destination: &opts.nvidiaCDIHookPath,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_HOOK_PATH"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "ldconfig-path",
|
||||
Usage: "Specify the path to use for ldconfig in the generated CDI specification",
|
||||
Destination: &opts.ldconfigPath,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_LDCONFIG_PATH"},
|
||||
Name: "nvidia-ctk-path",
|
||||
Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.",
|
||||
Destination: &opts.nvidiaCTKPath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "vendor",
|
||||
@@ -168,7 +129,6 @@ func (m command) build() *cli.Command {
|
||||
Usage: "the vendor string to use for the generated CDI specification.",
|
||||
Value: "nvidia.com",
|
||||
Destination: &opts.vendor,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_VENDOR"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "class",
|
||||
@@ -176,30 +136,17 @@ func (m command) build() *cli.Command {
|
||||
Usage: "the class string to use for the generated CDI specification.",
|
||||
Value: "gpu",
|
||||
Destination: &opts.class,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CLASS"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "csv.file",
|
||||
Usage: "The path to the list of CSV files to use when generating the CDI specification in CSV mode.",
|
||||
Value: cli.NewStringSlice(csv.DefaultFileList()...),
|
||||
Destination: &opts.csv.files,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CSV_FILES"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "csv.ignore-pattern",
|
||||
Usage: "specify a pattern the CSV mount specifications.",
|
||||
Usage: "Specify a pattern the CSV mount specifications.",
|
||||
Destination: &opts.csv.ignorePatterns,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CSV_IGNORE_PATTERNS"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "disable-hook",
|
||||
Aliases: []string{"disable-hooks"},
|
||||
Usage: "specify a specific hook to skip when generating CDI " +
|
||||
"specifications. This can be specified multiple times and the " +
|
||||
"special hook name 'all' can be used ensure that the generated " +
|
||||
"CDI specification does not include any hooks.",
|
||||
Destination: &opts.disabledHooks,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_DISABLED_HOOKS"},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -216,18 +163,22 @@ func (m command) validateFlags(c *cli.Context, opts *options) error {
|
||||
}
|
||||
|
||||
opts.mode = strings.ToLower(opts.mode)
|
||||
if !nvcdi.IsValidMode(opts.mode) {
|
||||
switch opts.mode {
|
||||
case nvcdi.ModeAuto:
|
||||
case nvcdi.ModeCSV:
|
||||
case nvcdi.ModeNvml:
|
||||
case nvcdi.ModeWsl:
|
||||
case nvcdi.ModeManagement:
|
||||
default:
|
||||
return fmt.Errorf("invalid discovery mode: %v", opts.mode)
|
||||
}
|
||||
|
||||
for _, strategy := range opts.deviceNameStrategies.Value() {
|
||||
_, err := nvcdi.NewDeviceNamer(strategy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
opts.nvidiaCDIHookPath = config.ResolveNVIDIACDIHookPath(m.logger, opts.nvidiaCDIHookPath)
|
||||
opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath)
|
||||
|
||||
if outputFileFormat := formatFromFilename(opts.output); outputFileFormat != "" {
|
||||
m.logger.Debugf("Inferred output format as %q from output file name", outputFileFormat)
|
||||
@@ -278,36 +229,21 @@ func formatFromFilename(filename string) string {
|
||||
}
|
||||
|
||||
func (m command) generateSpec(opts *options) (spec.Interface, error) {
|
||||
var deviceNamers []nvcdi.DeviceNamer
|
||||
for _, strategy := range opts.deviceNameStrategies.Value() {
|
||||
deviceNamer, err := nvcdi.NewDeviceNamer(strategy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device namer: %v", err)
|
||||
}
|
||||
deviceNamers = append(deviceNamers, deviceNamer)
|
||||
deviceNamer, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device namer: %v", err)
|
||||
}
|
||||
|
||||
cdiOptions := []nvcdi.Option{
|
||||
cdilib, err := nvcdi.New(
|
||||
nvcdi.WithLogger(m.logger),
|
||||
nvcdi.WithDriverRoot(opts.driverRoot),
|
||||
nvcdi.WithDevRoot(opts.devRoot),
|
||||
nvcdi.WithNVIDIACDIHookPath(opts.nvidiaCDIHookPath),
|
||||
nvcdi.WithLdconfigPath(opts.ldconfigPath),
|
||||
nvcdi.WithDeviceNamers(deviceNamers...),
|
||||
nvcdi.WithMode(opts.mode),
|
||||
nvcdi.WithConfigSearchPaths(opts.configSearchPaths.Value()),
|
||||
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
|
||||
nvcdi.WithDeviceNamer(deviceNamer),
|
||||
nvcdi.WithMode(string(opts.mode)),
|
||||
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
|
||||
nvcdi.WithCSVFiles(opts.csv.files.Value()),
|
||||
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
|
||||
// We set the following to allow for dependency injection:
|
||||
nvcdi.WithNvmlLib(opts.nvmllib),
|
||||
}
|
||||
|
||||
for _, hook := range opts.disabledHooks.Value() {
|
||||
cdiOptions = append(cdiOptions, nvcdi.WithDisabledHook(hook))
|
||||
}
|
||||
|
||||
cdilib, err := nvcdi.New(cdiOptions...)
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CDI library: %v", err)
|
||||
}
|
||||
|
||||
@@ -1,371 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
)
|
||||
|
||||
func TestGenerateSpec(t *testing.T) {
|
||||
t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true")
|
||||
moduleRoot, err := test.GetModuleRoot()
|
||||
require.NoError(t, err)
|
||||
|
||||
driverRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1")
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
testCases := []struct {
|
||||
description string
|
||||
options options
|
||||
expectedValidateError error
|
||||
expectedOptions options
|
||||
expectedError error
|
||||
expectedSpec string
|
||||
}{
|
||||
{
|
||||
description: "default",
|
||||
options: options{
|
||||
format: "yaml",
|
||||
mode: "nvml",
|
||||
vendor: "example.com",
|
||||
class: "device",
|
||||
driverRoot: driverRoot,
|
||||
},
|
||||
expectedOptions: options{
|
||||
format: "yaml",
|
||||
mode: "nvml",
|
||||
vendor: "example.com",
|
||||
class: "device",
|
||||
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
|
||||
driverRoot: driverRoot,
|
||||
},
|
||||
expectedSpec: `---
|
||||
cdiVersion: 0.5.0
|
||||
kind: example.com/device
|
||||
devices:
|
||||
- name: "0"
|
||||
containerEdits:
|
||||
deviceNodes:
|
||||
- path: /dev/nvidia0
|
||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
||||
- name: all
|
||||
containerEdits:
|
||||
deviceNodes:
|
||||
- path: /dev/nvidia0
|
||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
||||
containerEdits:
|
||||
env:
|
||||
- NVIDIA_VISIBLE_DEVICES=void
|
||||
deviceNodes:
|
||||
- path: /dev/nvidiactl
|
||||
hostPath: {{ .driverRoot }}/dev/nvidiactl
|
||||
hooks:
|
||||
- hookName: createContainer
|
||||
path: /usr/bin/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- create-symlinks
|
||||
- --link
|
||||
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
- hookName: createContainer
|
||||
path: /usr/bin/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- enable-cuda-compat
|
||||
- --host-driver-version=999.88.77
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
- hookName: createContainer
|
||||
path: /usr/bin/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- update-ldcache
|
||||
- --folder
|
||||
- /lib/x86_64-linux-gnu
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
- hookName: createContainer
|
||||
path: /usr/bin/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- disable-device-node-modification
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
mounts:
|
||||
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||
options:
|
||||
- ro
|
||||
- nosuid
|
||||
- nodev
|
||||
- rbind
|
||||
- rprivate
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "disableHooks1",
|
||||
options: options{
|
||||
format: "yaml",
|
||||
mode: "nvml",
|
||||
vendor: "example.com",
|
||||
class: "device",
|
||||
driverRoot: driverRoot,
|
||||
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat")),
|
||||
},
|
||||
expectedOptions: options{
|
||||
format: "yaml",
|
||||
mode: "nvml",
|
||||
vendor: "example.com",
|
||||
class: "device",
|
||||
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
|
||||
driverRoot: driverRoot,
|
||||
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat")),
|
||||
},
|
||||
expectedSpec: `---
|
||||
cdiVersion: 0.5.0
|
||||
kind: example.com/device
|
||||
devices:
|
||||
- name: "0"
|
||||
containerEdits:
|
||||
deviceNodes:
|
||||
- path: /dev/nvidia0
|
||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
||||
- name: all
|
||||
containerEdits:
|
||||
deviceNodes:
|
||||
- path: /dev/nvidia0
|
||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
||||
containerEdits:
|
||||
env:
|
||||
- NVIDIA_VISIBLE_DEVICES=void
|
||||
deviceNodes:
|
||||
- path: /dev/nvidiactl
|
||||
hostPath: {{ .driverRoot }}/dev/nvidiactl
|
||||
hooks:
|
||||
- hookName: createContainer
|
||||
path: /usr/bin/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- create-symlinks
|
||||
- --link
|
||||
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
- hookName: createContainer
|
||||
path: /usr/bin/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- update-ldcache
|
||||
- --folder
|
||||
- /lib/x86_64-linux-gnu
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
- hookName: createContainer
|
||||
path: /usr/bin/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- disable-device-node-modification
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
mounts:
|
||||
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||
options:
|
||||
- ro
|
||||
- nosuid
|
||||
- nodev
|
||||
- rbind
|
||||
- rprivate
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "disableHooks2",
|
||||
options: options{
|
||||
format: "yaml",
|
||||
mode: "nvml",
|
||||
vendor: "example.com",
|
||||
class: "device",
|
||||
driverRoot: driverRoot,
|
||||
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat", "update-ldcache")),
|
||||
},
|
||||
expectedOptions: options{
|
||||
format: "yaml",
|
||||
mode: "nvml",
|
||||
vendor: "example.com",
|
||||
class: "device",
|
||||
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
|
||||
driverRoot: driverRoot,
|
||||
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat", "update-ldcache")),
|
||||
},
|
||||
expectedSpec: `---
|
||||
cdiVersion: 0.5.0
|
||||
kind: example.com/device
|
||||
devices:
|
||||
- name: "0"
|
||||
containerEdits:
|
||||
deviceNodes:
|
||||
- path: /dev/nvidia0
|
||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
||||
- name: all
|
||||
containerEdits:
|
||||
deviceNodes:
|
||||
- path: /dev/nvidia0
|
||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
||||
containerEdits:
|
||||
env:
|
||||
- NVIDIA_VISIBLE_DEVICES=void
|
||||
deviceNodes:
|
||||
- path: /dev/nvidiactl
|
||||
hostPath: {{ .driverRoot }}/dev/nvidiactl
|
||||
hooks:
|
||||
- hookName: createContainer
|
||||
path: /usr/bin/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- create-symlinks
|
||||
- --link
|
||||
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
- hookName: createContainer
|
||||
path: /usr/bin/nvidia-cdi-hook
|
||||
args:
|
||||
- nvidia-cdi-hook
|
||||
- disable-device-node-modification
|
||||
env:
|
||||
- NVIDIA_CTK_DEBUG=false
|
||||
mounts:
|
||||
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||
options:
|
||||
- ro
|
||||
- nosuid
|
||||
- nodev
|
||||
- rbind
|
||||
- rprivate
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "disableHooksAll",
|
||||
options: options{
|
||||
format: "yaml",
|
||||
mode: "nvml",
|
||||
vendor: "example.com",
|
||||
class: "device",
|
||||
driverRoot: driverRoot,
|
||||
disabledHooks: valueOf(cli.NewStringSlice("all")),
|
||||
},
|
||||
expectedOptions: options{
|
||||
format: "yaml",
|
||||
mode: "nvml",
|
||||
vendor: "example.com",
|
||||
class: "device",
|
||||
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
|
||||
driverRoot: driverRoot,
|
||||
disabledHooks: valueOf(cli.NewStringSlice("all")),
|
||||
},
|
||||
expectedSpec: `---
|
||||
cdiVersion: 0.5.0
|
||||
kind: example.com/device
|
||||
devices:
|
||||
- name: "0"
|
||||
containerEdits:
|
||||
deviceNodes:
|
||||
- path: /dev/nvidia0
|
||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
||||
- name: all
|
||||
containerEdits:
|
||||
deviceNodes:
|
||||
- path: /dev/nvidia0
|
||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
||||
containerEdits:
|
||||
env:
|
||||
- NVIDIA_VISIBLE_DEVICES=void
|
||||
deviceNodes:
|
||||
- path: /dev/nvidiactl
|
||||
hostPath: {{ .driverRoot }}/dev/nvidiactl
|
||||
mounts:
|
||||
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||
options:
|
||||
- ro
|
||||
- nosuid
|
||||
- nodev
|
||||
- rbind
|
||||
- rprivate
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
err := c.validateFlags(nil, &tc.options)
|
||||
require.ErrorIs(t, err, tc.expectedValidateError)
|
||||
require.EqualValues(t, tc.expectedOptions, tc.options)
|
||||
|
||||
// Set up a mock server, reusing the DGX A100 mock.
|
||||
server := dgxa100.New()
|
||||
// Override the driver version to match the version in our mock filesystem.
|
||||
server.SystemGetDriverVersionFunc = func() (string, nvml.Return) {
|
||||
return "999.88.77", nvml.SUCCESS
|
||||
}
|
||||
// Set the device count to 1 explicitly since we only have a single device node.
|
||||
server.DeviceGetCountFunc = func() (int, nvml.Return) {
|
||||
return 1, nvml.SUCCESS
|
||||
}
|
||||
for _, d := range server.Devices {
|
||||
// TODO: This is not implemented in the mock.
|
||||
(d.(*dgxa100.Device)).GetMaxMigDeviceCountFunc = func() (int, nvml.Return) {
|
||||
return 0, nvml.SUCCESS
|
||||
}
|
||||
}
|
||||
tc.options.nvmllib = server
|
||||
|
||||
spec, err := c.generateSpec(&tc.options)
|
||||
require.ErrorIs(t, err, tc.expectedError)
|
||||
|
||||
var buf bytes.Buffer
|
||||
_, err = spec.WriteTo(&buf)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, strings.ReplaceAll(tc.expectedSpec, "{{ .driverRoot }}", driverRoot), buf.String())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// valueOf returns the value of a pointer.
|
||||
// Note that this does not check for a nil pointer and is only used for testing.
|
||||
func valueOf[T any](v *T) T {
|
||||
return *v
|
||||
}
|
||||
@@ -17,7 +17,6 @@
|
||||
package list
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
@@ -30,9 +29,7 @@ type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
type config struct {
|
||||
cdiSpecDirs cli.StringSlice
|
||||
}
|
||||
type config struct{}
|
||||
|
||||
// NewCommand constructs a cdi list command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
@@ -58,45 +55,30 @@ func (m command) build() *cli.Command {
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "spec-dir",
|
||||
Usage: "specify the directories to scan for CDI specifications",
|
||||
Value: cli.NewStringSlice(cdi.DefaultSpecDirs...),
|
||||
Destination: &cfg.cdiSpecDirs,
|
||||
EnvVars: []string{"NVIDIA_CTK_CDI_SPEC_DIRS"},
|
||||
},
|
||||
}
|
||||
c.Flags = []cli.Flag{}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(c *cli.Context, cfg *config) error {
|
||||
if len(cfg.cdiSpecDirs.Value()) == 0 {
|
||||
return errors.New("at least one CDI specification directory must be specified")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
registry, err := cdi.NewCache(
|
||||
cdi.WithAutoRefresh(false),
|
||||
cdi.WithSpecDirs(cfg.cdiSpecDirs.Value()...),
|
||||
cdi.WithSpecDirs(cdi.DefaultSpecDirs...),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create CDI cache: %v", err)
|
||||
}
|
||||
|
||||
_ = registry.Refresh()
|
||||
if errors := registry.GetErrors(); len(errors) > 0 {
|
||||
m.logger.Warningf("The following registry errors were reported:")
|
||||
for k, err := range errors {
|
||||
m.logger.Warningf("%v: %v", k, err)
|
||||
}
|
||||
}
|
||||
|
||||
refreshErr := registry.Refresh()
|
||||
devices := registry.ListDevices()
|
||||
m.logger.Infof("Found %d CDI devices", len(devices))
|
||||
if refreshErr != nil {
|
||||
m.logger.Warningf("Refreshing the CDI registry returned the following error(s): %v", refreshErr)
|
||||
}
|
||||
for _, device := range devices {
|
||||
fmt.Printf("%s\n", device)
|
||||
}
|
||||
|
||||
@@ -26,9 +26,14 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
|
||||
)
|
||||
|
||||
type loadSaver interface {
|
||||
Load() (spec.Interface, error)
|
||||
Save(spec.Interface) error
|
||||
}
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
@@ -40,9 +45,8 @@ type transformOptions struct {
|
||||
|
||||
type options struct {
|
||||
transformOptions
|
||||
from string
|
||||
to string
|
||||
relativeTo string
|
||||
from string
|
||||
to string
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
@@ -69,11 +73,6 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "from",
|
||||
Usage: "specify the root to be transformed",
|
||||
Destination: &opts.from,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "input",
|
||||
Usage: "Specify the file to read the CDI specification from. If this is '-' the specification is read from STDIN",
|
||||
@@ -86,10 +85,9 @@ func (m command) build() *cli.Command {
|
||||
Destination: &opts.output,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "relative-to",
|
||||
Usage: "specify whether the transform is relative to the host or to the container. One of [ host | container ]",
|
||||
Value: "host",
|
||||
Destination: &opts.relativeTo,
|
||||
Name: "from",
|
||||
Usage: "specify the root to be transformed",
|
||||
Destination: &opts.from,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "to",
|
||||
@@ -103,12 +101,6 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
func (m command) validateFlags(c *cli.Context, opts *options) error {
|
||||
switch opts.relativeTo {
|
||||
case "host":
|
||||
case "container":
|
||||
default:
|
||||
return fmt.Errorf("invalid --relative-to value: %v", opts.relativeTo)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -118,10 +110,9 @@ func (m command) run(c *cli.Context, opts *options) error {
|
||||
return fmt.Errorf("failed to load CDI specification: %w", err)
|
||||
}
|
||||
|
||||
err = transformroot.New(
|
||||
transformroot.WithRoot(opts.from),
|
||||
transformroot.WithTargetRoot(opts.to),
|
||||
transformroot.WithRelativeTo(opts.relativeTo),
|
||||
err = transform.NewRootTransformer(
|
||||
opts.from,
|
||||
opts.to,
|
||||
).Transform(spec.Raw())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to transform CDI specification: %w", err)
|
||||
|
||||
@@ -17,10 +17,9 @@
|
||||
package transform
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -38,8 +38,7 @@ type command struct {
|
||||
// options stores the subcommand options
|
||||
type options struct {
|
||||
flags.Options
|
||||
setListSeparator string
|
||||
sets cli.StringSlice
|
||||
sets cli.StringSlice
|
||||
}
|
||||
|
||||
// NewCommand constructs an config command with the specified logger
|
||||
@@ -58,9 +57,6 @@ func (m command) build() *cli.Command {
|
||||
c := cli.Command{
|
||||
Name: "config",
|
||||
Usage: "Interact with the NVIDIA Container Toolkit configuration",
|
||||
Before: func(ctx *cli.Context) error {
|
||||
return validateFlags(ctx, &opts)
|
||||
},
|
||||
Action: func(ctx *cli.Context) error {
|
||||
return run(ctx, &opts)
|
||||
},
|
||||
@@ -75,21 +71,10 @@ func (m command) build() *cli.Command {
|
||||
Destination: &opts.Config,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "set",
|
||||
Usage: "Set a config value using the pattern 'key[=value]'. " +
|
||||
"Specifying only 'key' is equivalent to 'key=true' for boolean settings. " +
|
||||
"This flag can be specified multiple times, but only the last value for a specific " +
|
||||
"config option is applied. " +
|
||||
"If the setting represents a list, the elements are colon-separated.",
|
||||
Name: "set",
|
||||
Usage: "Set a config value using the pattern key=value. If value is empty, this is equivalent to specifying the same key in unset. This flag can be specified multiple times",
|
||||
Destination: &opts.sets,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "set-list-separator",
|
||||
Usage: "Specify a separator for lists applied using the set command.",
|
||||
Hidden: true,
|
||||
Value: ":",
|
||||
Destination: &opts.setListSeparator,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "in-place",
|
||||
Aliases: []string{"i"},
|
||||
@@ -111,13 +96,6 @@ func (m command) build() *cli.Command {
|
||||
return &c
|
||||
}
|
||||
|
||||
func validateFlags(c *cli.Context, opts *options) error {
|
||||
if opts.setListSeparator == "" {
|
||||
return fmt.Errorf("set-list-separator must be set")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func run(c *cli.Context, opts *options) error {
|
||||
cfgToml, err := config.New(
|
||||
config.WithConfigFile(opts.Config),
|
||||
@@ -127,15 +105,11 @@ func run(c *cli.Context, opts *options) error {
|
||||
}
|
||||
|
||||
for _, set := range opts.sets.Value() {
|
||||
key, value, err := setFlagToKeyValue(set, opts.setListSeparator)
|
||||
key, value, err := setFlagToKeyValue(set)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid --set option %v: %w", set, err)
|
||||
}
|
||||
if value == nil {
|
||||
_ = cfgToml.Delete(key)
|
||||
} else {
|
||||
cfgToml.Set(key, value)
|
||||
}
|
||||
cfgToml.Set(key, value)
|
||||
}
|
||||
|
||||
if err := opts.EnsureOutputFolder(); err != nil {
|
||||
@@ -147,10 +121,10 @@ func run(c *cli.Context, opts *options) error {
|
||||
}
|
||||
defer output.Close()
|
||||
|
||||
if _, err := cfgToml.Save(output); err != nil {
|
||||
return fmt.Errorf("failed to save config: %v", err)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cfgToml.Save(output)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -161,7 +135,7 @@ var errInvalidFormat = errors.New("invalid format")
|
||||
// setFlagToKeyValue converts a --set flag to a key-value pair.
|
||||
// The set flag is of the form key[=value], with the value being optional if key refers to a
|
||||
// boolean config option.
|
||||
func setFlagToKeyValue(setFlag string, setListSeparator string) (string, interface{}, error) {
|
||||
func setFlagToKeyValue(setFlag string) (string, interface{}, error) {
|
||||
setParts := strings.SplitN(setFlag, "=", 2)
|
||||
key := setParts[0]
|
||||
|
||||
@@ -172,36 +146,24 @@ func setFlagToKeyValue(setFlag string, setListSeparator string) (string, interfa
|
||||
|
||||
kind := field.Kind()
|
||||
if len(setParts) != 2 {
|
||||
if kind == reflect.Bool || (kind == reflect.Pointer && field.Elem().Kind() == reflect.Bool) {
|
||||
if kind == reflect.Bool {
|
||||
return key, true, nil
|
||||
}
|
||||
return key, nil, fmt.Errorf("%w: expected key=value; got %v", errInvalidFormat, setFlag)
|
||||
}
|
||||
|
||||
value := setParts[1]
|
||||
if kind == reflect.Pointer && value != "nil" {
|
||||
kind = field.Elem().Kind()
|
||||
}
|
||||
switch kind {
|
||||
case reflect.Pointer:
|
||||
return key, nil, nil
|
||||
case reflect.Bool:
|
||||
b, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return key, value, fmt.Errorf("%w: %w", errInvalidFormat, err)
|
||||
}
|
||||
return key, b, nil
|
||||
return key, b, err
|
||||
case reflect.String:
|
||||
return key, value, nil
|
||||
case reflect.Slice:
|
||||
valueParts := []string{value}
|
||||
for _, sep := range []string{setListSeparator, ","} {
|
||||
if !strings.Contains(value, sep) {
|
||||
continue
|
||||
}
|
||||
valueParts = strings.Split(value, sep)
|
||||
break
|
||||
}
|
||||
valueParts := strings.Split(value, ",")
|
||||
switch field.Elem().Kind() {
|
||||
case reflect.String:
|
||||
return key, valueParts, nil
|
||||
@@ -239,7 +201,7 @@ func getStruct(current reflect.Type, paths ...string) (reflect.StructField, erro
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if strings.SplitN(v, ",", 2)[0] != tomlField {
|
||||
if v != tomlField {
|
||||
continue
|
||||
}
|
||||
if len(paths) == 1 {
|
||||
|
||||
@@ -25,12 +25,11 @@ import (
|
||||
func TestSetFlagToKeyValue(t *testing.T) {
|
||||
// TODO: We need to enable this test again since switching to reflect.
|
||||
testCases := []struct {
|
||||
description string
|
||||
setFlag string
|
||||
setListSeparator string
|
||||
expectedKey string
|
||||
expectedValue interface{}
|
||||
expectedError error
|
||||
description string
|
||||
setFlag string
|
||||
expectedKey string
|
||||
expectedValue interface{}
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
description: "option not present returns an error",
|
||||
@@ -107,34 +106,22 @@ func TestSetFlagToKeyValue(t *testing.T) {
|
||||
expectedValue: []string{"string-value"},
|
||||
},
|
||||
{
|
||||
description: "[]string option returns multiple values",
|
||||
setFlag: "nvidia-container-cli.environment=first,second",
|
||||
setListSeparator: ",",
|
||||
expectedKey: "nvidia-container-cli.environment",
|
||||
expectedValue: []string{"first", "second"},
|
||||
description: "[]string option returns multiple values",
|
||||
setFlag: "nvidia-container-cli.environment=first,second",
|
||||
expectedKey: "nvidia-container-cli.environment",
|
||||
expectedValue: []string{"first", "second"},
|
||||
},
|
||||
{
|
||||
description: "[]string option returns values with equals",
|
||||
setFlag: "nvidia-container-cli.environment=first=1,second=2",
|
||||
setListSeparator: ",",
|
||||
expectedKey: "nvidia-container-cli.environment",
|
||||
expectedValue: []string{"first=1", "second=2"},
|
||||
},
|
||||
{
|
||||
description: "[]string option returns multiple values semi-colon",
|
||||
setFlag: "nvidia-container-cli.environment=first;second",
|
||||
setListSeparator: ";",
|
||||
expectedKey: "nvidia-container-cli.environment",
|
||||
expectedValue: []string{"first", "second"},
|
||||
description: "[]string option returns values with equals",
|
||||
setFlag: "nvidia-container-cli.environment=first=1,second=2",
|
||||
expectedKey: "nvidia-container-cli.environment",
|
||||
expectedValue: []string{"first=1", "second=2"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
if tc.setListSeparator == "" {
|
||||
tc.setListSeparator = ","
|
||||
}
|
||||
k, v, err := setFlagToKeyValue(tc.setFlag, tc.setListSeparator)
|
||||
k, v, err := setFlagToKeyValue(tc.setFlag)
|
||||
require.ErrorIs(t, err, tc.expectedError)
|
||||
require.EqualValues(t, tc.expectedKey, k)
|
||||
require.EqualValues(t, tc.expectedValue, v)
|
||||
|
||||
@@ -19,11 +19,10 @@ package defaultsubcommand
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -86,7 +85,8 @@ func (m command) run(c *cli.Context, opts *flags.Options) error {
|
||||
}
|
||||
defer output.Close()
|
||||
|
||||
if _, err = cfgToml.Save(output); err != nil {
|
||||
_, err = cfgToml.Save(output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write output: %v", err)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,18 +17,16 @@
|
||||
package chmod
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -37,8 +35,7 @@ type command struct {
|
||||
|
||||
type config struct {
|
||||
paths cli.StringSlice
|
||||
modeStr string
|
||||
mode fs.FileMode
|
||||
mode string
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
@@ -69,13 +66,13 @@ func (m command) build() *cli.Command {
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "path",
|
||||
Usage: "Specify a path to apply the specified mode to",
|
||||
Usage: "Specifiy a path to apply the specified mode to",
|
||||
Destination: &cfg.paths,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "mode",
|
||||
Usage: "Specify the file mode",
|
||||
Destination: &cfg.modeStr,
|
||||
Destination: &cfg.mode,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
@@ -88,16 +85,10 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
func validateFlags(c *cli.Context, cfg *config) error {
|
||||
if strings.TrimSpace(cfg.modeStr) == "" {
|
||||
if strings.TrimSpace(cfg.mode) == "" {
|
||||
return fmt.Errorf("a non-empty mode must be specified")
|
||||
}
|
||||
|
||||
modeInt, err := strconv.ParseUint(cfg.modeStr, 8, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse mode as octal: %v", err)
|
||||
}
|
||||
cfg.mode = fs.FileMode(modeInt)
|
||||
|
||||
for _, p := range cfg.paths.Value() {
|
||||
if strings.TrimSpace(p) == "" {
|
||||
return fmt.Errorf("paths must not be empty")
|
||||
@@ -121,38 +112,33 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
||||
return fmt.Errorf("empty container root detected")
|
||||
}
|
||||
|
||||
paths := m.getPaths(containerRoot, cfg.paths.Value(), cfg.mode)
|
||||
paths := m.getPaths(containerRoot, cfg.paths.Value())
|
||||
if len(paths) == 0 {
|
||||
m.logger.Debugf("No paths specified; exiting")
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, path := range paths {
|
||||
err = os.Chmod(path, cfg.mode)
|
||||
// in some cases this is not an issue (e.g. whole /dev mounted), see #143
|
||||
if errors.Is(err, fs.ErrPermission) {
|
||||
m.logger.Debugf("Ignoring permission error with chmod: %v", err)
|
||||
err = nil
|
||||
}
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
targets, err := locator.Locate("chmod")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to locate chmod: %v", err)
|
||||
}
|
||||
chmodPath := targets[0]
|
||||
|
||||
return err
|
||||
args := append([]string{filepath.Base(chmodPath), cfg.mode}, paths...)
|
||||
|
||||
return syscall.Exec(chmodPath, args, nil)
|
||||
}
|
||||
|
||||
// getPaths updates the specified paths relative to the root.
|
||||
func (m command) getPaths(root string, paths []string, desiredMode fs.FileMode) []string {
|
||||
func (m command) getPaths(root string, paths []string) []string {
|
||||
var pathsInRoot []string
|
||||
for _, f := range paths {
|
||||
path := filepath.Join(root, f)
|
||||
stat, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
m.logger.Debugf("Skipping path %q: %v", path, err)
|
||||
continue
|
||||
}
|
||||
if (stat.Mode()&(fs.ModePerm|fs.ModeSetuid|fs.ModeSetgid|fs.ModeSticky))^desiredMode == 0 {
|
||||
m.logger.Debugf("Skipping path %q: already desired mode", path)
|
||||
continue
|
||||
}
|
||||
pathsInRoot = append(pathsInRoot, path)
|
||||
}
|
||||
|
||||
230
cmd/nvidia-ctk/hook/create-symlinks/create-symlinks.go
Normal file
230
cmd/nvidia-ctk/hook/create-symlinks/create-symlinks.go
Normal file
@@ -0,0 +1,230 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package symlinks
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
type config struct {
|
||||
hostRoot string
|
||||
filenames cli.StringSlice
|
||||
links cli.StringSlice
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs a hook command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the '' command
|
||||
c := cli.Command{
|
||||
Name: "create-symlinks",
|
||||
Usage: "A hook to create symlinks in the container. This can be used to proces CSV mount specs",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "host-root",
|
||||
Usage: "The root on the host filesystem to use to resolve symlinks",
|
||||
Destination: &cfg.hostRoot,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "csv-filename",
|
||||
Usage: "Specify a (CSV) filename to process",
|
||||
Destination: &cfg.filenames,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "link",
|
||||
Usage: "Specify a specific link to create. The link is specified as target::link",
|
||||
Destination: &cfg.links,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
csvFiles := cfg.filenames.Value()
|
||||
|
||||
chainLocator := lookup.NewSymlinkChainLocator(
|
||||
lookup.WithLogger(m.logger),
|
||||
lookup.WithRoot(cfg.hostRoot),
|
||||
)
|
||||
|
||||
var candidates []string
|
||||
for _, file := range csvFiles {
|
||||
mountSpecs, err := csv.NewCSVFileParser(m.logger, file).Parse()
|
||||
if err != nil {
|
||||
m.logger.Debugf("Skipping CSV file %v: %v", file, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, ms := range mountSpecs {
|
||||
if ms.Type != csv.MountSpecSym {
|
||||
continue
|
||||
}
|
||||
targets, err := chainLocator.Locate(ms.Path)
|
||||
if err != nil {
|
||||
m.logger.Warningf("Failed to locate symlink %v", ms.Path)
|
||||
}
|
||||
candidates = append(candidates, targets...)
|
||||
}
|
||||
}
|
||||
|
||||
created := make(map[string]bool)
|
||||
// candidates is a list of absolute paths to symlinks in a chain, or the final target of the chain.
|
||||
for _, candidate := range candidates {
|
||||
target, err := symlinks.Resolve(candidate)
|
||||
if err != nil {
|
||||
m.logger.Debugf("Skipping invalid link: %v", err)
|
||||
continue
|
||||
} else if target == candidate {
|
||||
m.logger.Debugf("%v is not a symlink", candidate)
|
||||
continue
|
||||
}
|
||||
|
||||
err = m.createLink(created, cfg.hostRoot, containerRoot, target, candidate)
|
||||
if err != nil {
|
||||
m.logger.Warningf("Failed to create link %v: %v", []string{target, candidate}, err)
|
||||
}
|
||||
}
|
||||
|
||||
links := cfg.links.Value()
|
||||
for _, l := range links {
|
||||
parts := strings.Split(l, "::")
|
||||
if len(parts) != 2 {
|
||||
m.logger.Warningf("Invalid link specification %v", l)
|
||||
continue
|
||||
}
|
||||
|
||||
err := m.createLink(created, cfg.hostRoot, containerRoot, parts[0], parts[1])
|
||||
if err != nil {
|
||||
m.logger.Warningf("Failed to create link %v: %v", parts, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func (m command) createLink(created map[string]bool, hostRoot string, containerRoot string, target string, link string) error {
|
||||
linkPath, err := changeRoot(hostRoot, containerRoot, link)
|
||||
if err != nil {
|
||||
m.logger.Warningf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
|
||||
}
|
||||
if created[linkPath] {
|
||||
m.logger.Debugf("Link %v already created", linkPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
targetPath, err := changeRoot(hostRoot, "/", target)
|
||||
if err != nil {
|
||||
m.logger.Warningf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Symlinking %v to %v", linkPath, targetPath)
|
||||
err = os.MkdirAll(filepath.Dir(linkPath), 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create directory: %v", err)
|
||||
}
|
||||
err = os.Symlink(target, linkPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create symlink: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// changeRoot re-roots path from the current root onto the new root.
//
// A relative path is returned unchanged. For an absolute path with a
// non-empty current root, the path relative to current is computed and joined
// onto new; with an empty current root the path itself is joined onto new.
// An error from filepath.Rel is returned together with an empty string.
func changeRoot(current string, new string, path string) (string, error) {
	switch {
	case !filepath.IsAbs(path):
		return path, nil
	case current == "":
		return filepath.Join(new, path), nil
	}

	rel, err := filepath.Rel(current, path)
	if err != nil {
		return "", err
	}
	return filepath.Join(new, rel), nil
}
|
||||
|
||||
// Locate returns the link target of the specified filename or an empty slice if the
|
||||
// specified filename is not a symlink.
|
||||
func (m command) Locate(filename string) ([]string, error) {
|
||||
info, err := os.Lstat(filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get file info: %v", info)
|
||||
}
|
||||
if info.Mode()&os.ModeSymlink == 0 {
|
||||
m.logger.Debugf("%v is not a symlink", filename)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
target, err := os.Readlink(filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error checking symlink: %v", err)
|
||||
}
|
||||
|
||||
m.logger.Debugf("Resolved link: '%v' => '%v'", filename, target)
|
||||
|
||||
return []string{target}, nil
|
||||
}
|
||||
@@ -17,9 +17,11 @@
|
||||
package hook
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/commands"
|
||||
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
@@ -27,7 +29,7 @@ type hookCommand struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
// NewCommand constructs CLI subcommand for handling CDI hooks.
|
||||
// NewCommand constructs a hook command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
c := hookCommand{
|
||||
logger: logger,
|
||||
@@ -37,24 +39,17 @@ func NewCommand(logger logger.Interface) *cli.Command {
|
||||
|
||||
// build
|
||||
func (m hookCommand) build() *cli.Command {
|
||||
// Create the 'hook' subcommand
|
||||
// Create the 'hook' command
|
||||
hook := cli.Command{
|
||||
Name: "hook",
|
||||
Usage: "A collection of hooks that may be injected into an OCI spec",
|
||||
// We set the default action for the `hook` subcommand to issue a
|
||||
// warning and exit with no error.
|
||||
// This means that if an unsupported hook is run, a container will not fail
|
||||
// to launch. An unsupported hook could be the result of a CDI specification
|
||||
// referring to a new hook that is not yet supported by an older NVIDIA
|
||||
// Container Toolkit version or a hook that has been removed in newer
|
||||
// version.
|
||||
Action: func(ctx *cli.Context) error {
|
||||
commands.IssueUnsupportedHookWarning(m.logger, ctx)
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
hook.Subcommands = commands.New(m.logger)
|
||||
hook.Subcommands = []*cli.Command{
|
||||
ldcache.NewCommand(m.logger),
|
||||
symlinks.NewCommand(m.logger),
|
||||
chmod.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
}
|
||||
|
||||
144
cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go
Normal file
144
cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go
Normal file
@@ -0,0 +1,144 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
type config struct {
|
||||
folders cli.StringSlice
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs an update-ldcache command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build the update-ldcache command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'update-ldcache' command
|
||||
c := cli.Command{
|
||||
Name: "update-ldcache",
|
||||
Usage: "Update ldcache in a container by running ldconfig",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "folder",
|
||||
Usage: "Specifiy a folder to add to /etc/ld.so.conf before updating the ld cache",
|
||||
Destination: &cfg.folders,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
_, err = os.Stat(filepath.Join(containerRoot, "/etc/ld.so.cache"))
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
m.logger.Debugf("No ld.so.cache found, skipping update")
|
||||
return nil
|
||||
}
|
||||
|
||||
err = m.createConfig(containerRoot, cfg.folders.Value())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf: %v", err)
|
||||
}
|
||||
|
||||
args := []string{"/sbin/ldconfig"}
|
||||
if containerRoot != "" {
|
||||
args = append(args, "-r", containerRoot)
|
||||
}
|
||||
|
||||
return syscall.Exec(args[0], args, nil)
|
||||
}
|
||||
|
||||
// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf in the container
|
||||
// to include the required paths.
|
||||
func (m command) createConfig(root string, folders []string) error {
|
||||
if len(folders) == 0 {
|
||||
m.logger.Debugf("No folders to add to /etc/ld.so.conf")
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(filepath.Join(root, "/etc/ld.so.conf.d"), 0755); err != nil {
|
||||
return fmt.Errorf("failed to create ld.so.conf.d: %v", err)
|
||||
}
|
||||
|
||||
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create config file: %v", err)
|
||||
}
|
||||
defer configFile.Close()
|
||||
|
||||
m.logger.Debugf("Adding folders %v to %v", folders, configFile.Name())
|
||||
|
||||
configured := make(map[string]bool)
|
||||
for _, folder := range folders {
|
||||
if configured[folder] {
|
||||
continue
|
||||
}
|
||||
_, err = configFile.WriteString(fmt.Sprintf("%s\n", folder))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
|
||||
}
|
||||
configured[folder] = true
|
||||
}
|
||||
|
||||
// The created file needs to be world readable for the cases where the container is run as a non-root user.
|
||||
if err := os.Chmod(configFile.Name(), 0644); err != nil {
|
||||
return fmt.Errorf("failed to chmod config file: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -17,9 +17,8 @@
|
||||
package info
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -19,8 +19,6 @@ package main
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
|
||||
@@ -28,6 +26,7 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
cli "github.com/urfave/cli/v2"
|
||||
)
|
||||
@@ -49,7 +48,6 @@ func main() {
|
||||
|
||||
// Create the top-level CLI
|
||||
c := cli.NewApp()
|
||||
c.DisableSliceFlagSeparator = true
|
||||
c.Name = "NVIDIA Container Toolkit CLI"
|
||||
c.UseShortOptionHandling = true
|
||||
c.EnableBashCompletion = true
|
||||
|
||||
@@ -28,7 +28,6 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -44,17 +43,13 @@ const (
|
||||
defaultContainerdConfigFilePath = "/etc/containerd/config.toml"
|
||||
defaultCrioConfigFilePath = "/etc/crio/crio.conf"
|
||||
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
|
||||
|
||||
defaultConfigSource = configSourceFile
|
||||
configSourceCommand = "command"
|
||||
configSourceFile = "file"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
// NewCommand constructs a configure command with the specified logger
|
||||
// NewCommand constructs an configure command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
@@ -68,8 +63,6 @@ type config struct {
|
||||
dryRun bool
|
||||
runtime string
|
||||
configFilePath string
|
||||
executablePath string
|
||||
configSource string
|
||||
mode string
|
||||
hookFilePath string
|
||||
|
||||
@@ -119,22 +112,11 @@ func (m command) build() *cli.Command {
|
||||
Usage: "path to the config file for the target runtime",
|
||||
Destination: &config.configFilePath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "executable-path",
|
||||
Usage: "The path to the runtime executable. This is used to extract the current config",
|
||||
Destination: &config.executablePath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config-mode",
|
||||
Usage: "the config mode for runtimes that support multiple configuration mechanisms",
|
||||
Destination: &config.mode,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config-source",
|
||||
Usage: "the source to retrieve the container runtime configuration; one of [command, file]\"",
|
||||
Destination: &config.configSource,
|
||||
Value: defaultConfigSource,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "oci-hook-path",
|
||||
Usage: "the path to the OCI runtime hook to create if --config-mode=oci-hook is specified. If no path is specified, the generated hook is output to STDOUT.\n\tNote: The use of OCI hooks is deprecated.",
|
||||
@@ -167,7 +149,7 @@ func (m command) build() *cli.Command {
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "cdi.enabled",
|
||||
Aliases: []string{"cdi.enable", "enable-cdi"},
|
||||
Aliases: []string{"cdi.enable"},
|
||||
Usage: "Enable CDI in the configured runtime",
|
||||
Destination: &config.cdi.enabled,
|
||||
},
|
||||
@@ -212,34 +194,6 @@ func (m command) validateFlags(c *cli.Context, config *config) error {
|
||||
config.cdi.enabled = false
|
||||
}
|
||||
|
||||
if config.executablePath != "" && config.runtime == "docker" {
|
||||
m.logger.Warningf("Ignoring executable-path=%q flag for %v", config.executablePath, config.runtime)
|
||||
config.executablePath = ""
|
||||
}
|
||||
|
||||
switch config.configSource {
|
||||
case configSourceCommand:
|
||||
if config.runtime == "docker" {
|
||||
m.logger.Warningf("A %v Config Source is not supported for %v; using %v", config.configSource, config.runtime, configSourceFile)
|
||||
config.configSource = configSourceFile
|
||||
}
|
||||
case configSourceFile:
|
||||
break
|
||||
default:
|
||||
return fmt.Errorf("unrecognized Config Source: %v", config.configSource)
|
||||
}
|
||||
|
||||
if config.configFilePath == "" {
|
||||
switch config.runtime {
|
||||
case "containerd":
|
||||
config.configFilePath = defaultContainerdConfigFilePath
|
||||
case "crio":
|
||||
config.configFilePath = defaultCrioConfigFilePath
|
||||
case "docker":
|
||||
config.configFilePath = defaultDockerConfigFilePath
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -256,29 +210,25 @@ func (m command) configureWrapper(c *cli.Context, config *config) error {
|
||||
|
||||
// configureConfigFile updates the specified container engine config file to enable the NVIDIA runtime.
|
||||
func (m command) configureConfigFile(c *cli.Context, config *config) error {
|
||||
configSource, err := config.resolveConfigSource()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
configFilePath := config.resolveConfigFilePath()
|
||||
|
||||
var cfg engine.Interface
|
||||
var err error
|
||||
switch config.runtime {
|
||||
case "containerd":
|
||||
cfg, err = containerd.New(
|
||||
containerd.WithLogger(m.logger),
|
||||
containerd.WithPath(config.configFilePath),
|
||||
containerd.WithConfigSource(configSource),
|
||||
containerd.WithPath(configFilePath),
|
||||
)
|
||||
case "crio":
|
||||
cfg, err = crio.New(
|
||||
crio.WithLogger(m.logger),
|
||||
crio.WithPath(config.configFilePath),
|
||||
crio.WithConfigSource(configSource),
|
||||
crio.WithPath(configFilePath),
|
||||
)
|
||||
case "docker":
|
||||
cfg, err = docker.New(
|
||||
docker.WithLogger(m.logger),
|
||||
docker.WithPath(config.configFilePath),
|
||||
docker.WithPath(configFilePath),
|
||||
)
|
||||
default:
|
||||
err = fmt.Errorf("unrecognized runtime '%v'", config.runtime)
|
||||
@@ -296,11 +246,12 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
if config.cdi.enabled {
|
||||
cfg.EnableCDI()
|
||||
err = enableCDI(config, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to enable CDI in %s: %w", config.runtime, err)
|
||||
}
|
||||
|
||||
outputPath := config.getOutputConfigPath()
|
||||
outputPath := config.getOuputConfigPath()
|
||||
n, err := cfg.Save(outputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to flush config: %v", err)
|
||||
@@ -318,35 +269,28 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// resolveConfigSource returns the default config source or the user provided config source
|
||||
func (c *config) resolveConfigSource() (toml.Loader, error) {
|
||||
switch c.configSource {
|
||||
case configSourceCommand:
|
||||
return c.getCommandConfigSource(), nil
|
||||
case configSourceFile:
|
||||
return toml.FromFile(c.configFilePath), nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unrecognized config source: %s", c.configSource)
|
||||
// resolveConfigFilePath returns the default config file path for the configured container engine
|
||||
func (c *config) resolveConfigFilePath() string {
|
||||
if c.configFilePath != "" {
|
||||
return c.configFilePath
|
||||
}
|
||||
}
|
||||
|
||||
// getConfigSourceCommand returns the default cli command to fetch the current runtime config
|
||||
func (c *config) getCommandConfigSource() toml.Loader {
|
||||
switch c.runtime {
|
||||
case "containerd":
|
||||
return containerd.CommandLineSource("", c.executablePath)
|
||||
return defaultContainerdConfigFilePath
|
||||
case "crio":
|
||||
return crio.CommandLineSource("", c.executablePath)
|
||||
return defaultCrioConfigFilePath
|
||||
case "docker":
|
||||
return defaultDockerConfigFilePath
|
||||
}
|
||||
return toml.Empty
|
||||
return ""
|
||||
}
|
||||
|
||||
// getOutputConfigPath returns the configured config path or "" if dry-run is enabled
|
||||
func (c *config) getOutputConfigPath() string {
|
||||
// getOuputConfigPath returns the configured config path or "" if dry-run is enabled
|
||||
func (c *config) getOuputConfigPath() string {
|
||||
if c.dryRun {
|
||||
return ""
|
||||
}
|
||||
return c.configFilePath
|
||||
return c.resolveConfigFilePath()
|
||||
}
|
||||
|
||||
// configureOCIHook creates and configures the OCI hook for the NVIDIA runtime
|
||||
@@ -357,3 +301,17 @@ func (m *command) configureOCIHook(c *cli.Context, config *config) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// enableCDI enables the use of CDI in the corresponding container engine
|
||||
func enableCDI(config *config, cfg engine.Interface) error {
|
||||
if !config.cdi.enabled {
|
||||
return nil
|
||||
}
|
||||
switch config.runtime {
|
||||
case "containerd":
|
||||
return cfg.Set("enable_cdi", true)
|
||||
case "docker":
|
||||
return cfg.Set("features", map[string]bool{"cdi": true})
|
||||
}
|
||||
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
|
||||
}
|
||||
|
||||
@@ -17,10 +17,9 @@
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type runtimeCommand struct {
|
||||
|
||||
@@ -19,13 +19,16 @@ package devchar
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -40,6 +43,7 @@ type config struct {
|
||||
devCharPath string
|
||||
driverRoot string
|
||||
dryRun bool
|
||||
watch bool
|
||||
createAll bool
|
||||
createDeviceNodes bool
|
||||
loadKernelModules bool
|
||||
@@ -82,7 +86,14 @@ func (m command) build() *cli.Command {
|
||||
Usage: "The path to the driver root. `DRIVER_ROOT`/dev is searched for NVIDIA device nodes.",
|
||||
Value: "/",
|
||||
Destination: &cfg.driverRoot,
|
||||
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
|
||||
EnvVars: []string{"DRIVER_ROOT"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "watch",
|
||||
Usage: "If set, the command will watch for changes to the driver root and recreate the symlinks when changes are detected.",
|
||||
Value: false,
|
||||
Destination: &cfg.watch,
|
||||
EnvVars: []string{"WATCH"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "create-all",
|
||||
@@ -115,7 +126,7 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
func (m command) validateFlags(r *cli.Context, cfg *config) error {
|
||||
if cfg.createAll {
|
||||
if cfg.createAll && cfg.watch {
|
||||
return fmt.Errorf("create-all and watch are mutually exclusive")
|
||||
}
|
||||
|
||||
@@ -133,6 +144,19 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error {
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
var watcher *fsnotify.Watcher
|
||||
var sigs chan os.Signal
|
||||
|
||||
if cfg.watch {
|
||||
watcher, err := newFSWatcher(filepath.Join(cfg.driverRoot, "dev"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create FS watcher: %v", err)
|
||||
}
|
||||
defer watcher.Close()
|
||||
|
||||
sigs = newOSWatcher(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
|
||||
}
|
||||
|
||||
l, err := NewSymlinkCreator(
|
||||
WithLogger(m.logger),
|
||||
WithDevCharPath(cfg.devCharPath),
|
||||
@@ -146,11 +170,47 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
||||
return fmt.Errorf("failed to create symlink creator: %v", err)
|
||||
}
|
||||
|
||||
create:
|
||||
err = l.CreateLinks()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create links: %v", err)
|
||||
}
|
||||
return nil
|
||||
if !cfg.watch {
|
||||
return nil
|
||||
}
|
||||
for {
|
||||
select {
|
||||
|
||||
case event := <-watcher.Events:
|
||||
deviceNode := filepath.Base(event.Name)
|
||||
if !strings.HasPrefix(deviceNode, "nvidia") {
|
||||
continue
|
||||
}
|
||||
if event.Op&fsnotify.Create == fsnotify.Create {
|
||||
m.logger.Infof("%s created, restarting.", event.Name)
|
||||
goto create
|
||||
}
|
||||
if event.Op&fsnotify.Create == fsnotify.Remove {
|
||||
m.logger.Infof("%s removed. Ignoring", event.Name)
|
||||
|
||||
}
|
||||
|
||||
// Watch for any other fs errors and log them.
|
||||
case err := <-watcher.Errors:
|
||||
m.logger.Errorf("inotify: %s", err)
|
||||
|
||||
// React to signals
|
||||
case s := <-sigs:
|
||||
switch s {
|
||||
case syscall.SIGHUP:
|
||||
m.logger.Infof("Received SIGHUP, recreating symlinks.")
|
||||
goto create
|
||||
default:
|
||||
m.logger.Infof("Received signal %q, shutting down.", s)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type linkCreator struct {
|
||||
@@ -338,3 +398,27 @@ type deviceNode struct {
|
||||
func (d deviceNode) devCharName() string {
|
||||
return fmt.Sprintf("%d:%d", d.major, d.minor)
|
||||
}
|
||||
|
||||
func newFSWatcher(files ...string) (*fsnotify.Watcher, error) {
|
||||
watcher, err := fsnotify.NewWatcher()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, f := range files {
|
||||
err = watcher.Add(f)
|
||||
if err != nil {
|
||||
watcher.Close()
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return watcher, nil
|
||||
}
|
||||
|
||||
// newOSWatcher returns a channel with a buffer of one that is registered with
// signal.Notify for the given signals.
func newOSWatcher(sigs ...os.Signal) chan os.Signal {
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, sigs...)
	return ch
}
|
||||
|
||||
@@ -20,10 +20,9 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type nodeLister interface {
|
||||
@@ -64,13 +63,20 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
|
||||
if m.nodeIsBlocked(d) {
|
||||
continue
|
||||
}
|
||||
|
||||
var stat unix.Stat_t
|
||||
err := unix.Stat(d, &stat)
|
||||
if err != nil {
|
||||
m.logger.Warningf("Could not stat device: %v", err)
|
||||
continue
|
||||
}
|
||||
deviceNodes = append(deviceNodes, newDeviceNode(d, stat))
|
||||
deviceNode := deviceNode{
|
||||
path: d,
|
||||
major: unix.Major(uint64(stat.Rdev)),
|
||||
minor: unix.Minor(uint64(stat.Rdev)),
|
||||
}
|
||||
|
||||
deviceNodes = append(deviceNodes, deviceNode)
|
||||
}
|
||||
|
||||
return deviceNodes, nil
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package devchar
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
|
||||
deviceNode := deviceNode{
|
||||
path: d,
|
||||
major: unix.Major(stat.Rdev),
|
||||
minor: unix.Minor(stat.Rdev),
|
||||
}
|
||||
return deviceNode
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
//go:build !linux
|
||||
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package devchar
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
|
||||
deviceNode := deviceNode{
|
||||
path: d,
|
||||
major: unix.Major(uint64(stat.Rdev)),
|
||||
minor: unix.Minor(uint64(stat.Rdev)),
|
||||
}
|
||||
return deviceNode
|
||||
}
|
||||
@@ -19,11 +19,10 @@ package createdevicenodes
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -31,8 +30,7 @@ type command struct {
|
||||
}
|
||||
|
||||
type options struct {
|
||||
root string
|
||||
devRoot string
|
||||
driverRoot string
|
||||
|
||||
dryRun bool
|
||||
|
||||
@@ -66,21 +64,11 @@ func (m command) build() *cli.Command {
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "root",
|
||||
// TODO: Remove this alias
|
||||
Aliases: []string{"driver-root"},
|
||||
Usage: "the path to to the root to use to load the kernel modules. This root must be a chrootable path. " +
|
||||
"If device nodes to be created these will be created at `ROOT`/dev unless an alternative path is specified",
|
||||
Name: "driver-root",
|
||||
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
|
||||
Value: "/",
|
||||
Destination: &opts.root,
|
||||
// TODO: Remove the NVIDIA_DRIVER_ROOT and DRIVER_ROOT envvars.
|
||||
EnvVars: []string{"ROOT", "NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "dev-root",
|
||||
Usage: "specify the root where `/dev` is located. If this is not specified, the root is assumed.",
|
||||
Destination: &opts.devRoot,
|
||||
EnvVars: []string{"NVIDIA_DEV_ROOT", "DEV_ROOT"},
|
||||
Destination: &opts.driverRoot,
|
||||
EnvVars: []string{"DRIVER_ROOT"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "control-devices",
|
||||
@@ -94,7 +82,7 @@ func (m command) build() *cli.Command {
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "dry-run",
|
||||
Usage: "if set, the command will not perform any operations",
|
||||
Usage: "if set, the command will not create any symlinks.",
|
||||
Value: false,
|
||||
Destination: &opts.dryRun,
|
||||
EnvVars: []string{"DRY_RUN"},
|
||||
@@ -105,10 +93,6 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
func (m command) validateFlags(r *cli.Context, opts *options) error {
|
||||
if opts.devRoot == "" && opts.root != "" {
|
||||
m.logger.Infof("Using dev-root %q", opts.root)
|
||||
opts.devRoot = opts.root
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -117,7 +101,7 @@ func (m command) run(c *cli.Context, opts *options) error {
|
||||
modules := nvmodules.New(
|
||||
nvmodules.WithLogger(m.logger),
|
||||
nvmodules.WithDryRun(opts.dryRun),
|
||||
nvmodules.WithRoot(opts.root),
|
||||
nvmodules.WithRoot(opts.driverRoot),
|
||||
)
|
||||
if err := modules.LoadAll(); err != nil {
|
||||
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
|
||||
@@ -128,12 +112,12 @@ func (m command) run(c *cli.Context, opts *options) error {
|
||||
devices, err := nvdevices.New(
|
||||
nvdevices.WithLogger(m.logger),
|
||||
nvdevices.WithDryRun(opts.dryRun),
|
||||
nvdevices.WithDevRoot(opts.devRoot),
|
||||
nvdevices.WithDevRoot(opts.driverRoot),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.logger.Infof("Creating control device nodes at %s", opts.devRoot)
|
||||
m.logger.Infof("Creating control device nodes at %s", opts.driverRoot)
|
||||
if err := devices.CreateNVIDIAControlDevices(); err != nil {
|
||||
return fmt.Errorf("failed to create NVIDIA control device nodes: %v", err)
|
||||
}
|
||||
|
||||
101
cmd/nvidia-ctk/system/print-ldcache/print-ldcache.go
Normal file
101
cmd/nvidia-ctk/system/print-ldcache/print-ldcache.go
Normal file
@@ -0,0 +1,101 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package createdevicenodes
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
type options struct {
|
||||
driverRoot string
|
||||
}
|
||||
|
||||
// NewCommand constructs a command sub-command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
opts := options{}
|
||||
|
||||
c := cli.Command{
|
||||
Name: "print-ldcache",
|
||||
Usage: "A utility to print the contents of the ldcache",
|
||||
Before: func(c *cli.Context) error {
|
||||
return m.validateFlags(c, &opts)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &opts)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "driver-root",
|
||||
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
|
||||
Value: "/",
|
||||
Destination: &opts.driverRoot,
|
||||
EnvVars: []string{"DRIVER_ROOT"},
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(r *cli.Context, opts *options) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, opts *options) error {
|
||||
cache, err := ldcache.New(m.logger, opts.driverRoot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create ldcache: %v", err)
|
||||
}
|
||||
|
||||
lib32, lib64 := cache.List()
|
||||
|
||||
if len(lib32) == 0 {
|
||||
m.logger.Info("No 32-bit libraries found")
|
||||
} else {
|
||||
m.logger.Infof("%d 32-bit libraries found", len(lib32))
|
||||
for _, lib := range lib32 {
|
||||
m.logger.Infof("%v", lib)
|
||||
}
|
||||
}
|
||||
if len(lib64) == 0 {
|
||||
m.logger.Info("No 64-bit libraries found")
|
||||
} else {
|
||||
m.logger.Infof("%d 64-bit libraries found", len(lib64))
|
||||
for _, lib := range lib64 {
|
||||
m.logger.Infof("%v", lib)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user