mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d167812ce3 | ||
|
|
7ff23999e8 | ||
|
|
a9b01a43bc | ||
|
|
ccff00bc30 | ||
|
|
f7d54200c6 | ||
|
|
29fd206f3a | ||
|
|
cfe0d5d07e | ||
|
|
9ab640b2be | ||
|
|
9d2e4b48bc | ||
|
|
c050bcf081 | ||
|
|
27d0fa4ee2 | ||
|
|
e0e22fdceb | ||
|
|
c1eae0deda | ||
|
|
68f0203a49 | ||
|
|
cc688f7c75 | ||
|
|
7566eb124a | ||
|
|
eb5d50abc4 |
@@ -19,6 +19,7 @@ default:
|
||||
|
||||
variables:
|
||||
GIT_SUBMODULE_STRATEGY: recursive
|
||||
BUILDIMAGE: "${CI_REGISTRY_IMAGE}/build:${CI_COMMIT_SHORT_SHA}"
|
||||
BUILD_MULTI_ARCH_IMAGES: "true"
|
||||
|
||||
stages:
|
||||
@@ -144,7 +145,7 @@ trigger-pipeline:
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
|
||||
script:
|
||||
- make -f deployments/container/Makefile test-${DIST}
|
||||
- make -f build/container/Makefile test-${DIST}
|
||||
|
||||
# Define the test targets
|
||||
test-packaging:
|
||||
@@ -194,7 +195,7 @@ test-packaging:
|
||||
|
||||
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
|
||||
# Target
|
||||
- make -f deployments/container/Makefile push-${DIST}
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
|
||||
# Define a staging release step that pushes an image to an internal "staging" repository
|
||||
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
|
||||
@@ -224,6 +225,13 @@ test-packaging:
|
||||
OUT_IMAGE_VERSION: "${DEVEL_RELEASE_IMAGE_VERSION}"
|
||||
|
||||
# Define the release jobs
|
||||
release:staging-centos7:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-centos7
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
release:staging-ubi8:
|
||||
extends:
|
||||
- .release:staging
|
||||
|
||||
56
.github/dependabot.yml
vendored
56
.github/dependabot.yml
vendored
@@ -1,56 +0,0 @@
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "gomod"
|
||||
target-branch: main
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
ignore:
|
||||
- dependency-name: k8s.io/*
|
||||
labels:
|
||||
- dependencies
|
||||
|
||||
- package-ecosystem: "docker"
|
||||
directory: "/deployments/container"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
|
||||
- package-ecosystem: "gomod"
|
||||
# This defines a specific dependabot rule for the latest release-* branch.
|
||||
target-branch: release-1.14
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
ignore:
|
||||
- dependency-name: k8s.io/*
|
||||
labels:
|
||||
- dependencies
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
target-branch: gh-pages
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
|
||||
# Allow dependabot to update the libnvidia-container submodule.
|
||||
- package-ecosystem: "gitsubmodule"
|
||||
target-branch: main
|
||||
directory: "/"
|
||||
allow:
|
||||
- dependency-name: "third_party/libnvidia-container"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
labels:
|
||||
- dependencies
|
||||
- libnvidia-container
|
||||
113
.github/workflows/blossom-ci.yml
vendored
Normal file
113
.github/workflows/blossom-ci.yml
vendored
Normal file
@@ -0,0 +1,113 @@
|
||||
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# A workflow to trigger ci on hybrid infra (github + self hosted runner)
|
||||
name: Blossom-CI
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
platform:
|
||||
description: 'runs-on argument'
|
||||
required: false
|
||||
args:
|
||||
description: 'argument'
|
||||
required: false
|
||||
jobs:
|
||||
Authorization:
|
||||
name: Authorization
|
||||
runs-on: blossom
|
||||
outputs:
|
||||
args: ${{ env.args }}
|
||||
|
||||
# This job only runs for pull request comments
|
||||
if: |
|
||||
contains( '\
|
||||
anstockatnv,\
|
||||
rorajani,\
|
||||
cdesiniotis,\
|
||||
shivamerla,\
|
||||
ArangoGutierrez,\
|
||||
elezar,\
|
||||
klueska,\
|
||||
zvonkok,\
|
||||
', format('{0},', github.actor)) &&
|
||||
github.event.comment.body == '/blossom-ci'
|
||||
steps:
|
||||
- name: Check if comment is issued by authorized person
|
||||
run: blossom-ci
|
||||
env:
|
||||
OPERATION: 'AUTH'
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
|
||||
|
||||
Vulnerability-scan:
|
||||
name: Vulnerability scan
|
||||
needs: [Authorization]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
repository: ${{ fromJson(needs.Authorization.outputs.args).repo }}
|
||||
ref: ${{ fromJson(needs.Authorization.outputs.args).ref }}
|
||||
lfs: 'true'
|
||||
|
||||
# repo specific steps
|
||||
#- name: Setup java
|
||||
# uses: actions/setup-java@v1
|
||||
# with:
|
||||
# java-version: 1.8
|
||||
|
||||
# add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file
|
||||
#- name: Setup blackduck properties
|
||||
# run: |
|
||||
# PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g')
|
||||
# echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties
|
||||
# echo detect.maven.included.scopes=compile >> application.properties
|
||||
|
||||
- name: Run blossom action
|
||||
uses: NVIDIA/blossom-action@main
|
||||
env:
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
|
||||
with:
|
||||
args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }}
|
||||
args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }}
|
||||
args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }}
|
||||
|
||||
Job-trigger:
|
||||
name: Start ci job
|
||||
needs: [Vulnerability-scan]
|
||||
runs-on: blossom
|
||||
steps:
|
||||
- name: Start ci job
|
||||
run: blossom-ci
|
||||
env:
|
||||
OPERATION: 'START-CI-JOB'
|
||||
CI_SERVER: ${{ secrets.CI_SERVER }}
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
Upload-Log:
|
||||
name: Upload log
|
||||
runs-on: blossom
|
||||
if : github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- name: Jenkins log for pull request ${{ fromJson(github.event.inputs.args).pr }} (click here)
|
||||
run: blossom-ci
|
||||
env:
|
||||
OPERATION: 'POST-PROCESSING'
|
||||
CI_SERVER: ${{ secrets.CI_SERVER }}
|
||||
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
76
.github/workflows/golang.yaml
vendored
76
.github/workflows/golang.yaml
vendored
@@ -1,76 +0,0 @@
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: Golang
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Checkout code
|
||||
- name: Get Golang version
|
||||
id: vars
|
||||
run: |
|
||||
GOLANG_VERSION=$( grep "GOLANG_VERSION :=" versions.mk )
|
||||
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ env.GOLANG_VERSION }}
|
||||
- name: Lint
|
||||
uses: golangci/golangci-lint-action@v4
|
||||
with:
|
||||
version: latest
|
||||
args: -v --timeout 5m
|
||||
skip-cache: true
|
||||
- name: Check golang modules
|
||||
run: make check-vendor
|
||||
test:
|
||||
name: Unit test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
- name: Get Golang version
|
||||
id: vars
|
||||
run: |
|
||||
GOLANG_VERSION=$( grep "GOLANG_VERSION :=" versions.mk )
|
||||
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: ${{ env.GOLANG_VERSION }}
|
||||
- run: make test
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Checkout code
|
||||
|
||||
- name: Build
|
||||
run: make docker-build
|
||||
138
.github/workflows/image.yaml
vendored
138
.github/workflows/image.yaml
vendored
@@ -1,138 +0,0 @@
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Run this workflow on pull requests
|
||||
name: image
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
|
||||
jobs:
|
||||
packages:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
target:
|
||||
- ubuntu18.04-arm64
|
||||
- ubuntu18.04-amd64
|
||||
- ubuntu18.04-ppc64le
|
||||
- centos7-aarch64
|
||||
- centos7-x86_64
|
||||
- centos8-ppc64le
|
||||
ispr:
|
||||
- ${{github.event_name == 'pull_request'}}
|
||||
exclude:
|
||||
- ispr: true
|
||||
target: ubuntu18.04-arm64
|
||||
- ispr: true
|
||||
target: ubuntu18.04-ppc64le
|
||||
- ispr: true
|
||||
target: centos7-aarch64
|
||||
- ispr: true
|
||||
target: centos8-ppc64le
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Check out code
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: build ${{ matrix.target }} packages
|
||||
run: |
|
||||
sudo apt-get install -y coreutils build-essential sed git bash make
|
||||
echo "Building packages"
|
||||
./scripts/build-packages.sh ${{ matrix.target }}
|
||||
- name: 'Upload Artifacts'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
compression-level: 0
|
||||
name: toolkit-container-${{ matrix.target }}-${{ github.run_id }}
|
||||
path: ${{ github.workspace }}/dist/*
|
||||
|
||||
image:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
dist:
|
||||
- ubuntu20.04
|
||||
- ubi8
|
||||
- packaging
|
||||
ispr:
|
||||
- ${{github.event_name == 'pull_request'}}
|
||||
exclude:
|
||||
- ispr: true
|
||||
dist: ubi8
|
||||
needs: packages
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Check out code
|
||||
- name: Calculate build vars
|
||||
id: vars
|
||||
run: |
|
||||
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
|
||||
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
|
||||
REPO_FULL_NAME="${{ github.event.pull_request.head.repo.full_name }}"
|
||||
echo "${REPO_FULL_NAME}"
|
||||
echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> $GITHUB_ENV
|
||||
|
||||
PUSH_ON_BUILD="false"
|
||||
BUILD_MULTI_ARCH_IMAGES="false"
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
if [[ "${{ github.actor }}" != "dependabot[bot]" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
|
||||
# For non-fork PRs that are not created by dependabot we do push images
|
||||
PUSH_ON_BUILD="true"
|
||||
fi
|
||||
elif [[ "${{ github.event_name }}" == "push" ]]; then
|
||||
# On push events we do generate images and enable muilti-arch builds
|
||||
PUSH_ON_BUILD="true"
|
||||
BUILD_MULTI_ARCH_IMAGES="true"
|
||||
fi
|
||||
echo "PUSH_ON_BUILD=${PUSH_ON_BUILD}" >> $GITHUB_ENV
|
||||
echo "BUILD_MULTI_ARCH_IMAGES=${BUILD_MULTI_ARCH_IMAGES}" >> $GITHUB_ENV
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Get built packages
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: ${{ github.workspace }}/dist/
|
||||
pattern: toolkit-container-*-${{ github.run_id }}
|
||||
merge-multiple: true
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Build image
|
||||
env:
|
||||
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/container-toolkit
|
||||
VERSION: ${COMMIT_SHORT_SHA}
|
||||
run: |
|
||||
echo "${VERSION}"
|
||||
make -f deployments/container/Makefile build-${{ matrix.dist }}
|
||||
22
.github/workflows/pre-sanity.yml
vendored
Normal file
22
.github/workflows/pre-sanity.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: Run pre sanity
|
||||
|
||||
# run this workflow for each commit
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Build dev image
|
||||
run: make .build-image
|
||||
|
||||
- name: Build
|
||||
run: make docker-build
|
||||
|
||||
- name: Tests
|
||||
run: make docker-coverage
|
||||
|
||||
- name: Checks
|
||||
run: make docker-check
|
||||
@@ -15,6 +15,68 @@
|
||||
include:
|
||||
- .common-ci.yml
|
||||
|
||||
build-dev-image:
|
||||
stage: image
|
||||
script:
|
||||
- apk --no-cache add make bash
|
||||
- make .build-image
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
- make .push-build-image
|
||||
|
||||
.requires-build-image:
|
||||
image: "${BUILDIMAGE}"
|
||||
|
||||
.go-check:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: go-checks
|
||||
|
||||
fmt:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make assert-fmt
|
||||
|
||||
vet:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make vet
|
||||
|
||||
lint:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make lint
|
||||
allow_failure: true
|
||||
|
||||
ineffassign:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make ineffassign
|
||||
allow_failure: true
|
||||
|
||||
misspell:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make misspell
|
||||
|
||||
go-build:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: go-build
|
||||
script:
|
||||
- make build
|
||||
|
||||
unit-tests:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: unit-tests
|
||||
script:
|
||||
- make coverage
|
||||
|
||||
# Define the package build helpers
|
||||
.multi-arch-build:
|
||||
before_script:
|
||||
@@ -76,12 +138,24 @@ package-centos7-x86_64:
|
||||
- .dist-centos7
|
||||
- .arch-x86_64
|
||||
|
||||
package-centos8-aarch64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos8
|
||||
- .arch-aarch64
|
||||
|
||||
package-centos8-ppc64le:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos8
|
||||
- .arch-ppc64le
|
||||
|
||||
package-centos8-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos8
|
||||
- .arch-x86_64
|
||||
|
||||
package-ubuntu18.04-amd64:
|
||||
extends:
|
||||
- .package-build
|
||||
@@ -126,7 +200,15 @@ package-ubuntu18.04-ppc64le:
|
||||
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
script:
|
||||
- make -f deployments/container/Makefile build-${DIST}
|
||||
- make -f build/container/Makefile build-${DIST}
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-centos7
|
||||
needs:
|
||||
- package-centos7-x86_64
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
@@ -156,6 +238,8 @@ image-packaging:
|
||||
- .package-artifacts
|
||||
- .dist-packaging
|
||||
needs:
|
||||
- job: package-centos8-aarch64
|
||||
- job: package-centos8-x86_64
|
||||
- job: package-ubuntu18.04-amd64
|
||||
- job: package-ubuntu18.04-arm64
|
||||
- job: package-amazonlinux2-aarch64
|
||||
@@ -232,3 +316,4 @@ test-docker-ubuntu20.04:
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
|
||||
4
.gitmodules
vendored
4
.gitmodules
vendored
@@ -1,4 +1,4 @@
|
||||
[submodule "third_party/libnvidia-container"]
|
||||
path = third_party/libnvidia-container
|
||||
url = https://github.com/NVIDIA/libnvidia-container.git
|
||||
branch = main
|
||||
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
|
||||
branch = release-1.14
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
run:
|
||||
deadline: 10m
|
||||
|
||||
linters:
|
||||
enable:
|
||||
- contextcheck
|
||||
- gocritic
|
||||
- gofmt
|
||||
- goimports
|
||||
- gosec
|
||||
- gosimple
|
||||
- govet
|
||||
- ineffassign
|
||||
- misspell
|
||||
- staticcheck
|
||||
- unconvert
|
||||
|
||||
linters-settings:
|
||||
goimports:
|
||||
local-prefixes: github.com/NVIDIA/nvidia-container-toolkit
|
||||
|
||||
issues:
|
||||
exclude:
|
||||
# The legacy hook relies on spec.Hooks.Prestart, which is deprecated as of the v1.2.0 OCI runtime spec.
|
||||
- "SA1019:(.+).Prestart is deprecated(.+)"
|
||||
exclude-rules:
|
||||
# Exclude the gocritic dupSubExpr issue for cgo files.
|
||||
- path: internal/dxcore/dxcore.go
|
||||
linters:
|
||||
- gocritic
|
||||
text: dupSubExpr
|
||||
# Exclude the checks for usage of returns to config.Delete(Path) in the crio and containerd config packages.
|
||||
- path: pkg/config/engine/
|
||||
linters:
|
||||
- errcheck
|
||||
text: config.Delete
|
||||
@@ -67,7 +67,12 @@ variables:
|
||||
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
|
||||
script:
|
||||
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
|
||||
- make -f deployments/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
|
||||
- make -f build/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .dist-centos7
|
||||
- .image-pull
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
@@ -127,6 +132,14 @@ image-packaging:
|
||||
- policy_evaluation.json
|
||||
|
||||
# Define the scan targets
|
||||
scan-centos7-amd64:
|
||||
extends:
|
||||
- .dist-centos7
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
scan-ubuntu20.04-amd64:
|
||||
extends:
|
||||
- .dist-ubuntu20.04
|
||||
@@ -230,6 +243,11 @@ release:staging-ubuntu20.04:
|
||||
|
||||
# Define the external release targets
|
||||
# Release to NGC
|
||||
release:ngc-centos7:
|
||||
extends:
|
||||
- .dist-centos7
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubuntu20.04:
|
||||
extends:
|
||||
- .dist-ubuntu20.04
|
||||
|
||||
63
CHANGELOG.md
63
CHANGELOG.md
@@ -1,62 +1,19 @@
|
||||
# NVIDIA Container Toolkit Changelog
|
||||
|
||||
## v1.15.0
|
||||
|
||||
* Remove `nvidia-container-runtime` and `nvidia-docker2` packages.
|
||||
* Use `XDG_DATA_DIRS` environment variable when locating config files such as graphics config files.
|
||||
* Add support for v0.7.0 Container Device Interface (CDI) specification.
|
||||
* Add `--config-search-path` option to `nvidia-ctk cdi generate` command. These paths are used when locating driver files such as graphics config files.
|
||||
* Use D3DKMTEnumAdapters3 to enumerate adpaters on WSL2 if available.
|
||||
* Add support for v1.2.0 OCI Runtime specification.
|
||||
* Explicitly set `NVIDIA_VISIBLE_DEVICES=void` in generated CDI specifications. This prevents the NVIDIA Container Runtime from making additional modifications.
|
||||
|
||||
* [libnvidia-container] Use D3DKMTEnumAdapters3 to enumerate adpaters on WSL2 if available.
|
||||
|
||||
* [toolkit-container] Bump CUDA base image version to 12.4.1
|
||||
|
||||
## v1.15.0-rc.4
|
||||
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
|
||||
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
|
||||
* Allow multiple device naming strategies for `nvidia-ctk cdi generate` command. This allows a single
|
||||
CDI spec to be generated that includes GPUs by index and UUID.
|
||||
* Set the default `--device-name-strategy` for the `nvidia-ctk cdi generate` command to `[index, uuid]`.
|
||||
* Remove `libnvidia-container0` jetpack dependency included for legacy Tegra-based systems.
|
||||
* Add `NVIDIA_VISIBLE_DEVICES=void` to generated CDI specifications.
|
||||
|
||||
* [toolkit-container] Remove centos7 image. The ubi8 image can be used on all RPM-based platforms.
|
||||
* [toolkit-container] Bump CUDA base image version to 12.3.2
|
||||
|
||||
## v1.15.0-rc.3
|
||||
* Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied.
|
||||
|
||||
## v1.15.0-rc.2
|
||||
* Extend the `runtime.nvidia.com/gpu` CDI kind to support full-GPUs and MIG devices specified by index or UUID.
|
||||
* Fix bug when specifying `--dev-root` for Tegra-based systems.
|
||||
* Log explicitly requested runtime mode.
|
||||
* Remove package dependency on libseccomp.
|
||||
* Added detection of libnvdxgdmal.so.1 on WSL2
|
||||
* Use devRoot to resolve MIG device nodes.
|
||||
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
|
||||
* Add `crun` to the list of configured low-level runtimes.
|
||||
* Added support for `--ldconfig-path` to `nvidia-ctk cdi generate` command.
|
||||
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker.
|
||||
* Add discovery of the GDRCopy device (`gdrdrv`) if the `NVIDIA_GDRCOPY` environment variable of the container is set to `enabled`
|
||||
|
||||
* [toolkit-container] Bump CUDA base image version to 12.3.1.
|
||||
|
||||
## v1.15.0-rc.1
|
||||
* Skip update of ldcache in containers without ldconfig. The .so.SONAME symlinks are still created.
|
||||
* Normalize ldconfig path on use. This automatically adjust the ldconfig setting applied to ldconfig.real on systems where this exists.
|
||||
## v1.14.4
|
||||
* Include `nvidia/nvoptix.bin` in list of graphics mounts.
|
||||
* Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts.
|
||||
* Add support for `--library-search-paths` to `nvidia-ctk cdi generate` command.
|
||||
* Add support for injecting /dev/nvidia-nvswitch* devices if the NVIDIA_NVSWITCH=enabled envvar is specified.
|
||||
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
|
||||
* Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly.
|
||||
* Add `--relative-to` option to `nvidia-ctk transform root` command. This controls whether the root transformation is applied to host or container paths.
|
||||
* Added automatic CDI spec generation when the `runtime.nvidia.com/gpu=all` device is requested by a container.
|
||||
* Log explicitly requested runtime mode.
|
||||
* Remove package dependency on libseccomp.
|
||||
* Added detection of libnvdxgdmal.so.1 on WSL2.
|
||||
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
|
||||
* Add `crun` to the list of configured low-level runtimes.
|
||||
* Add `--cdi.enabled` option to `nvidia-ctk runtime configure` command to enable CDI in containerd.
|
||||
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
|
||||
|
||||
* [libnvidia-container] Fix device permission check when using cgroupv2 (fixes #227)
|
||||
* [toolkit-container] Bump CUDA base image version to 12.3.1.
|
||||
* [libnvidia-container] Added detection of libnvdxgdmal.so.1 on WSL2.
|
||||
|
||||
## v1.14.3
|
||||
* [toolkit-container] Bump CUDA base image version to 12.2.2.
|
||||
|
||||
@@ -19,7 +19,7 @@ where `TARGET` is a make target that is valid for each of the sub-components.
|
||||
|
||||
These include:
|
||||
* `ubuntu18.04-amd64`
|
||||
* `centos7-x86_64`
|
||||
* `centos8-x86_64`
|
||||
|
||||
If no `TARGET` is specified, all valid release targets are built.
|
||||
|
||||
|
||||
142
Jenkinsfile
vendored
Normal file
142
Jenkinsfile
vendored
Normal file
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
podTemplate (cloud:'sw-gpu-cloudnative',
|
||||
containers: [
|
||||
containerTemplate(name: 'docker', image: 'docker:dind', ttyEnabled: true, privileged: true),
|
||||
containerTemplate(name: 'golang', image: 'golang:1.16.3', ttyEnabled: true)
|
||||
]) {
|
||||
node(POD_LABEL) {
|
||||
def scmInfo
|
||||
|
||||
stage('checkout') {
|
||||
scmInfo = checkout(scm)
|
||||
}
|
||||
|
||||
stage('dependencies') {
|
||||
container('golang') {
|
||||
sh 'GO111MODULE=off go get -u github.com/client9/misspell/cmd/misspell'
|
||||
sh 'GO111MODULE=off go get -u github.com/gordonklaus/ineffassign'
|
||||
sh 'GO111MODULE=off go get -u golang.org/x/lint/golint'
|
||||
}
|
||||
container('docker') {
|
||||
sh 'apk add --no-cache make bash git'
|
||||
}
|
||||
}
|
||||
stage('check') {
|
||||
parallel (
|
||||
getGolangStages(["assert-fmt", "lint", "vet", "ineffassign", "misspell"])
|
||||
)
|
||||
}
|
||||
stage('test') {
|
||||
parallel (
|
||||
getGolangStages(["test"])
|
||||
)
|
||||
}
|
||||
|
||||
def versionInfo
|
||||
stage('version') {
|
||||
container('docker') {
|
||||
versionInfo = getVersionInfo(scmInfo)
|
||||
println "versionInfo=${versionInfo}"
|
||||
}
|
||||
}
|
||||
|
||||
def dist = 'ubuntu20.04'
|
||||
def arch = 'amd64'
|
||||
def stageLabel = "${dist}-${arch}"
|
||||
|
||||
stage('build-one') {
|
||||
container('docker') {
|
||||
stage (stageLabel) {
|
||||
sh "make ${dist}-${arch}"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('release') {
|
||||
container('docker') {
|
||||
stage (stageLabel) {
|
||||
|
||||
def component = 'main'
|
||||
def repository = 'sw-gpu-cloudnative-debian-local/pool/main/'
|
||||
|
||||
def uploadSpec = """{
|
||||
"files":
|
||||
[ {
|
||||
"pattern": "./dist/${dist}/${arch}/*.deb",
|
||||
"target": "${repository}",
|
||||
"props": "deb.distribution=${dist};deb.component=${component};deb.architecture=${arch}"
|
||||
}
|
||||
]
|
||||
}"""
|
||||
|
||||
sh "echo starting release with versionInfo=${versionInfo}"
|
||||
if (versionInfo.isTag) {
|
||||
// upload to artifactory repository
|
||||
def server = Artifactory.server 'sw-gpu-artifactory'
|
||||
server.upload spec: uploadSpec
|
||||
} else {
|
||||
sh "echo skipping release for non-tagged build"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def getGolangStages(def targets) {
|
||||
stages = [:]
|
||||
|
||||
for (t in targets) {
|
||||
stages[t] = getLintClosure(t)
|
||||
}
|
||||
|
||||
return stages
|
||||
}
|
||||
|
||||
def getLintClosure(def target) {
|
||||
return {
|
||||
container('golang') {
|
||||
stage(target) {
|
||||
sh "make ${target}"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// getVersionInfo returns a hash of version info
|
||||
def getVersionInfo(def scmInfo) {
|
||||
def versionInfo = [
|
||||
isTag: isTag(scmInfo.GIT_BRANCH)
|
||||
]
|
||||
|
||||
scmInfo.each { k, v -> versionInfo[k] = v }
|
||||
return versionInfo
|
||||
}
|
||||
|
||||
def isTag(def branch) {
|
||||
if (!branch.startsWith('v')) {
|
||||
return false
|
||||
}
|
||||
|
||||
def version = shOutput('git describe --all --exact-match --always')
|
||||
return version == "tags/${branch}"
|
||||
}
|
||||
|
||||
def shOuptut(def script) {
|
||||
return sh(script: script, returnStdout: true).trim()
|
||||
}
|
||||
90
Makefile
90
Makefile
@@ -38,8 +38,8 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
|
||||
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
|
||||
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
|
||||
|
||||
CHECK_TARGETS := lint
|
||||
MAKE_TARGETS := binaries build check fmt test examples cmds coverage generate licenses vendor check-vendor $(CHECK_TARGETS)
|
||||
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
|
||||
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
|
||||
|
||||
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
|
||||
|
||||
@@ -53,26 +53,22 @@ CLI_VERSION = $(VERSION)
|
||||
endif
|
||||
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
|
||||
|
||||
GOOS ?= linux
|
||||
|
||||
binaries: cmds
|
||||
ifneq ($(PREFIX),)
|
||||
cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
|
||||
endif
|
||||
cmds: $(CMD_TARGETS)
|
||||
|
||||
ifneq ($(shell uname),Darwin)
|
||||
EXTLDFLAGS = -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
else
|
||||
EXTLDFLAGS = -Wl,-undefined,dynamic_lookup
|
||||
endif
|
||||
$(CMD_TARGETS): cmd-%:
|
||||
go build -ldflags "-s -w '-extldflags=$(EXTLDFLAGS)' -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
|
||||
build:
|
||||
go build ./...
|
||||
GOOS=$(GOOS) go build ./...
|
||||
|
||||
examples: $(EXAMPLE_TARGETS)
|
||||
$(EXAMPLE_TARGETS): example-%:
|
||||
go build ./examples/$(*)
|
||||
GOOS=$(GOOS) go build ./examples/$(*)
|
||||
|
||||
all: check test build binary
|
||||
check: $(CHECK_TARGETS)
|
||||
@@ -82,28 +78,37 @@ fmt:
|
||||
go list -f '{{.Dir}}' $(MODULE)/... \
|
||||
| xargs gofmt -s -l -w
|
||||
|
||||
# Apply goimports -local github.com/NVIDIA/container-toolkit to the codebase
|
||||
goimports:
|
||||
go list -f {{.Dir}} $(MODULE)/... \
|
||||
| xargs goimports -local $(MODULE) -w
|
||||
assert-fmt:
|
||||
go list -f '{{.Dir}}' $(MODULE)/... \
|
||||
| xargs gofmt -s -l > fmt.out
|
||||
@if [ -s fmt.out ]; then \
|
||||
echo "\nERROR: The following files are not formatted:\n"; \
|
||||
cat fmt.out; \
|
||||
rm fmt.out; \
|
||||
exit 1; \
|
||||
else \
|
||||
rm fmt.out; \
|
||||
fi
|
||||
|
||||
ineffassign:
|
||||
ineffassign $(MODULE)/...
|
||||
|
||||
lint:
|
||||
golangci-lint run ./...
|
||||
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
|
||||
go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status
|
||||
|
||||
vendor:
|
||||
go mod tidy
|
||||
go mod vendor
|
||||
go mod verify
|
||||
misspell:
|
||||
misspell $(MODULE)/...
|
||||
|
||||
check-vendor: vendor
|
||||
git diff --quiet HEAD -- go.mod go.sum vendor
|
||||
vet:
|
||||
go vet $(MODULE)/...
|
||||
|
||||
licenses:
|
||||
go-licenses csv $(MODULE)/...
|
||||
|
||||
COVERAGE_FILE := coverage.out
|
||||
test: build cmds
|
||||
go test -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
|
||||
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
|
||||
|
||||
coverage: test
|
||||
cat $(COVERAGE_FILE) | grep -v "_mock.go" > $(COVERAGE_FILE).no-mocks
|
||||
@@ -112,15 +117,32 @@ coverage: test
|
||||
generate:
|
||||
go generate $(MODULE)/...
|
||||
|
||||
$(DOCKER_TARGETS): docker-%:
|
||||
@echo "Running 'make $(*)' in container image $(BUILDIMAGE)"
|
||||
# Generate an image for containerized builds
|
||||
# Note: This image is local only
|
||||
.PHONY: .build-image .pull-build-image .push-build-image
|
||||
.build-image: docker/Dockerfile.devel
|
||||
if [ x"$(SKIP_IMAGE_BUILD)" = x"" ]; then \
|
||||
$(DOCKER) build \
|
||||
--progress=plain \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--tag $(BUILDIMAGE) \
|
||||
-f $(^) \
|
||||
docker; \
|
||||
fi
|
||||
|
||||
.pull-build-image:
|
||||
$(DOCKER) pull $(BUILDIMAGE)
|
||||
|
||||
.push-build-image:
|
||||
$(DOCKER) push $(BUILDIMAGE)
|
||||
|
||||
$(DOCKER_TARGETS): docker-%: .build-image
|
||||
@echo "Running 'make $(*)' in docker container $(BUILDIMAGE)"
|
||||
$(DOCKER) run \
|
||||
--rm \
|
||||
-e GOCACHE=/tmp/.cache/go \
|
||||
-e GOMODCACHE=/tmp/.cache/gomod \
|
||||
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
|
||||
-v $(PWD):/work \
|
||||
-w /work \
|
||||
-e GOCACHE=/tmp/.cache \
|
||||
-v $(PWD):$(PWD) \
|
||||
-w $(PWD) \
|
||||
--user $$(id -u):$$(id -g) \
|
||||
$(BUILDIMAGE) \
|
||||
make $(*)
|
||||
@@ -131,10 +153,8 @@ PHONY: .shell
|
||||
$(DOCKER) run \
|
||||
--rm \
|
||||
-ti \
|
||||
-e GOCACHE=/tmp/.cache/go \
|
||||
-e GOMODCACHE=/tmp/.cache/gomod \
|
||||
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
|
||||
-v $(PWD):/work \
|
||||
-w /work \
|
||||
-e GOCACHE=/tmp/.cache \
|
||||
-v $(PWD):$(PWD) \
|
||||
-w $(PWD) \
|
||||
--user $$(id -u):$$(id -g) \
|
||||
$(BUILDIMAGE)
|
||||
|
||||
@@ -12,10 +12,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG BASE_DIST
|
||||
ARG CUDA_VERSION
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
ARG VERSION="N/A"
|
||||
|
||||
FROM nvidia/cuda:12.4.1-base-ubi8 as build
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
|
||||
|
||||
RUN yum install -y \
|
||||
wget make git gcc \
|
||||
@@ -29,7 +31,7 @@ RUN set -eux; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64 | arm64) ARCH='arm64' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture" ; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
@@ -48,7 +50,17 @@ COPY . .
|
||||
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
|
||||
|
||||
|
||||
FROM nvidia/cuda:12.4.1-base-ubi8
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
ARG BASE_DIST
|
||||
# See https://www.centos.org/centos-linux-eol/
|
||||
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
|
||||
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
|
||||
RUN [[ "${BASE_DIST}" != "centos8" ]] || \
|
||||
( \
|
||||
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
|
||||
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
|
||||
)
|
||||
|
||||
ENV NVIDIA_DISABLE_REQUIRE="true"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=void
|
||||
@@ -12,9 +12,11 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG BASE_DIST
|
||||
ARG CUDA_VERSION
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
|
||||
FROM nvidia/cuda:12.4.1-base-ubuntu20.04
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
ARG ARTIFACTS_ROOT
|
||||
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||
@@ -22,6 +24,7 @@ COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||
WORKDIR /artifacts/packages
|
||||
|
||||
# build-args are added to the manifest.txt file below.
|
||||
ARG BASE_DIST
|
||||
ARG PACKAGE_DIST
|
||||
ARG PACKAGE_VERSION
|
||||
ARG GIT_BRANCH
|
||||
@@ -12,10 +12,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG BASE_DIST
|
||||
ARG CUDA_VERSION
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
ARG VERSION="N/A"
|
||||
|
||||
FROM nvidia/cuda:12.4.1-base-ubuntu20.04 as build
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y wget make git gcc \
|
||||
@@ -29,7 +31,7 @@ RUN set -eux; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64 | arm64) ARCH='arm64' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture" ; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
@@ -47,7 +49,7 @@ COPY . .
|
||||
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
|
||||
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:12.4.1-base-ubuntu20.04
|
||||
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
|
||||
RUN rm -f /etc/apt/sources.list.d/cuda.list
|
||||
@@ -73,6 +75,14 @@ ARG PACKAGE_VERSION
|
||||
ARG TARGETARCH
|
||||
ENV PACKAGE_ARCH ${TARGETARCH}
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container/stable"
|
||||
ARG LIBNVIDIA_CONTAINER0_VERSION
|
||||
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
|
||||
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
|
||||
--output ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb && \
|
||||
dpkg -i ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb; \
|
||||
fi
|
||||
|
||||
RUN dpkg -i \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \
|
||||
@@ -45,7 +45,7 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
|
||||
|
||||
##### Public rules #####
|
||||
DEFAULT_PUSH_TARGET := ubuntu20.04
|
||||
DISTRIBUTIONS := ubuntu20.04 ubi8
|
||||
DISTRIBUTIONS := ubuntu20.04 ubi8 centos7
|
||||
|
||||
META_TARGETS := packaging
|
||||
|
||||
@@ -56,9 +56,9 @@ TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
|
||||
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
||||
|
||||
ifneq ($(BUILD_MULTI_ARCH_IMAGES),true)
|
||||
include $(CURDIR)/deployments/container/native-only.mk
|
||||
include $(CURDIR)/build/container/native-only.mk
|
||||
else
|
||||
include $(CURDIR)/deployments/container/multi-arch.mk
|
||||
include $(CURDIR)/build/container/multi-arch.mk
|
||||
endif
|
||||
|
||||
# For the default push target we also push a short tag equal to the version.
|
||||
@@ -84,7 +84,7 @@ push-short:
|
||||
|
||||
|
||||
build-%: DIST = $(*)
|
||||
build-%: DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile.$(DOCKERFILE_SUFFIX)
|
||||
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
|
||||
|
||||
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
|
||||
|
||||
@@ -96,7 +96,10 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
|
||||
--tag $(IMAGE) \
|
||||
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
|
||||
--build-arg BASE_DIST="$(BASE_DIST)" \
|
||||
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg LIBNVIDIA_CONTAINER0_VERSION="$(LIBNVIDIA_CONTAINER0_DEPENDENCY)" \
|
||||
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
|
||||
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
|
||||
--build-arg VERSION="$(VERSION)" \
|
||||
@@ -108,12 +111,20 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
$(CURDIR)
|
||||
|
||||
|
||||
build-ubuntu%: BASE_DIST = $(*)
|
||||
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
|
||||
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
|
||||
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
|
||||
|
||||
build-ubi8: DOCKERFILE_SUFFIX := ubi8
|
||||
build-ubi8: BASE_DIST := ubi8
|
||||
build-ubi8: DOCKERFILE_SUFFIX := centos
|
||||
build-ubi8: PACKAGE_DIST = centos7
|
||||
|
||||
build-centos7: BASE_DIST = $(*)
|
||||
build-centos7: DOCKERFILE_SUFFIX := centos
|
||||
build-centos7: PACKAGE_DIST = $(BASE_DIST)
|
||||
|
||||
build-packaging: BASE_DIST := ubuntu20.04
|
||||
build-packaging: DOCKERFILE_SUFFIX := packaging
|
||||
build-packaging: PACKAGE_ARCH := amd64
|
||||
build-packaging: PACKAGE_DIST = all
|
||||
@@ -134,7 +145,9 @@ test-packaging:
|
||||
@echo "Testing package image contents"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/aarch64" || echo "Missing centos7/aarch64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/x86_64" || echo "Missing centos7/x86_64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/aarch64" || echo "Missing centos8/aarch64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/ppc64le" || echo "Missing centos8/ppc64le"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/x86_64" || echo "Missing centos8/x86_64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/amd64" || echo "Missing ubuntu18.04/amd64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/arm64" || echo "Missing ubuntu18.04/arm64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/ppc64le" || echo "Missing ubuntu18.04/ppc64le"
|
||||
@@ -16,6 +16,9 @@ PUSH_ON_BUILD ?= false
|
||||
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
|
||||
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
|
||||
|
||||
# We only have x86_64 packages for centos7
|
||||
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
# We only generate amd64 image for ubuntu18.04
|
||||
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
@@ -9,10 +9,9 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/mod/semver"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -23,7 +22,6 @@ const (
|
||||
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
|
||||
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
|
||||
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
|
||||
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
|
||||
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
||||
)
|
||||
|
||||
@@ -39,7 +37,6 @@ type nvidiaConfig struct {
|
||||
Devices string
|
||||
MigConfigDevices string
|
||||
MigMonitorDevices string
|
||||
ImexChannels string
|
||||
DriverCapabilities string
|
||||
// Requirements defines the requirements DSL for the container to run.
|
||||
// This is empty if no specific requirements are needed, or if requirements are
|
||||
@@ -177,7 +174,7 @@ func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *stri
|
||||
// if specified.
|
||||
var hasSwarmEnvvar bool
|
||||
for _, envvar := range swarmResourceEnvvars {
|
||||
if image.HasEnvvar(envvar) {
|
||||
if _, exists := image[envvar]; exists {
|
||||
hasSwarmEnvvar = true
|
||||
break
|
||||
}
|
||||
@@ -260,39 +257,28 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMigConfigDevices(image image.CUDA) *string {
|
||||
return getMigDevices(image, envNVMigConfigDevices)
|
||||
}
|
||||
|
||||
func getMigMonitorDevices(image image.CUDA) *string {
|
||||
return getMigDevices(image, envNVMigMonitorDevices)
|
||||
}
|
||||
|
||||
func getMigDevices(image image.CUDA, envvar string) *string {
|
||||
if !image.HasEnvvar(envvar) {
|
||||
return nil
|
||||
func getMigConfigDevices(env map[string]string) *string {
|
||||
if devices, ok := env[envNVMigConfigDevices]; ok {
|
||||
return &devices
|
||||
}
|
||||
devices := image.Getenv(envvar)
|
||||
return &devices
|
||||
return nil
|
||||
}
|
||||
|
||||
func getImexChannels(image image.CUDA) *string {
|
||||
if !image.HasEnvvar(envNVImexChannels) {
|
||||
return nil
|
||||
func getMigMonitorDevices(env map[string]string) *string {
|
||||
if devices, ok := env[envNVMigMonitorDevices]; ok {
|
||||
return &devices
|
||||
}
|
||||
chans := image.Getenv(envNVImexChannels)
|
||||
return &chans
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
|
||||
func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities {
|
||||
// We use the default driver capabilities by default. This is filtered to only include the
|
||||
// supported capabilities
|
||||
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
|
||||
|
||||
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
||||
|
||||
capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities)
|
||||
capsEnv := cudaImage.Getenv(envNVDriverCapabilities)
|
||||
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
|
||||
|
||||
if !capsEnvSpecified && legacyImage {
|
||||
// Environment variable unset with legacy image: set all capabilities.
|
||||
@@ -338,11 +324,6 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
var imexChannels string
|
||||
if c := getImexChannels(image); c != nil {
|
||||
imexChannels = *c
|
||||
}
|
||||
|
||||
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
|
||||
|
||||
requirements, err := image.GetRequirements()
|
||||
@@ -354,7 +335,6 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
|
||||
Devices: devices,
|
||||
MigConfigDevices: migConfigDevices,
|
||||
MigMonitorDevices: migMonitorDevices,
|
||||
ImexChannels: imexChannels,
|
||||
DriverCapabilities: driverCapabilities,
|
||||
Requirements: requirements,
|
||||
}
|
||||
|
||||
@@ -5,9 +5,8 @@ import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetNvidiaConfig(t *testing.T) {
|
||||
@@ -466,9 +465,6 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(tc.env),
|
||||
)
|
||||
// Wrap the call to getNvidiaConfig() in a closure.
|
||||
var config *nvidiaConfig
|
||||
getConfig := func() {
|
||||
@@ -477,7 +473,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
defaultConfig, _ := getDefaultHookConfig()
|
||||
hookConfig = &defaultConfig
|
||||
}
|
||||
config = getNvidiaConfig(hookConfig, image, nil, tc.privileged)
|
||||
config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged)
|
||||
}
|
||||
|
||||
// For any tests that are expected to panic, make sure they do.
|
||||
@@ -682,17 +678,13 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
||||
// Wrap the call to getDevices() in a closure.
|
||||
var devices *string
|
||||
getDevices := func() {
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(
|
||||
map[string]string{
|
||||
envNVVisibleDevices: tc.envvarDevices,
|
||||
},
|
||||
),
|
||||
)
|
||||
env := map[string]string{
|
||||
envNVVisibleDevices: tc.envvarDevices,
|
||||
}
|
||||
hookConfig, _ := getDefaultHookConfig()
|
||||
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||
devices = getDevices(&hookConfig, image, tc.mountDevices, tc.privileged)
|
||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
|
||||
}
|
||||
|
||||
// For all other tests, just grab the devices and check the results
|
||||
@@ -913,10 +905,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
|
||||
for i, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(tc.env),
|
||||
)
|
||||
devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
|
||||
devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars)
|
||||
if tc.expectedDevices == nil {
|
||||
require.Nil(t, devices, "%d: %v", i, tc)
|
||||
return
|
||||
@@ -1026,17 +1015,14 @@ func TestGetDriverCapabilities(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
var capabilities string
|
||||
var capabilites string
|
||||
|
||||
c := HookConfig{
|
||||
SupportedDriverCapabilities: tc.supportedCapabilities,
|
||||
}
|
||||
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(tc.env),
|
||||
)
|
||||
getDriverCapabilities := func() {
|
||||
capabilities = c.getDriverCapabilities(image, tc.legacyImage).String()
|
||||
capabilites = c.getDriverCapabilities(tc.env, tc.legacyImage).String()
|
||||
}
|
||||
|
||||
if tc.expectedPanic {
|
||||
@@ -1045,7 +1031,7 @@ func TestGetDriverCapabilities(t *testing.T) {
|
||||
}
|
||||
|
||||
getDriverCapabilities()
|
||||
require.EqualValues(t, tc.expectedCapabilities, capabilities)
|
||||
require.EqualValues(t, tc.expectedCapabilities, capabilites)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,8 @@ const (
|
||||
driverPath = "/run/nvidia/driver"
|
||||
)
|
||||
|
||||
var defaultPaths = [...]string{}
|
||||
|
||||
// HookConfig : options for the nvidia-container-runtime-hook.
|
||||
type HookConfig config.Config
|
||||
|
||||
|
||||
@@ -21,9 +21,8 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetHookConfig(t *testing.T) {
|
||||
|
||||
@@ -111,8 +111,8 @@ func doPrestart() {
|
||||
}
|
||||
args = append(args, "configure")
|
||||
|
||||
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
|
||||
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
|
||||
if cli.Ldconfig != "" {
|
||||
args = append(args, fmt.Sprintf("--ldconfig=%s", cli.Ldconfig))
|
||||
}
|
||||
if cli.NoCgroups {
|
||||
args = append(args, "--no-cgroups")
|
||||
@@ -126,9 +126,6 @@ func doPrestart() {
|
||||
if len(nvidia.MigMonitorDevices) > 0 {
|
||||
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
|
||||
}
|
||||
if len(nvidia.ImexChannels) > 0 {
|
||||
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
|
||||
}
|
||||
|
||||
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
|
||||
if len(cap) == 0 {
|
||||
@@ -145,7 +142,6 @@ func doPrestart() {
|
||||
args = append(args, rootfs)
|
||||
|
||||
env := append(os.Environ(), cli.Environment...)
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection?
|
||||
err = syscall.Exec(args[0], args, env)
|
||||
log.Panicln("exec failed:", err)
|
||||
}
|
||||
|
||||
@@ -85,126 +85,3 @@ Alternatively the NVIDIA Container Runtime can be set as the default runtime for
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Environment variables (OCI spec)
|
||||
|
||||
Each environment variable maps to an command-line argument for `nvidia-container-cli` from [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
|
||||
These variables are already set in our [official CUDA images](https://hub.docker.com/r/nvidia/cuda/).
|
||||
|
||||
### `NVIDIA_VISIBLE_DEVICES`
|
||||
This variable controls which GPUs will be made accessible inside the container.
|
||||
|
||||
#### Possible values
|
||||
* `0,1,2`, `GPU-fef8089b` …: a comma-separated list of GPU UUID(s) or index(es).
|
||||
* `all`: all GPUs will be accessible, this is the default value in our container images.
|
||||
* `none`: no GPU will be accessible, but driver capabilities will be enabled.
|
||||
* `void` or *empty* or *unset*: `nvidia-container-runtime` will have the same behavior as `runc`.
|
||||
|
||||
**Note**: When running on a MIG capable device, the following values will also be available:
|
||||
* `0:0,0:1,1:0`, `MIG-GPU-fef8089b/0/1` …: a comma-separated list of MIG Device UUID(s) or index(es).
|
||||
|
||||
Where the MIG device indices have the form `<GPU Device Index>:<MIG Device Index>` as seen in the example output:
|
||||
```
|
||||
$ nvidia-smi -L
|
||||
GPU 0: Graphics Device (UUID: GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5)
|
||||
MIG Device 0: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/0)
|
||||
MIG Device 1: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/1)
|
||||
MIG Device 2: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/11/0)
|
||||
```
|
||||
|
||||
### `NVIDIA_MIG_CONFIG_DEVICES`
|
||||
This variable controls which of the visible GPUs can have their MIG
|
||||
configuration managed from within the container. This includes enabling and
|
||||
disabling MIG mode, creating and destroying GPU Instances and Compute
|
||||
Instances, etc.
|
||||
|
||||
#### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG configurations managed.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/config` file on the host.
|
||||
|
||||
### `NVIDIA_MIG_MONITOR_DEVICES`
|
||||
This variable controls which of the visible GPUs can have aggregate information
|
||||
about all of their MIG devices monitored from within the container. This
|
||||
includes inspecting the aggregate memory usage, listing the aggregate running
|
||||
processes, etc.
|
||||
|
||||
#### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG devices monitored.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/monitor` file on the host.
|
||||
|
||||
### `NVIDIA_DRIVER_CAPABILITIES`
|
||||
This option controls which driver libraries/binaries will be mounted inside the container.
|
||||
|
||||
#### Possible values
|
||||
* `compute,video`, `graphics,utility` …: a comma-separated list of driver features the container needs.
|
||||
* `all`: enable all available driver capabilities.
|
||||
* *empty* or *unset*: use default driver capability: `utility,compute`.
|
||||
|
||||
#### Supported driver capabilities
|
||||
* `compute`: required for CUDA and OpenCL applications.
|
||||
* `compat32`: required for running 32-bit applications.
|
||||
* `graphics`: required for running OpenGL and Vulkan applications.
|
||||
* `utility`: required for using `nvidia-smi` and NVML.
|
||||
* `video`: required for using the Video Codec SDK.
|
||||
* `display`: required for leveraging X11 display.
|
||||
|
||||
### `NVIDIA_REQUIRE_*`
|
||||
A logical expression to define constraints on the configurations supported by the container.
|
||||
|
||||
#### Supported constraints
|
||||
* `cuda`: constraint on the CUDA driver version.
|
||||
* `driver`: constraint on the driver version.
|
||||
* `arch`: constraint on the compute architectures of the selected GPUs.
|
||||
* `brand`: constraint on the brand of the selected GPUs (e.g. GeForce, Tesla, GRID).
|
||||
|
||||
#### Expressions
|
||||
Multiple constraints can be expressed in a single environment variable: space-separated constraints are ORed, comma-separated constraints are ANDed.
|
||||
Multiple environment variables of the form `NVIDIA_REQUIRE_*` are ANDed together.
|
||||
|
||||
### `NVIDIA_DISABLE_REQUIRE`
|
||||
Single switch to disable all the constraints of the form `NVIDIA_REQUIRE_*`.
|
||||
|
||||
### `NVIDIA_REQUIRE_CUDA`
|
||||
|
||||
The version of the CUDA toolkit used by the container. It is an instance of the generic `NVIDIA_REQUIRE_*` case and it is set by official CUDA images.
|
||||
If the version of the NVIDIA driver is insufficient to run this version of CUDA, the container will not be started.
|
||||
|
||||
#### Possible values
|
||||
* `cuda>=7.5`, `cuda>=8.0`, `cuda>=9.0` …: any valid CUDA version in the form `major.minor`.
|
||||
|
||||
### `CUDA_VERSION`
|
||||
Similar to `NVIDIA_REQUIRE_CUDA`, for legacy CUDA images.
|
||||
In addition, if `NVIDIA_REQUIRE_CUDA` is not set, `NVIDIA_VISIBLE_DEVICES` and `NVIDIA_DRIVER_CAPABILITIES` will default to `all`.
|
||||
|
||||
## Usage example
|
||||
|
||||
**NOTE:** The use of the `nvidia-container-runtime` as CLI replacement for `runc` is uncommon and is only provided for completeness.
|
||||
|
||||
Although the `nvidia-container-runtime` is typically configured as a replacement for `runc` or `crun` in various container engines, it can also be
|
||||
invoked from the command line as `runc` would. For example:
|
||||
|
||||
```sh
|
||||
# Setup a rootfs based on Ubuntu 16.04
|
||||
cd $(mktemp -d) && mkdir rootfs
|
||||
curl -sS http://cdimage.ubuntu.com/ubuntu-base/releases/16.04/release/ubuntu-base-16.04.6-base-amd64.tar.gz | tar --exclude 'dev/*' -C rootfs -xz
|
||||
|
||||
# Create an OCI runtime spec
|
||||
nvidia-container-runtime spec
|
||||
sed -i 's;"sh";"nvidia-smi";' config.json
|
||||
sed -i 's;\("TERM=xterm"\);\1, "NVIDIA_VISIBLE_DEVICES=0";' config.json
|
||||
|
||||
# Run the container
|
||||
sudo nvidia-container-runtime run nvidia_smi
|
||||
```
|
||||
|
||||
@@ -3,7 +3,7 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
@@ -11,12 +11,11 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -87,7 +86,6 @@ func TestBadInput(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
err = cmdCreate.Run()
|
||||
@@ -105,7 +103,6 @@ func TestGoodInput(t *testing.T) {
|
||||
t.Fatalf("error generating runtime spec: %v", err)
|
||||
}
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle", cfg.bundlePath(), "testcontainer")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
|
||||
output, err := cmdRun.CombinedOutput()
|
||||
@@ -116,7 +113,6 @@ func TestGoodInput(t *testing.T) {
|
||||
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
|
||||
require.Empty(t, spec.Hooks, "there should be no hooks in config.json")
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
err = cmdCreate.Run()
|
||||
@@ -162,7 +158,6 @@ func TestDuplicateHook(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test how runtime handles already existing prestart hook in config.json
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
output, err := cmdCreate.CombinedOutput()
|
||||
@@ -193,16 +188,15 @@ func (c testConfig) getRuntimeSpec() (specs.Spec, error) {
|
||||
}
|
||||
defer jsonFile.Close()
|
||||
|
||||
jsonContent, err := io.ReadAll(jsonFile)
|
||||
switch {
|
||||
case err != nil:
|
||||
jsonContent, err := ioutil.ReadAll(jsonFile)
|
||||
if err != nil {
|
||||
return spec, err
|
||||
case json.Valid(jsonContent):
|
||||
} else if json.Valid(jsonContent) {
|
||||
err = json.Unmarshal(jsonContent, &spec)
|
||||
if err != nil {
|
||||
return spec, err
|
||||
}
|
||||
default:
|
||||
} else {
|
||||
err = json.NewDecoder(bytes.NewReader(jsonContent)).Decode(&spec)
|
||||
if err != nil {
|
||||
return spec, err
|
||||
@@ -232,7 +226,6 @@ func (c testConfig) generateNewRuntimeSpec() error {
|
||||
return err
|
||||
}
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmd := exec.Command("cp", c.unmodifiedSpecFile(), c.specFilePath())
|
||||
err = cmd.Run()
|
||||
if err != nil {
|
||||
|
||||
@@ -43,7 +43,7 @@ By default, all commands output to `STDOUT`, but specifying the `--output` flag
|
||||
|
||||
### Generate CDI specifications
|
||||
|
||||
The [Container Device Interface (CDI)](https://tags.cncf.io/container-device-interface) provides
|
||||
The [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface) provides
|
||||
a vendor-agnostic mechanism to make arbitrary devices accessible in containerized environments. To allow NVIDIA devices to be
|
||||
used in these environments, the NVIDIA Container Toolkit CLI includes functionality to generate a CDI specification for the
|
||||
available NVIDIA GPUs in a system.
|
||||
|
||||
@@ -17,12 +17,11 @@
|
||||
package cdi
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/list"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -22,15 +22,14 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
cdi "tags.cncf.io/container-device-interface/pkg/parser"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -42,18 +41,15 @@ type command struct {
|
||||
}
|
||||
|
||||
type options struct {
|
||||
output string
|
||||
format string
|
||||
deviceNameStrategies cli.StringSlice
|
||||
driverRoot string
|
||||
devRoot string
|
||||
nvidiaCTKPath string
|
||||
ldconfigPath string
|
||||
mode string
|
||||
vendor string
|
||||
class string
|
||||
output string
|
||||
format string
|
||||
deviceNameStrategy string
|
||||
driverRoot string
|
||||
nvidiaCTKPath string
|
||||
mode string
|
||||
vendor string
|
||||
class string
|
||||
|
||||
configSearchPaths cli.StringSlice
|
||||
librarySearchPaths cli.StringSlice
|
||||
|
||||
csv struct {
|
||||
@@ -87,11 +83,6 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "config-search-path",
|
||||
Usage: "Specify the path to search for config files when discovering the entities that should be included in the CDI specification.",
|
||||
Destination: &opts.configSearchPaths,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "output",
|
||||
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
|
||||
@@ -111,15 +102,10 @@ func (m command) build() *cli.Command {
|
||||
Destination: &opts.mode,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "dev-root",
|
||||
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
|
||||
Destination: &opts.devRoot,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "device-name-strategy",
|
||||
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
|
||||
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID),
|
||||
Destination: &opts.deviceNameStrategies,
|
||||
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
|
||||
Value: nvcdi.DeviceNameStrategyIndex,
|
||||
Destination: &opts.deviceNameStrategy,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "driver-root",
|
||||
@@ -136,11 +122,6 @@ func (m command) build() *cli.Command {
|
||||
Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.",
|
||||
Destination: &opts.nvidiaCTKPath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "ldconfig-path",
|
||||
Usage: "Specify the path to use for ldconfig in the generated CDI specification",
|
||||
Destination: &opts.ldconfigPath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "vendor",
|
||||
Aliases: []string{"cdi-vendor"},
|
||||
@@ -191,11 +172,9 @@ func (m command) validateFlags(c *cli.Context, opts *options) error {
|
||||
return fmt.Errorf("invalid discovery mode: %v", opts.mode)
|
||||
}
|
||||
|
||||
for _, strategy := range opts.deviceNameStrategies.Value() {
|
||||
_, err := nvcdi.NewDeviceNamer(strategy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath)
|
||||
@@ -249,24 +228,17 @@ func formatFromFilename(filename string) string {
|
||||
}
|
||||
|
||||
func (m command) generateSpec(opts *options) (spec.Interface, error) {
|
||||
var deviceNamers []nvcdi.DeviceNamer
|
||||
for _, strategy := range opts.deviceNameStrategies.Value() {
|
||||
deviceNamer, err := nvcdi.NewDeviceNamer(strategy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device namer: %v", err)
|
||||
}
|
||||
deviceNamers = append(deviceNamers, deviceNamer)
|
||||
deviceNamer, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device namer: %v", err)
|
||||
}
|
||||
|
||||
cdilib, err := nvcdi.New(
|
||||
nvcdi.WithLogger(m.logger),
|
||||
nvcdi.WithDriverRoot(opts.driverRoot),
|
||||
nvcdi.WithDevRoot(opts.devRoot),
|
||||
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
|
||||
nvcdi.WithLdconfigPath(opts.ldconfigPath),
|
||||
nvcdi.WithDeviceNamers(deviceNamers...),
|
||||
nvcdi.WithMode(opts.mode),
|
||||
nvcdi.WithConfigSearchPaths(opts.configSearchPaths.Value()),
|
||||
nvcdi.WithDeviceNamer(deviceNamer),
|
||||
nvcdi.WithMode(string(opts.mode)),
|
||||
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
|
||||
nvcdi.WithCSVFiles(opts.csv.files.Value()),
|
||||
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
|
||||
|
||||
@@ -17,22 +17,18 @@
|
||||
package list
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
type config struct {
|
||||
cdiSpecDirs cli.StringSlice
|
||||
}
|
||||
type config struct{}
|
||||
|
||||
// NewCommand constructs a cdi list command with the specified logger
|
||||
func NewCommand(logger logger.Interface) *cli.Command {
|
||||
@@ -58,44 +54,30 @@ func (m command) build() *cli.Command {
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "spec-dir",
|
||||
Usage: "specify the directories to scan for CDI specifications",
|
||||
Value: cli.NewStringSlice(cdi.DefaultSpecDirs...),
|
||||
Destination: &cfg.cdiSpecDirs,
|
||||
},
|
||||
}
|
||||
c.Flags = []cli.Flag{}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(c *cli.Context, cfg *config) error {
|
||||
if len(cfg.cdiSpecDirs.Value()) == 0 {
|
||||
return errors.New("at least one CDI specification directory must be specified")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
registry, err := cdi.NewCache(
|
||||
cdi.WithAutoRefresh(false),
|
||||
cdi.WithSpecDirs(cfg.cdiSpecDirs.Value()...),
|
||||
cdi.WithSpecDirs(cdi.DefaultSpecDirs...),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create CDI cache: %v", err)
|
||||
}
|
||||
|
||||
_ = registry.Refresh()
|
||||
if errors := registry.GetErrors(); len(errors) > 0 {
|
||||
m.logger.Warningf("The following registry errors were reported:")
|
||||
for k, err := range errors {
|
||||
m.logger.Warningf("%v: %v", k, err)
|
||||
}
|
||||
}
|
||||
|
||||
refreshErr := registry.Refresh()
|
||||
devices := registry.ListDevices()
|
||||
m.logger.Infof("Found %d CDI devices", len(devices))
|
||||
if refreshErr != nil {
|
||||
m.logger.Warningf("Refreshing the CDI registry returned the following error(s): %v", refreshErr)
|
||||
}
|
||||
for _, device := range devices {
|
||||
fmt.Printf("%s\n", device)
|
||||
}
|
||||
|
||||
@@ -21,14 +21,18 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type loadSaver interface {
|
||||
Load() (spec.Interface, error)
|
||||
Save(spec.Interface) error
|
||||
}
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
@@ -40,9 +44,8 @@ type transformOptions struct {
|
||||
|
||||
type options struct {
|
||||
transformOptions
|
||||
from string
|
||||
to string
|
||||
relativeTo string
|
||||
from string
|
||||
to string
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
@@ -69,11 +72,6 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "from",
|
||||
Usage: "specify the root to be transformed",
|
||||
Destination: &opts.from,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "input",
|
||||
Usage: "Specify the file to read the CDI specification from. If this is '-' the specification is read from STDIN",
|
||||
@@ -86,10 +84,9 @@ func (m command) build() *cli.Command {
|
||||
Destination: &opts.output,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "relative-to",
|
||||
Usage: "specify whether the transform is relative to the host or to the container. One of [ host | container ]",
|
||||
Value: "host",
|
||||
Destination: &opts.relativeTo,
|
||||
Name: "from",
|
||||
Usage: "specify the root to be transformed",
|
||||
Destination: &opts.from,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "to",
|
||||
@@ -103,12 +100,6 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
func (m command) validateFlags(c *cli.Context, opts *options) error {
|
||||
switch opts.relativeTo {
|
||||
case "host":
|
||||
case "container":
|
||||
default:
|
||||
return fmt.Errorf("invalid --relative-to value: %v", opts.relativeTo)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -118,10 +109,9 @@ func (m command) run(c *cli.Context, opts *options) error {
|
||||
return fmt.Errorf("failed to load CDI specification: %w", err)
|
||||
}
|
||||
|
||||
err = transformroot.New(
|
||||
transformroot.WithRoot(opts.from),
|
||||
transformroot.WithTargetRoot(opts.to),
|
||||
transformroot.WithRelativeTo(opts.relativeTo),
|
||||
err = transform.NewRootTransformer(
|
||||
opts.from,
|
||||
opts.to,
|
||||
).Transform(spec.Raw())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to transform CDI specification: %w", err)
|
||||
|
||||
@@ -17,10 +17,9 @@
|
||||
package transform
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -109,11 +109,7 @@ func run(c *cli.Context, opts *options) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid --set option %v: %w", set, err)
|
||||
}
|
||||
if value == nil {
|
||||
_ = cfgToml.Delete(key)
|
||||
} else {
|
||||
cfgToml.Set(key, value)
|
||||
}
|
||||
cfgToml.Set(key, value)
|
||||
}
|
||||
|
||||
if err := opts.EnsureOutputFolder(); err != nil {
|
||||
@@ -125,10 +121,10 @@ func run(c *cli.Context, opts *options) error {
|
||||
}
|
||||
defer output.Close()
|
||||
|
||||
if _, err := cfgToml.Save(output); err != nil {
|
||||
return fmt.Errorf("failed to save config: %v", err)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cfgToml.Save(output)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -150,25 +146,20 @@ func setFlagToKeyValue(setFlag string) (string, interface{}, error) {
|
||||
|
||||
kind := field.Kind()
|
||||
if len(setParts) != 2 {
|
||||
if kind == reflect.Bool || (kind == reflect.Pointer && field.Elem().Kind() == reflect.Bool) {
|
||||
if kind == reflect.Bool {
|
||||
return key, true, nil
|
||||
}
|
||||
return key, nil, fmt.Errorf("%w: expected key=value; got %v", errInvalidFormat, setFlag)
|
||||
}
|
||||
|
||||
value := setParts[1]
|
||||
if kind == reflect.Pointer && value != "nil" {
|
||||
kind = field.Elem().Kind()
|
||||
}
|
||||
switch kind {
|
||||
case reflect.Pointer:
|
||||
return key, nil, nil
|
||||
case reflect.Bool:
|
||||
b, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return key, value, fmt.Errorf("%w: %w", errInvalidFormat, err)
|
||||
}
|
||||
return key, b, nil
|
||||
return key, b, err
|
||||
case reflect.String:
|
||||
return key, value, nil
|
||||
case reflect.Slice:
|
||||
@@ -210,7 +201,7 @@ func getStruct(current reflect.Type, paths ...string) (reflect.StructField, erro
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if strings.SplitN(v, ",", 2)[0] != tomlField {
|
||||
if v != tomlField {
|
||||
continue
|
||||
}
|
||||
if len(paths) == 1 {
|
||||
|
||||
@@ -19,11 +19,10 @@ package defaultsubcommand
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -86,7 +85,8 @@ func (m command) run(c *cli.Context, opts *flags.Options) error {
|
||||
}
|
||||
defer output.Close()
|
||||
|
||||
if _, err = cfgToml.Save(output); err != nil {
|
||||
_, err = cfgToml.Save(output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write output: %v", err)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,18 +17,16 @@
|
||||
package chmod
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -37,8 +35,7 @@ type command struct {
|
||||
|
||||
type config struct {
|
||||
paths cli.StringSlice
|
||||
modeStr string
|
||||
mode fs.FileMode
|
||||
mode string
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
@@ -69,13 +66,13 @@ func (m command) build() *cli.Command {
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "path",
|
||||
Usage: "Specify a path to apply the specified mode to",
|
||||
Usage: "Specifiy a path to apply the specified mode to",
|
||||
Destination: &cfg.paths,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "mode",
|
||||
Usage: "Specify the file mode",
|
||||
Destination: &cfg.modeStr,
|
||||
Destination: &cfg.mode,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
@@ -88,16 +85,10 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
func validateFlags(c *cli.Context, cfg *config) error {
|
||||
if strings.TrimSpace(cfg.modeStr) == "" {
|
||||
if strings.TrimSpace(cfg.mode) == "" {
|
||||
return fmt.Errorf("a non-empty mode must be specified")
|
||||
}
|
||||
|
||||
modeInt, err := strconv.ParseUint(cfg.modeStr, 8, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse mode as octal: %v", err)
|
||||
}
|
||||
cfg.mode = fs.FileMode(modeInt)
|
||||
|
||||
for _, p := range cfg.paths.Value() {
|
||||
if strings.TrimSpace(p) == "" {
|
||||
return fmt.Errorf("paths must not be empty")
|
||||
@@ -121,38 +112,33 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
||||
return fmt.Errorf("empty container root detected")
|
||||
}
|
||||
|
||||
paths := m.getPaths(containerRoot, cfg.paths.Value(), cfg.mode)
|
||||
paths := m.getPaths(containerRoot, cfg.paths.Value())
|
||||
if len(paths) == 0 {
|
||||
m.logger.Debugf("No paths specified; exiting")
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, path := range paths {
|
||||
err = os.Chmod(path, cfg.mode)
|
||||
// in some cases this is not an issue (e.g. whole /dev mounted), see #143
|
||||
if errors.Is(err, fs.ErrPermission) {
|
||||
m.logger.Debugf("Ignoring permission error with chmod: %v", err)
|
||||
err = nil
|
||||
}
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
targets, err := locator.Locate("chmod")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to locate chmod: %v", err)
|
||||
}
|
||||
chmodPath := targets[0]
|
||||
|
||||
return err
|
||||
args := append([]string{filepath.Base(chmodPath), cfg.mode}, paths...)
|
||||
|
||||
return syscall.Exec(chmodPath, args, nil)
|
||||
}
|
||||
|
||||
// getPaths updates the specified paths relative to the root.
|
||||
func (m command) getPaths(root string, paths []string, desiredMode fs.FileMode) []string {
|
||||
func (m command) getPaths(root string, paths []string) []string {
|
||||
var pathsInRoot []string
|
||||
for _, f := range paths {
|
||||
path := filepath.Join(root, f)
|
||||
stat, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
m.logger.Debugf("Skipping path %q: %v", path, err)
|
||||
continue
|
||||
}
|
||||
if (stat.Mode()&(fs.ModePerm|fs.ModeSetuid|fs.ModeSetgid|fs.ModeSticky))^desiredMode == 0 {
|
||||
m.logger.Debugf("Skipping path %q: already desired mode", path)
|
||||
continue
|
||||
}
|
||||
pathsInRoot = append(pathsInRoot, path)
|
||||
}
|
||||
|
||||
|
||||
@@ -22,13 +22,12 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -57,7 +56,7 @@ func (m command) build() *cli.Command {
|
||||
// Create the '' command
|
||||
c := cli.Command{
|
||||
Name: "create-symlinks",
|
||||
Usage: "A hook to create symlinks in the container. This can be used to process CSV mount specs",
|
||||
Usage: "A hook to create symlinks in the container. This can be used to proces CSV mount specs",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
|
||||
@@ -20,10 +20,9 @@ import (
|
||||
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type hookCommand struct {
|
||||
|
||||
@@ -17,27 +17,22 @@
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
type options struct {
|
||||
type config struct {
|
||||
folders cli.StringSlice
|
||||
ldconfigPath string
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
@@ -51,15 +46,12 @@ func NewCommand(logger logger.Interface) *cli.Command {
|
||||
|
||||
// build the update-ldcache command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := options{}
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'update-ldcache' command
|
||||
c := cli.Command{
|
||||
Name: "update-ldcache",
|
||||
Usage: "Update ldcache in a container by running ldconfig",
|
||||
Before: func(c *cli.Context) error {
|
||||
return m.validateFlags(c, &cfg)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
@@ -68,15 +60,9 @@ func (m command) build() *cli.Command {
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "folder",
|
||||
Usage: "Specify a folder to add to /etc/ld.so.conf before updating the ld cache",
|
||||
Usage: "Specifiy a folder to add to /etc/ld.so.conf before updating the ld cache",
|
||||
Destination: &cfg.folders,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "ldconfig-path",
|
||||
Usage: "Specify the path to the ldconfig program",
|
||||
Destination: &cfg.ldconfigPath,
|
||||
Value: "/sbin/ldconfig",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
@@ -87,14 +73,7 @@ func (m command) build() *cli.Command {
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(c *cli.Context, cfg *options) error {
|
||||
if cfg.ldconfigPath == "" {
|
||||
return errors.New("ldconfig-path must be specified")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *options) error {
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
@@ -105,52 +84,23 @@ func (m command) run(c *cli.Context, cfg *options) error {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
ldconfigPath := m.resolveLDConfigPath(cfg.ldconfigPath)
|
||||
args := []string{filepath.Base(ldconfigPath)}
|
||||
_, err = os.Stat(filepath.Join(containerRoot, "/etc/ld.so.cache"))
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
m.logger.Debugf("No ld.so.cache found, skipping update")
|
||||
return nil
|
||||
}
|
||||
|
||||
err = m.createConfig(containerRoot, cfg.folders.Value())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf: %v", err)
|
||||
}
|
||||
|
||||
args := []string{"/sbin/ldconfig"}
|
||||
if containerRoot != "" {
|
||||
args = append(args, "-r", containerRoot)
|
||||
}
|
||||
|
||||
if root(containerRoot).hasPath("/etc/ld.so.cache") {
|
||||
args = append(args, "-C", "/etc/ld.so.cache")
|
||||
} else {
|
||||
m.logger.Debugf("No ld.so.cache found, skipping update")
|
||||
args = append(args, "-N")
|
||||
}
|
||||
|
||||
folders := cfg.folders.Value()
|
||||
if root(containerRoot).hasPath("/etc/ld.so.conf.d") {
|
||||
err := m.createConfig(containerRoot, folders)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
|
||||
}
|
||||
} else {
|
||||
args = append(args, folders...)
|
||||
}
|
||||
|
||||
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
|
||||
// be configured to use a different config file by default.
|
||||
args = append(args, "-f", "/etc/ld.so.conf")
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(ldconfigPath, args, nil)
|
||||
}
|
||||
|
||||
type root string
|
||||
|
||||
func (r root) hasPath(path string) bool {
|
||||
_, err := os.Stat(filepath.Join(string(r), path))
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// resolveLDConfigPath determines the LDConfig path to use for the system.
|
||||
// On systems such as Ubuntu where `/sbin/ldconfig` is a wrapper around
|
||||
// /sbin/ldconfig.real, the latter is returned.
|
||||
func (m command) resolveLDConfigPath(path string) string {
|
||||
return strings.TrimPrefix(config.NormalizeLDConfigPath("@"+path), "@")
|
||||
return syscall.Exec(args[0], args, nil)
|
||||
}
|
||||
|
||||
// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf in the container
|
||||
|
||||
@@ -17,9 +17,8 @@
|
||||
package info
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -19,8 +19,6 @@ package main
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
|
||||
@@ -28,6 +26,7 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
cli "github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
@@ -149,7 +149,6 @@ func (m command) build() *cli.Command {
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "cdi.enabled",
|
||||
Aliases: []string{"cdi.enable"},
|
||||
Usage: "Enable CDI in the configured runtime",
|
||||
Destination: &config.cdi.enabled,
|
||||
},
|
||||
@@ -309,11 +308,9 @@ func enableCDI(config *config, cfg engine.Interface) error {
|
||||
}
|
||||
switch config.runtime {
|
||||
case "containerd":
|
||||
cfg.Set("enable_cdi", true)
|
||||
return cfg.Set("enable_cdi", true)
|
||||
case "docker":
|
||||
cfg.Set("features", map[string]bool{"cdi": true})
|
||||
default:
|
||||
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
|
||||
return cfg.Set("experimental", true)
|
||||
}
|
||||
return nil
|
||||
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
|
||||
}
|
||||
|
||||
@@ -17,10 +17,9 @@
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type runtimeCommand struct {
|
||||
|
||||
@@ -20,11 +20,10 @@ import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci"
|
||||
)
|
||||
|
||||
type allPossible struct {
|
||||
|
||||
@@ -24,12 +24,11 @@ import (
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -87,7 +86,7 @@ func (m command) build() *cli.Command {
|
||||
Usage: "The path to the driver root. `DRIVER_ROOT`/dev is searched for NVIDIA device nodes.",
|
||||
Value: "/",
|
||||
Destination: &cfg.driverRoot,
|
||||
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
|
||||
EnvVars: []string{"DRIVER_ROOT"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "watch",
|
||||
|
||||
@@ -20,10 +20,9 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type nodeLister interface {
|
||||
@@ -64,13 +63,20 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
|
||||
if m.nodeIsBlocked(d) {
|
||||
continue
|
||||
}
|
||||
|
||||
var stat unix.Stat_t
|
||||
err := unix.Stat(d, &stat)
|
||||
if err != nil {
|
||||
m.logger.Warningf("Could not stat device: %v", err)
|
||||
continue
|
||||
}
|
||||
deviceNodes = append(deviceNodes, newDeviceNode(d, stat))
|
||||
deviceNode := deviceNode{
|
||||
path: d,
|
||||
major: unix.Major(uint64(stat.Rdev)),
|
||||
minor: unix.Minor(uint64(stat.Rdev)),
|
||||
}
|
||||
|
||||
deviceNodes = append(deviceNodes, deviceNode)
|
||||
}
|
||||
|
||||
return deviceNodes, nil
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package devchar
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
|
||||
deviceNode := deviceNode{
|
||||
path: d,
|
||||
major: unix.Major(stat.Rdev),
|
||||
minor: unix.Minor(stat.Rdev),
|
||||
}
|
||||
return deviceNode
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
//go:build !linux
|
||||
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package devchar
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
|
||||
deviceNode := deviceNode{
|
||||
path: d,
|
||||
major: unix.Major(uint64(stat.Rdev)),
|
||||
minor: unix.Minor(uint64(stat.Rdev)),
|
||||
}
|
||||
return deviceNode
|
||||
}
|
||||
@@ -19,11 +19,10 @@ package createdevicenodes
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -69,7 +68,7 @@ func (m command) build() *cli.Command {
|
||||
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
|
||||
Value: "/",
|
||||
Destination: &opts.driverRoot,
|
||||
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
|
||||
EnvVars: []string{"DRIVER_ROOT"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "control-devices",
|
||||
|
||||
@@ -19,10 +19,9 @@ package createdevicenodes
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -62,7 +61,7 @@ func (m command) build() *cli.Command {
|
||||
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
|
||||
Value: "/",
|
||||
Destination: &opts.driverRoot,
|
||||
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
|
||||
EnvVars: []string{"DRIVER_ROOT"},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -17,12 +17,11 @@
|
||||
package system
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
devchar "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks"
|
||||
devicenodes "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-device-nodes"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/print-ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -22,7 +22,7 @@ RUN set -eux; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64 | arm64) ARCH='arm64' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture" ; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
ARG GOLANGCI_LINT_VERSION=v1.54.1
|
||||
FROM golang:${GOLANG_VERSION}
|
||||
|
||||
RUN go install golang.org/x/lint/golint@6edffad5e6160f5949cdefc81710b2706fbcd4f6
|
||||
@@ -20,8 +19,3 @@ RUN go install github.com/matryer/moq@latest
|
||||
RUN go install github.com/gordonklaus/ineffassign@d2c82e48359b033cde9cf1307f6d5550b8d61321
|
||||
RUN go install github.com/client9/misspell/cmd/misspell@latest
|
||||
RUN go install github.com/google/go-licenses@latest
|
||||
RUN curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin ${GOLANGCI_LINT_VERSION}
|
||||
|
||||
# We need to set the /work directory as a safe directory.
|
||||
# This allows git commands to run in the container.
|
||||
RUN git config --file=/.gitconfig --add safe.directory /work
|
||||
|
||||
@@ -15,7 +15,7 @@ RUN set -eux; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64 | arm64) ARCH='arm64' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture"; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
|
||||
@@ -33,7 +33,7 @@ RUN set -eux; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64 | arm64) ARCH='arm64' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture"; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
|
||||
@@ -20,7 +20,7 @@ RUN set -eux; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64 | arm64) ARCH='arm64' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture" ; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
|
||||
26
go.mod
26
go.mod
@@ -3,33 +3,31 @@ module github.com/NVIDIA/nvidia-container-toolkit
|
||||
go 1.20
|
||||
|
||||
require (
|
||||
github.com/NVIDIA/go-nvlib v0.2.0
|
||||
github.com/NVIDIA/go-nvml v0.12.0-3
|
||||
github.com/fsnotify/fsnotify v1.7.0
|
||||
github.com/opencontainers/runtime-spec v1.2.0
|
||||
github.com/pelletier/go-toml v1.9.5
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/stretchr/testify v1.9.0
|
||||
github.com/urfave/cli/v2 v2.27.1
|
||||
golang.org/x/mod v0.17.0
|
||||
golang.org/x/sys v0.19.0
|
||||
tags.cncf.io/container-device-interface v0.7.1
|
||||
tags.cncf.io/container-device-interface/specs-go v0.7.0
|
||||
github.com/NVIDIA/go-nvml v0.12.0-1
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.6.0
|
||||
github.com/fsnotify/fsnotify v1.5.4
|
||||
github.com/opencontainers/runtime-spec v1.1.0-rc.2
|
||||
github.com/pelletier/go-toml v1.9.4
|
||||
github.com/sirupsen/logrus v1.9.0
|
||||
github.com/stretchr/testify v1.8.1
|
||||
github.com/urfave/cli/v2 v2.3.0
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884
|
||||
golang.org/x/mod v0.5.0
|
||||
golang.org/x/sys v0.7.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/kr/pretty v0.3.1 // indirect
|
||||
github.com/opencontainers/runc v1.1.6 // indirect
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
|
||||
github.com/opencontainers/selinux v1.11.0 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
|
||||
64
go.sum
64
go.sum
@@ -1,20 +1,20 @@
|
||||
github.com/NVIDIA/go-nvlib v0.2.0 h1:roq+SDstbP1fcy2XVH7wB2Gz2/Ud7Q+NGQYOcVITVrA=
|
||||
github.com/NVIDIA/go-nvlib v0.2.0/go.mod h1:kFuLNTyD1tF6FbRFlk+/EdUW5BrkE+v1Y3A3/9zKSjA=
|
||||
github.com/NVIDIA/go-nvml v0.12.0-3 h1:QwfjYxEqIQVRhl8327g2Y3ZvKResPydpGSKtCIIK9jE=
|
||||
github.com/NVIDIA/go-nvml v0.12.0-3/go.mod h1:SOufGc5Wql+cxrIZ8RyJwVKDYxfbs4WPkHXqadcbfvA=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/NVIDIA/go-nvml v0.12.0-1 h1:6mdjtlFo+17dWL7VFPfuRMtf0061TF4DKls9pkSw6uM=
|
||||
github.com/NVIDIA/go-nvml v0.12.0-1/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
|
||||
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
|
||||
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.6.0 h1:aWwcz/Ep0Fd7ZuBjQGjU/jdPloM7ydhMW13h85jZNvk=
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.6.0/go.mod h1:OQlgtJtDrOxSQ1BWODC8OZK1tzi9W69wek+Jy17ndzo=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
|
||||
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
|
||||
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
|
||||
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
|
||||
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
|
||||
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
@@ -29,37 +29,45 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
|
||||
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
|
||||
github.com/opencontainers/runc v1.1.6 h1:XbhB8IfG/EsnhNvZtNdLB0GBw92GYEFvKlhaJk9jUgA=
|
||||
github.com/opencontainers/runc v1.1.6/go.mod h1:CbUumNnWCuTGFukNXahoo/RFBZvDAgRh/smNYNOhA50=
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
|
||||
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/runtime-spec v1.1.0-rc.2 h1:ucBtEms2tamYYW/SvGpvq9yUN0NEVL6oyLEwDcTSrk8=
|
||||
github.com/opencontainers/runtime-spec v1.1.0-rc.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
|
||||
github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
|
||||
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
|
||||
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
|
||||
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
|
||||
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
|
||||
github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM=
|
||||
github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
|
||||
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
||||
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
|
||||
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
|
||||
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
|
||||
github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho=
|
||||
github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
|
||||
github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M=
|
||||
github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
|
||||
@@ -67,18 +75,22 @@ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHo
|
||||
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
|
||||
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
|
||||
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
|
||||
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
|
||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a h1:lceJVurLqiWFdxK6KMDw+SIwrAsFW/af44XrNlbGw78=
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a/go.mod h1:KYZksBgh18o+uzgnpDazzG4LVYtnfB96VXHMXypEtik=
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884 h1:V0LUbfm4kVA1CPG8FgG9AGZqa3ykE5U12Gd3PZgoItA=
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884/go.mod h1:/x5Ky1ZJNyCjDkgSL1atII0EFKQF5WaIHKeP5nkaQfk=
|
||||
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
|
||||
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
|
||||
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
|
||||
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
@@ -86,7 +98,3 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
|
||||
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
|
||||
tags.cncf.io/container-device-interface v0.7.1 h1:MATNCbAD1su9U6zwQe5BrQ2vGGp1GBayD70bYaxYCNE=
|
||||
tags.cncf.io/container-device-interface v0.7.1/go.mod h1:h1JVuOqTQVORp8DziaWKUCDNzAmN+zeCbqbqD30D0ZQ=
|
||||
tags.cncf.io/container-device-interface/specs-go v0.7.0 h1:w/maMGVeLP6TIQJVYT5pbqTi8SCw/iHZ+n4ignuGHqg=
|
||||
tags.cncf.io/container-device-interface/specs-go v0.7.0/go.mod h1:hMAwAbMZyBLdmYqWgYcKH0F/yctNpV3P35f+/088A80=
|
||||
|
||||
@@ -16,11 +16,6 @@
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ContainerCLIConfig stores the options for the nvidia-container-cli
|
||||
type ContainerCLIConfig struct {
|
||||
Root string `toml:"root"`
|
||||
@@ -36,27 +31,3 @@ type ContainerCLIConfig struct {
|
||||
User string `toml:"user"`
|
||||
Ldconfig string `toml:"ldconfig"`
|
||||
}
|
||||
|
||||
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
|
||||
// This is only done for host LDConfigs and is required to handle systems where
|
||||
// /sbin/ldconfig is a wrapper around /sbin/ldconfig.real.
|
||||
func (c *ContainerCLIConfig) NormalizeLDConfigPath() string {
|
||||
return NormalizeLDConfigPath(c.Ldconfig)
|
||||
}
|
||||
|
||||
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
|
||||
// This is only done for host LDConfigs and is required to handle systems where
|
||||
// /sbin/ldconfig is a wrapper around /sbin/ldconfig.real.
|
||||
func NormalizeLDConfigPath(path string) string {
|
||||
if !strings.HasPrefix(path, "@") {
|
||||
return path
|
||||
}
|
||||
|
||||
trimmedPath := strings.TrimSuffix(strings.TrimPrefix(path, "@"), ".real")
|
||||
// If the .real path exists, we return that.
|
||||
if _, err := os.Stat(trimmedPath + ".real"); err == nil {
|
||||
return "@" + trimmedPath + ".real"
|
||||
}
|
||||
// If the .real path does not exists (or cannot be read) we return the non-.real path.
|
||||
return "@" + trimmedPath
|
||||
}
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
/**
|
||||
# Copyright 2023 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNormalizeLDConfigPath(t *testing.T) {
|
||||
testDir := t.TempDir()
|
||||
|
||||
f, err := os.Create(filepath.Join(testDir, "exists.real"))
|
||||
require.NoError(t, err)
|
||||
_ = f.Close()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
ldconfig string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
description: "empty input",
|
||||
},
|
||||
{
|
||||
description: "non-host with .real suffix returns as is",
|
||||
ldconfig: "/some/path/ldconfig.real",
|
||||
expected: "/some/path/ldconfig.real",
|
||||
},
|
||||
{
|
||||
description: "non-host without .real suffix returns as is",
|
||||
ldconfig: "/some/path/ldconfig",
|
||||
expected: "/some/path/ldconfig",
|
||||
},
|
||||
{
|
||||
description: "host .real file exists is returned",
|
||||
ldconfig: "@" + filepath.Join(testDir, "exists.real"),
|
||||
expected: "@" + filepath.Join(testDir, "exists.real"),
|
||||
},
|
||||
{
|
||||
description: "host resolves .real file",
|
||||
ldconfig: "@" + filepath.Join(testDir, "exists"),
|
||||
expected: "@" + filepath.Join(testDir, "exists.real"),
|
||||
},
|
||||
{
|
||||
description: "host .real file not exists strips suffix",
|
||||
ldconfig: "@/does/not/exist.real",
|
||||
expected: "@/does/not/exist",
|
||||
},
|
||||
{
|
||||
description: "host file returned as is if no .real file exsits",
|
||||
ldconfig: "@/does/not/exist",
|
||||
expected: "@/does/not/exist",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
c := ContainerCLIConfig{
|
||||
Ldconfig: tc.ldconfig,
|
||||
}
|
||||
|
||||
require.Equal(t, tc.expected, c.NormalizeLDConfigPath())
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -22,11 +22,10 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -63,9 +62,6 @@ type Config struct {
|
||||
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
|
||||
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
|
||||
NVIDIAContainerRuntimeHookConfig RuntimeHookConfig `toml:"nvidia-container-runtime-hook"`
|
||||
|
||||
// Features allows for finer control over optional features.
|
||||
Features features `toml:"features,omitempty"`
|
||||
}
|
||||
|
||||
// GetConfigFilePath returns the path to the config file for the configured system
|
||||
@@ -127,7 +123,10 @@ func GetDefault() (*Config, error) {
|
||||
}
|
||||
|
||||
func getLdConfigPath() string {
|
||||
return NormalizeLDConfigPath("@/sbin/ldconfig")
|
||||
if _, err := os.Stat("/sbin/ldconfig.real"); err == nil {
|
||||
return "@/sbin/ldconfig.real"
|
||||
}
|
||||
return "@/sbin/ldconfig"
|
||||
}
|
||||
|
||||
func getUserGroup() string {
|
||||
|
||||
@@ -35,7 +35,7 @@ func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, os.WriteFile(filename, contents, 0600))
|
||||
require.NoError(t, os.WriteFile(filename, contents, 0766))
|
||||
|
||||
cfg, err := GetConfig()
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
type featureName string
|
||||
|
||||
const (
|
||||
FeatureGDS = featureName("gds")
|
||||
FeatureMOFED = featureName("mofed")
|
||||
FeatureNVSWITCH = featureName("nvswitch")
|
||||
FeatureGDRCopy = featureName("gdrcopy")
|
||||
)
|
||||
|
||||
// features specifies a set of named features.
|
||||
type features struct {
|
||||
GDS *feature `toml:"gds,omitempty"`
|
||||
MOFED *feature `toml:"mofed,omitempty"`
|
||||
NVSWITCH *feature `toml:"nvswitch,omitempty"`
|
||||
GDRCopy *feature `toml:"gdrcopy,omitempty"`
|
||||
}
|
||||
|
||||
type feature bool
|
||||
|
||||
// IsEnabled checks whether a specified named feature is enabled.
|
||||
// An optional list of environments to check for feature-specific environment
|
||||
// variables can also be supplied.
|
||||
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
|
||||
featureEnvvars := map[featureName]string{
|
||||
FeatureGDS: "NVIDIA_GDS",
|
||||
FeatureMOFED: "NVIDIA_MOFED",
|
||||
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
|
||||
FeatureGDRCopy: "NVIDIA_GDRCOPY",
|
||||
}
|
||||
|
||||
envvar := featureEnvvars[n]
|
||||
switch n {
|
||||
case FeatureGDS:
|
||||
return fs.GDS.isEnabled(envvar, in...)
|
||||
case FeatureMOFED:
|
||||
return fs.MOFED.isEnabled(envvar, in...)
|
||||
case FeatureNVSWITCH:
|
||||
return fs.NVSWITCH.isEnabled(envvar, in...)
|
||||
case FeatureGDRCopy:
|
||||
return fs.GDRCopy.isEnabled(envvar, in...)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// isEnabled checks whether a feature is enabled.
|
||||
// If the enabled value is explicitly set, this is returned, otherwise the
|
||||
// associated envvar is checked in the specified getenver for the string "enabled"
|
||||
// A CUDA container / image can be passed here.
|
||||
func (f *feature) isEnabled(envvar string, ins ...getenver) bool {
|
||||
if f != nil {
|
||||
return bool(*f)
|
||||
}
|
||||
if envvar == "" {
|
||||
return false
|
||||
}
|
||||
for _, in := range ins {
|
||||
if in.Getenv(envvar) == "enabled" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type getenver interface {
|
||||
Getenv(string) string
|
||||
}
|
||||
@@ -19,13 +19,10 @@ package image
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
type builder struct {
|
||||
env map[string]string
|
||||
mounts []specs.Mount
|
||||
env []string
|
||||
disableRequire bool
|
||||
}
|
||||
|
||||
@@ -33,12 +30,7 @@ type builder struct {
|
||||
func New(opt ...Option) (CUDA, error) {
|
||||
b := &builder{}
|
||||
for _, o := range opt {
|
||||
if err := o(b); err != nil {
|
||||
return CUDA{}, err
|
||||
}
|
||||
}
|
||||
if b.env == nil {
|
||||
b.env = make(map[string]string)
|
||||
o(b)
|
||||
}
|
||||
|
||||
return b.build()
|
||||
@@ -46,57 +38,36 @@ func New(opt ...Option) (CUDA, error) {
|
||||
|
||||
// build creates a CUDA image from the builder.
|
||||
func (b builder) build() (CUDA, error) {
|
||||
if b.disableRequire {
|
||||
b.env[envNVDisableRequire] = "true"
|
||||
c := make(CUDA)
|
||||
|
||||
for _, e := range b.env {
|
||||
parts := strings.SplitN(e, "=", 2)
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("invalid environment variable: %v", e)
|
||||
}
|
||||
c[parts[0]] = parts[1]
|
||||
}
|
||||
|
||||
c := CUDA{
|
||||
env: b.env,
|
||||
mounts: b.mounts,
|
||||
if b.disableRequire {
|
||||
c[envNVDisableRequire] = "true"
|
||||
}
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Option is a functional option for creating a CUDA image.
|
||||
type Option func(*builder) error
|
||||
type Option func(*builder)
|
||||
|
||||
// WithDisableRequire sets the disable require option.
|
||||
func WithDisableRequire(disableRequire bool) Option {
|
||||
return func(b *builder) error {
|
||||
return func(b *builder) {
|
||||
b.disableRequire = disableRequire
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithEnv sets the environment variables to use when creating the CUDA image.
|
||||
// Note that this also overwrites the values set with WithEnvMap.
|
||||
func WithEnv(env []string) Option {
|
||||
return func(b *builder) error {
|
||||
envmap := make(map[string]string)
|
||||
for _, e := range env {
|
||||
parts := strings.SplitN(e, "=", 2)
|
||||
if len(parts) != 2 {
|
||||
return fmt.Errorf("invalid environment variable: %v", e)
|
||||
}
|
||||
envmap[parts[0]] = parts[1]
|
||||
}
|
||||
return WithEnvMap(envmap)(b)
|
||||
}
|
||||
}
|
||||
|
||||
// WithEnvMap sets the environment variable map to use when creating the CUDA image.
|
||||
// Note that this also overwrites the values set with WithEnv.
|
||||
func WithEnvMap(env map[string]string) Option {
|
||||
return func(b *builder) error {
|
||||
return func(b *builder) {
|
||||
b.env = env
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithMounts sets the mounts associated with the CUDA image.
|
||||
func WithMounts(mounts []specs.Mount) Option {
|
||||
return func(b *builder) error {
|
||||
b.mounts = mounts
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,7 +73,7 @@ func (c DriverCapabilities) Has(capability DriverCapability) bool {
|
||||
return c[capability]
|
||||
}
|
||||
|
||||
// Any checks whether any of the specified capabilities are set
|
||||
// Any checks whether any of the specified capabilites are set
|
||||
func (c DriverCapabilities) Any(capabilities ...DriverCapability) bool {
|
||||
if c.IsAll() {
|
||||
return true
|
||||
|
||||
@@ -18,13 +18,11 @@ package image
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/mod/semver"
|
||||
"tags.cncf.io/container-device-interface/pkg/parser"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -39,10 +37,7 @@ const (
|
||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||
// a map of environment variable to values that can be used to perform lookups
|
||||
// such as requirements.
|
||||
type CUDA struct {
|
||||
env map[string]string
|
||||
mounts []specs.Mount
|
||||
}
|
||||
type CUDA map[string]string
|
||||
|
||||
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
||||
// The process environment is read (if present) to construc the CUDA Image.
|
||||
@@ -52,10 +47,7 @@ func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
|
||||
env = spec.Process.Env
|
||||
}
|
||||
|
||||
return New(
|
||||
WithEnv(env),
|
||||
WithMounts(spec.Mounts),
|
||||
)
|
||||
return New(WithEnv(env))
|
||||
}
|
||||
|
||||
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
||||
@@ -64,24 +56,12 @@ func NewCUDAImageFromEnv(env []string) (CUDA, error) {
|
||||
return New(WithEnv(env))
|
||||
}
|
||||
|
||||
// Getenv returns the value of the specified environment variable.
|
||||
// If the environment variable is not specified, an empty string is returned.
|
||||
func (i CUDA) Getenv(key string) string {
|
||||
return i.env[key]
|
||||
}
|
||||
|
||||
// HasEnvvar checks whether the specified envvar is defined in the image.
|
||||
func (i CUDA) HasEnvvar(key string) bool {
|
||||
_, exists := i.env[key]
|
||||
return exists
|
||||
}
|
||||
|
||||
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
|
||||
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
||||
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
||||
func (i CUDA) IsLegacy() bool {
|
||||
legacyCudaVersion := i.env[envCUDAVersion]
|
||||
cudaRequire := i.env[envNVRequireCUDA]
|
||||
legacyCudaVersion := i[envCUDAVersion]
|
||||
cudaRequire := i[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
@@ -94,7 +74,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
||||
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range i.env {
|
||||
for name, value := range i {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
@@ -113,7 +93,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
||||
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
||||
// to a valid (true) boolean value this can be used to disable the requirement checks
|
||||
func (i CUDA) HasDisableRequire() bool {
|
||||
if disable, exists := i.env[envNVDisableRequire]; exists {
|
||||
if disable, exists := i[envNVDisableRequire]; exists {
|
||||
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
||||
d, _ := strconv.ParseBool(disable)
|
||||
return d
|
||||
@@ -124,12 +104,12 @@ func (i CUDA) HasDisableRequire() bool {
|
||||
|
||||
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
// We concantenate all the devices from the specified env.
|
||||
// We concantenate all the devices from the specified envvars.
|
||||
var isSet bool
|
||||
var devices []string
|
||||
requested := make(map[string]bool)
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := i.env[envVar]; ok {
|
||||
if devs, ok := i[envVar]; ok {
|
||||
isSet = true
|
||||
for _, d := range strings.Split(devs, ",") {
|
||||
trimmed := strings.TrimSpace(d)
|
||||
@@ -157,18 +137,18 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
|
||||
// GetDriverCapabilities returns the requested driver capabilities.
|
||||
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||
env := i.env[envNVDriverCapabilities]
|
||||
env := i[envNVDriverCapabilities]
|
||||
|
||||
capabilities := make(DriverCapabilities)
|
||||
capabilites := make(DriverCapabilities)
|
||||
for _, c := range strings.Split(env, ",") {
|
||||
capabilities[DriverCapability(c)] = true
|
||||
capabilites[DriverCapability(c)] = true
|
||||
}
|
||||
|
||||
return capabilities
|
||||
return capabilites
|
||||
}
|
||||
|
||||
func (i CUDA) legacyVersion() (string, error) {
|
||||
cudaVersion := i.env[envCUDAVersion]
|
||||
cudaVersion := i[envCUDAVersion]
|
||||
majorMinor, err := parseMajorMinorVersion(cudaVersion)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
|
||||
@@ -198,79 +178,3 @@ func parseMajorMinorVersion(version string) (string, error) {
|
||||
}
|
||||
return majorMinor, nil
|
||||
}
|
||||
|
||||
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
|
||||
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
||||
var hasCDIdevice bool
|
||||
for _, device := range i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
|
||||
if !parser.IsQualifiedName(device) {
|
||||
return false
|
||||
}
|
||||
hasCDIdevice = true
|
||||
}
|
||||
|
||||
for _, device := range i.DevicesFromMounts() {
|
||||
if !strings.HasPrefix(device, "cdi/") {
|
||||
return false
|
||||
}
|
||||
hasCDIdevice = true
|
||||
}
|
||||
return hasCDIdevice
|
||||
}
|
||||
|
||||
const (
|
||||
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
||||
)
|
||||
|
||||
// DevicesFromMounts returns a list of device specified as mounts.
|
||||
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
|
||||
func (i CUDA) DevicesFromMounts() []string {
|
||||
root := filepath.Clean(deviceListAsVolumeMountsRoot)
|
||||
seen := make(map[string]bool)
|
||||
var devices []string
|
||||
for _, m := range i.mounts {
|
||||
source := filepath.Clean(m.Source)
|
||||
// Only consider mounts who's host volume is /dev/null
|
||||
if source != "/dev/null" {
|
||||
continue
|
||||
}
|
||||
|
||||
destination := filepath.Clean(m.Destination)
|
||||
if seen[destination] {
|
||||
continue
|
||||
}
|
||||
seen[destination] = true
|
||||
|
||||
// Only consider container mount points that begin with 'root'
|
||||
if !strings.HasPrefix(destination, root) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Grab the full path beyond 'root' and add it to the list of devices
|
||||
device := strings.Trim(strings.TrimPrefix(destination, root), "/")
|
||||
if len(device) == 0 {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, device)
|
||||
}
|
||||
return devices
|
||||
}
|
||||
|
||||
// CDIDevicesFromMounts returns a list of CDI devices specified as mounts on the image.
|
||||
func (i CUDA) CDIDevicesFromMounts() []string {
|
||||
var devices []string
|
||||
for _, mountDevice := range i.DevicesFromMounts() {
|
||||
if !strings.HasPrefix(mountDevice, "cdi/") {
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(strings.TrimPrefix(mountDevice, "cdi/"), "/", 3)
|
||||
if len(parts) != 3 {
|
||||
continue
|
||||
}
|
||||
vendor := parts[0]
|
||||
class := parts[1]
|
||||
device := parts[2]
|
||||
devices = append(devices, fmt.Sprintf("%s/%s=%s", vendor, class, device))
|
||||
}
|
||||
return devices
|
||||
}
|
||||
|
||||
@@ -126,6 +126,7 @@ func TestGetRequirements(t *testing.T) {
|
||||
requirements, err := image.GetRequirements()
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, tc.requirements, requirements)
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
@@ -154,7 +154,10 @@ func (t Toml) contents() ([]byte, error) {
|
||||
// format fixes the comments for the config to ensure that they start in column
|
||||
// 1 and are not followed by a space.
|
||||
func (t Toml) format(contents []byte) ([]byte, error) {
|
||||
r := regexp.MustCompile(`(\n*)\s*?#\s*(\S.*)`)
|
||||
r, err := regexp.Compile(`(\n*)\s*?#\s*(\S.*)`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to compile regexp: %v", err)
|
||||
}
|
||||
replaced := r.ReplaceAll(contents, []byte("$1#$2"))
|
||||
|
||||
return replaced, nil
|
||||
|
||||
@@ -23,8 +23,7 @@ import (
|
||||
)
|
||||
|
||||
/*
|
||||
#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup
|
||||
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
|
||||
#ifdef _WIN32
|
||||
#define CUDAAPI __stdcall
|
||||
|
||||
@@ -27,13 +27,20 @@ type charDevices mounts
|
||||
var _ Discover = (*charDevices)(nil)
|
||||
|
||||
// NewCharDeviceDiscoverer creates a discoverer which locates the specified set of device nodes.
|
||||
func NewCharDeviceDiscoverer(logger logger.Interface, devRoot string, devices []string) Discover {
|
||||
func NewCharDeviceDiscoverer(logger logger.Interface, devices []string, root string) Discover {
|
||||
locator := lookup.NewCharDeviceLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(devRoot),
|
||||
lookup.WithRoot(root),
|
||||
)
|
||||
|
||||
return (*charDevices)(newMounts(logger, locator, devRoot, devices))
|
||||
return NewDeviceDiscoverer(logger, locator, root, devices)
|
||||
}
|
||||
|
||||
// NewDeviceDiscoverer creates a discoverer which locates the specified set of device nodes using the specified locator.
|
||||
func NewDeviceDiscoverer(logger logger.Interface, locator lookup.Locator, root string, devices []string) Discover {
|
||||
m := NewMounts(logger, locator, root, devices).(*mounts)
|
||||
|
||||
return (*charDevices)(m)
|
||||
}
|
||||
|
||||
// Mounts returns the discovered mounts for the charDevices.
|
||||
|
||||
@@ -20,10 +20,9 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
func TestCharDevices(t *testing.T) {
|
||||
|
||||
@@ -30,8 +30,8 @@ type filtered struct {
|
||||
filter Filter
|
||||
}
|
||||
|
||||
// newFilteredDiscoverer creates a discoverer that applies the specified filter to the returned entities of the discoverer
|
||||
func newFilteredDiscoverer(logger logger.Interface, applyTo Discover, filter Filter) Discover {
|
||||
// newFilteredDisoverer creates a discoverer that applies the specified filter to the returned entities of the discoverer
|
||||
func newFilteredDisoverer(logger logger.Interface, applyTo Discover, filter Filter) Discover {
|
||||
return filtered{
|
||||
Discover: applyTo,
|
||||
logger: logger,
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
func NewGDRCopyDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
|
||||
return NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
devRoot,
|
||||
[]string{"/dev/gdrdrv"},
|
||||
), nil
|
||||
}
|
||||
@@ -29,17 +29,17 @@ type gdsDeviceDiscoverer struct {
|
||||
}
|
||||
|
||||
// NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
|
||||
func NewGDSDiscoverer(logger logger.Interface, driverRoot string, devRoot string) (Discover, error) {
|
||||
func NewGDSDiscoverer(logger logger.Interface, root string) (Discover, error) {
|
||||
devices := NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
devRoot,
|
||||
[]string{"/dev/nvidia-fs*"},
|
||||
root,
|
||||
)
|
||||
|
||||
udev := NewMounts(
|
||||
logger,
|
||||
lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(driverRoot)),
|
||||
driverRoot,
|
||||
lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(root)),
|
||||
root,
|
||||
[]string{"/run/udev"},
|
||||
)
|
||||
|
||||
@@ -47,9 +47,9 @@ func NewGDSDiscoverer(logger logger.Interface, driverRoot string, devRoot string
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithRoot(root),
|
||||
),
|
||||
driverRoot,
|
||||
root,
|
||||
[]string{"/etc/cufile.json"},
|
||||
)
|
||||
|
||||
|
||||
@@ -28,41 +28,53 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||
)
|
||||
|
||||
// NewDRMNodesDiscoverer returns a discoverer for the DRM device nodes associated with the specified visible devices.
|
||||
//
|
||||
// TODO: The logic for creating DRM devices should be consolidated between this
|
||||
// and the logic for generating CDI specs for a single device. This is only used
|
||||
// when applying OCI spec modifications to an incoming spec in "legacy" mode.
|
||||
func NewDRMNodesDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string, nvidiaCTKPath string) (Discover, error) {
|
||||
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, devRoot)
|
||||
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
|
||||
func NewGraphicsDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string, nvidiaCTKPath string) (Discover, error) {
|
||||
mounts, err := NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
|
||||
}
|
||||
|
||||
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, driverRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
|
||||
}
|
||||
|
||||
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCTKPath)
|
||||
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, driverRoot, nvidiaCTKPath)
|
||||
|
||||
discover := Merge(
|
||||
Merge(drmDeviceNodes, drmByPathSymlinks),
|
||||
mounts,
|
||||
)
|
||||
|
||||
discover := Merge(drmDeviceNodes, drmByPathSymlinks)
|
||||
return discover, nil
|
||||
}
|
||||
|
||||
// NewGraphicsMountsDiscoverer creates a discoverer for the mounts required by graphics tools such as vulkan.
|
||||
func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
|
||||
func NewGraphicsMountsDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) (Discover, error) {
|
||||
locator, err := lookup.NewLibraryLocator(logger, driverRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct library locator: %v", err)
|
||||
}
|
||||
libraries := NewMounts(
|
||||
logger,
|
||||
driver.Libraries(),
|
||||
driver.Root,
|
||||
locator,
|
||||
driverRoot,
|
||||
[]string{
|
||||
"libnvidia-egl-gbm.so.*",
|
||||
"libnvidia-egl-gbm.so",
|
||||
},
|
||||
)
|
||||
|
||||
jsonMounts := NewMounts(
|
||||
logger,
|
||||
driver.Configs(),
|
||||
driver.Root,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithSearchPaths("/etc", "/usr/share"),
|
||||
),
|
||||
driverRoot,
|
||||
[]string{
|
||||
"glvnd/egl_vendor.d/10_nvidia.json",
|
||||
"vulkan/icd.d/nvidia_icd.json",
|
||||
@@ -74,7 +86,7 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, n
|
||||
},
|
||||
)
|
||||
|
||||
xorg := optionalXorgDiscoverer(logger, driver, nvidiaCTKPath)
|
||||
xorg := optionalXorgDiscoverer(logger, driverRoot, nvidiaCTKPath)
|
||||
|
||||
discover := Merge(
|
||||
libraries,
|
||||
@@ -89,16 +101,16 @@ type drmDevicesByPath struct {
|
||||
None
|
||||
logger logger.Interface
|
||||
nvidiaCTKPath string
|
||||
devRoot string
|
||||
driverRoot string
|
||||
devicesFrom Discover
|
||||
}
|
||||
|
||||
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
|
||||
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCTKPath string) Discover {
|
||||
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, driverRoot string, nvidiaCTKPath string) Discover {
|
||||
d := drmDevicesByPath{
|
||||
logger: logger,
|
||||
nvidiaCTKPath: nvidiaCTKPath,
|
||||
devRoot: devRoot,
|
||||
driverRoot: driverRoot,
|
||||
devicesFrom: devices,
|
||||
}
|
||||
|
||||
@@ -136,7 +148,7 @@ func (d drmDevicesByPath) Hooks() ([]Hook, error) {
|
||||
return []Hook{hook}, nil
|
||||
}
|
||||
|
||||
// getSpecificLinkArgs returns the required specific links that need to be created
|
||||
// getSpecificLinkArgs returns the required specic links that need to be created
|
||||
func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error) {
|
||||
selectedDevices := make(map[string]bool)
|
||||
for _, d := range devices {
|
||||
@@ -145,7 +157,7 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
|
||||
|
||||
linkLocator := lookup.NewFileLocator(
|
||||
lookup.WithLogger(d.logger),
|
||||
lookup.WithRoot(d.devRoot),
|
||||
lookup.WithRoot(d.driverRoot),
|
||||
)
|
||||
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
|
||||
if err != nil {
|
||||
@@ -171,23 +183,27 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
|
||||
}
|
||||
|
||||
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
|
||||
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Discover, error) {
|
||||
allDevices := NewCharDeviceDiscoverer(
|
||||
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Discover, error) {
|
||||
allDevices := NewDeviceDiscoverer(
|
||||
logger,
|
||||
devRoot,
|
||||
lookup.NewCharDeviceLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
),
|
||||
driverRoot,
|
||||
[]string{
|
||||
"/dev/dri/card*",
|
||||
"/dev/dri/renderD*",
|
||||
},
|
||||
)
|
||||
|
||||
filter, err := newDRMDeviceFilter(devices, devRoot)
|
||||
filter, err := newDRMDeviceFilter(logger, devices, driverRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
|
||||
}
|
||||
|
||||
// We return a discoverer that applies the DRM device filter created above to all discovered DRM device nodes.
|
||||
d := newFilteredDiscoverer(
|
||||
d := newFilteredDisoverer(
|
||||
logger,
|
||||
allDevices,
|
||||
filter,
|
||||
@@ -197,8 +213,8 @@ func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevice
|
||||
}
|
||||
|
||||
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
|
||||
func newDRMDeviceFilter(devices image.VisibleDevices, devRoot string) (Filter, error) {
|
||||
gpuInformationPaths, err := proc.GetInformationFilePaths(devRoot)
|
||||
func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Filter, error) {
|
||||
gpuInformationPaths, err := proc.GetInformationFilePaths(driverRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read GPU information: %v", err)
|
||||
}
|
||||
@@ -225,7 +241,7 @@ func newDRMDeviceFilter(devices image.VisibleDevices, devRoot string) (Filter, e
|
||||
return nil, fmt.Errorf("failed to determine DRM devices for %v: %v", busID, err)
|
||||
}
|
||||
for _, drmDeviceNode := range drmDeviceNodes {
|
||||
filter[drmDeviceNode] = true
|
||||
filter[filepath.Join(drmDeviceNode)] = true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -242,8 +258,8 @@ var _ Discover = (*xorgHooks)(nil)
|
||||
|
||||
// optionalXorgDiscoverer creates a discoverer for Xorg libraries.
|
||||
// If the creation of the discoverer fails, a None discoverer is returned.
|
||||
func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) Discover {
|
||||
xorg, err := newXorgDiscoverer(logger, driver, nvidiaCTKPath)
|
||||
func optionalXorgDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) Discover {
|
||||
xorg, err := newXorgDiscoverer(logger, driverRoot, nvidiaCTKPath)
|
||||
if err != nil {
|
||||
logger.Warningf("Failed to create Xorg discoverer: %v; skipping xorg libraries", err)
|
||||
return None{}
|
||||
@@ -251,9 +267,10 @@ func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidia
|
||||
return xorg
|
||||
}
|
||||
|
||||
func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
|
||||
func newXorgDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) (Discover, error) {
|
||||
libCudaPaths, err := cuda.New(
|
||||
driver.Libraries(),
|
||||
cuda.WithLogger(logger),
|
||||
cuda.WithDriverRoot(driverRoot),
|
||||
).Locate(".*.*")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
|
||||
@@ -270,11 +287,11 @@ func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPa
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driver.Root),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithSearchPaths(libRoot, "/usr/lib/x86_64-linux-gnu"),
|
||||
lookup.WithCount(1),
|
||||
),
|
||||
driver.Root,
|
||||
driverRoot,
|
||||
[]string{
|
||||
"nvidia/xorg/nvidia_drv.so",
|
||||
fmt.Sprintf("nvidia/xorg/libglxserver_nvidia.so.%s", version),
|
||||
@@ -286,16 +303,20 @@ func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPa
|
||||
nvidiaCTKPath: nvidiaCTKPath,
|
||||
}
|
||||
|
||||
xorgConfig := NewMounts(
|
||||
xorgConfg := NewMounts(
|
||||
logger,
|
||||
driver.Configs(),
|
||||
driver.Root,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithSearchPaths("/usr/share"),
|
||||
),
|
||||
driverRoot,
|
||||
[]string{"X11/xorg.conf.d/10-nvidia.conf"},
|
||||
)
|
||||
|
||||
d := Merge(
|
||||
xorgLibs,
|
||||
xorgConfig,
|
||||
xorgConfg,
|
||||
xorgHooks,
|
||||
)
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ package discover
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
)
|
||||
|
||||
var _ Discover = (*Hook)(nil)
|
||||
|
||||
@@ -19,10 +19,9 @@ package discover
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
func TestIPCMounts(t *testing.T) {
|
||||
|
||||
@@ -70,7 +70,7 @@ func (d *ipcMounts) Mounts() ([]Mount, error) {
|
||||
var modifiedMounts []Mount
|
||||
for _, m := range mounts {
|
||||
mount := m
|
||||
mount.Options = append(mount.Options, "noexec")
|
||||
mount.Options = append(m.Options, "noexec")
|
||||
modifiedMounts = append(modifiedMounts, mount)
|
||||
}
|
||||
|
||||
|
||||
@@ -25,11 +25,10 @@ import (
|
||||
)
|
||||
|
||||
// NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. A logger can also be specified
|
||||
func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath, ldconfigPath string) (Discover, error) {
|
||||
func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath string) (Discover, error) {
|
||||
d := ldconfig{
|
||||
logger: logger,
|
||||
nvidiaCTKPath: nvidiaCTKPath,
|
||||
ldconfigPath: ldconfigPath,
|
||||
mountsFrom: mounts,
|
||||
}
|
||||
|
||||
@@ -40,7 +39,6 @@ type ldconfig struct {
|
||||
None
|
||||
logger logger.Interface
|
||||
nvidiaCTKPath string
|
||||
ldconfigPath string
|
||||
mountsFrom Discover
|
||||
}
|
||||
|
||||
@@ -52,20 +50,14 @@ func (d ldconfig) Hooks() ([]Hook, error) {
|
||||
}
|
||||
h := CreateLDCacheUpdateHook(
|
||||
d.nvidiaCTKPath,
|
||||
d.ldconfigPath,
|
||||
getLibraryPaths(mounts),
|
||||
)
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// CreateLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache
|
||||
func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []string) Hook {
|
||||
func CreateLDCacheUpdateHook(executable string, libraries []string) Hook {
|
||||
var args []string
|
||||
|
||||
if ldconfig != "" {
|
||||
args = append(args, "--ldconfig-path", ldconfig)
|
||||
}
|
||||
|
||||
for _, f := range uniqueFolders(libraries) {
|
||||
args = append(args, "--folder", f)
|
||||
}
|
||||
@@ -77,6 +69,7 @@ func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []str
|
||||
)
|
||||
|
||||
return hook
|
||||
|
||||
}
|
||||
|
||||
// getLibraryPaths extracts the library dirs from the specified mounts
|
||||
@@ -93,6 +86,7 @@ func getLibraryPaths(mounts []Mount) []string {
|
||||
|
||||
// isLibName checks if the specified filename is a library (i.e. ends in `.so*`)
|
||||
func isLibName(filename string) bool {
|
||||
|
||||
base := filepath.Base(filename)
|
||||
|
||||
isLib, err := filepath.Match("lib?*.so*", base)
|
||||
|
||||
@@ -26,7 +26,6 @@ import (
|
||||
|
||||
const (
|
||||
testNvidiaCTKPath = "/foo/bar/nvidia-ctk"
|
||||
testLdconfigPath = "/bar/baz/ldconfig"
|
||||
)
|
||||
|
||||
func TestLDCacheUpdateHook(t *testing.T) {
|
||||
@@ -34,7 +33,6 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
ldconfigPath string
|
||||
mounts []Mount
|
||||
mountError error
|
||||
expectedError error
|
||||
@@ -77,11 +75,6 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
||||
},
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib"},
|
||||
},
|
||||
{
|
||||
description: "explicit ldconfig path is passed",
|
||||
ldconfigPath: testLdconfigPath,
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--ldconfig-path", testLdconfigPath},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
@@ -97,7 +90,7 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
||||
Lifecycle: "createContainer",
|
||||
}
|
||||
|
||||
d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath, tc.ldconfigPath)
|
||||
d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath)
|
||||
require.NoError(t, err)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
@@ -121,8 +114,10 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, mounts)
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestIsLibName(t *testing.T) {
|
||||
|
||||
@@ -27,7 +27,7 @@ type list struct {
|
||||
|
||||
var _ Discover = (*list)(nil)
|
||||
|
||||
// Merge creates a discoverer that is the composite of a list of discoverers.
|
||||
// Merge creates a discoverer that is the composite of a list of discoveres.
|
||||
func Merge(d ...Discover) Discover {
|
||||
l := list{
|
||||
discoverers: d,
|
||||
|
||||
@@ -19,14 +19,14 @@ package discover
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
// NewMOFEDDiscoverer creates a discoverer for MOFED devices.
|
||||
func NewMOFEDDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
|
||||
func NewMOFEDDiscoverer(logger logger.Interface, root string) (Discover, error) {
|
||||
devices := NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
devRoot,
|
||||
[]string{
|
||||
"/dev/infiniband/uverbs*",
|
||||
"/dev/infiniband/rdma_cm",
|
||||
},
|
||||
root,
|
||||
)
|
||||
|
||||
return devices, nil
|
||||
|
||||
@@ -20,9 +20,8 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
)
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
// NewNvSwitchDiscoverer creates a discoverer for NVSWITCH devices.
|
||||
func NewNvSwitchDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
|
||||
devices := NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
devRoot,
|
||||
[]string{
|
||||
"/dev/nvidia-nvswitchctl",
|
||||
"/dev/nvidia-nvswitch*",
|
||||
},
|
||||
)
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
@@ -16,6 +16,15 @@
|
||||
|
||||
package dxcore
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||
)
|
||||
|
||||
const (
|
||||
libraryName = "libdxcore.so"
|
||||
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
|
||||
)
|
||||
|
||||
// dxcore stores a reference the dxcore dynamic library
|
||||
var dxcore *context
|
||||
|
||||
|
||||
@@ -41,17 +41,14 @@ static const char * const dxcore_nvidia_driver_store_components[] = {
|
||||
*/
|
||||
|
||||
struct dxcore_enumAdapters2;
|
||||
struct dxcore_enumAdapters3;
|
||||
struct dxcore_queryAdapterInfo;
|
||||
|
||||
typedef int(*pfnDxcoreEnumAdapters2)(struct dxcore_enumAdapters2* pParams);
|
||||
typedef int(*pfnDxcoreEnumAdapters3)(struct dxcore_enumAdapters3* pParams);
|
||||
typedef int(*pfnDxcoreQueryAdapterInfo)(struct dxcore_queryAdapterInfo* pParams);
|
||||
|
||||
struct dxcore_lib {
|
||||
void* hDxcoreLib;
|
||||
pfnDxcoreEnumAdapters2 pDxcoreEnumAdapters2;
|
||||
pfnDxcoreEnumAdapters3 pDxcoreEnumAdapters3;
|
||||
pfnDxcoreQueryAdapterInfo pDxcoreQueryAdapterInfo;
|
||||
};
|
||||
|
||||
@@ -69,15 +66,6 @@ struct dxcore_enumAdapters2
|
||||
struct dxcore_adapterInfo *pAdapters;
|
||||
};
|
||||
|
||||
#define ENUMADAPTER3_FILTER_COMPUTE_ONLY (0x0000000000000001)
|
||||
|
||||
struct dxcore_enumAdapters3
|
||||
{
|
||||
unsigned long long Filter;
|
||||
unsigned int NumAdapters;
|
||||
struct dxcore_adapterInfo *pAdapters;
|
||||
};
|
||||
|
||||
enum dxcore_kmtqueryAdapterInfoType
|
||||
{
|
||||
DXCORE_QUERYDRIVERVERSION = 13,
|
||||
@@ -251,37 +239,7 @@ static void dxcore_add_adapter(struct dxcore_context* pCtx, struct dxcore_lib* p
|
||||
log_infof("Adding new adapter via dxcore hAdapter:%x luid:%llx wddm version:%d", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid), wddmVersion);
|
||||
}
|
||||
|
||||
static int dxcore_enum_adapters3(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
|
||||
{
|
||||
struct dxcore_enumAdapters3 params = {0};
|
||||
unsigned int adapterIndex = 0;
|
||||
|
||||
// Include compute-only in addition to display+compute adapters
|
||||
params.Filter = ENUMADAPTER3_FILTER_COMPUTE_ONLY;
|
||||
params.NumAdapters = 0;
|
||||
params.pAdapters = NULL;
|
||||
|
||||
if (pLib->pDxcoreEnumAdapters3(¶ms)) {
|
||||
log_err("Failed to enumerate adapters via enumAdapers3");
|
||||
return 1;
|
||||
}
|
||||
|
||||
params.pAdapters = malloc(sizeof(struct dxcore_adapterInfo) * params.NumAdapters);
|
||||
if (pLib->pDxcoreEnumAdapters3(¶ms)) {
|
||||
free(params.pAdapters);
|
||||
log_err("Failed to enumerate adapters via enumAdapers3");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) {
|
||||
dxcore_add_adapter(pCtx, pLib, ¶ms.pAdapters[adapterIndex]);
|
||||
}
|
||||
|
||||
free(params.pAdapters);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dxcore_enum_adapters2(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
|
||||
static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
|
||||
{
|
||||
struct dxcore_enumAdapters2 params = {0};
|
||||
unsigned int adapterIndex = 0;
|
||||
@@ -290,15 +248,15 @@ static int dxcore_enum_adapters2(struct dxcore_context* pCtx, struct dxcore_lib*
|
||||
params.pAdapters = NULL;
|
||||
|
||||
if (pLib->pDxcoreEnumAdapters2(¶ms)) {
|
||||
log_err("Failed to enumerate adapters via enumAdapters2");
|
||||
return 1;
|
||||
log_err("Failed to enumerate adapters via dxcore");
|
||||
return;
|
||||
}
|
||||
|
||||
params.pAdapters = malloc(sizeof(struct dxcore_adapterInfo) * params.NumAdapters);
|
||||
if (pLib->pDxcoreEnumAdapters2(¶ms)) {
|
||||
free(params.pAdapters);
|
||||
log_err("Failed to enumerate adapters via enumAdapters2");
|
||||
return 1;
|
||||
log_err("Failed to enumerate adapters via dxcore");
|
||||
return;
|
||||
}
|
||||
|
||||
for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) {
|
||||
@@ -306,27 +264,6 @@ static int dxcore_enum_adapters2(struct dxcore_context* pCtx, struct dxcore_lib*
|
||||
}
|
||||
|
||||
free(params.pAdapters);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
|
||||
{
|
||||
int status;
|
||||
if (pLib->pDxcoreEnumAdapters3) {
|
||||
status = dxcore_enum_adapters3(pCtx, pLib);
|
||||
if (status == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to EnumAdapters2 if the OS doesn't support EnumAdapters3
|
||||
if (pLib->pDxcoreEnumAdapters2) {
|
||||
status = dxcore_enum_adapters2(pCtx, pLib);
|
||||
if (status == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
log_err("Failed to enumerate adapters via dxcore");
|
||||
}
|
||||
|
||||
int dxcore_init_context(struct dxcore_context* pCtx)
|
||||
@@ -343,9 +280,8 @@ int dxcore_init_context(struct dxcore_context* pCtx)
|
||||
}
|
||||
|
||||
lib.pDxcoreEnumAdapters2 = (pfnDxcoreEnumAdapters2)dlsym(lib.hDxcoreLib, "D3DKMTEnumAdapters2");
|
||||
lib.pDxcoreEnumAdapters3 = (pfnDxcoreEnumAdapters3)dlsym(lib.hDxcoreLib, "D3DKMTEnumAdapters3");
|
||||
if (!lib.pDxcoreEnumAdapters2 && !lib.pDxcoreEnumAdapters3) {
|
||||
log_err("dxcore library is present but the symbols D3DKMTEnumAdapters2 and D3DKMTEnumAdapters3 are missing");
|
||||
if (!lib.pDxcoreEnumAdapters2) {
|
||||
log_err("dxcore library is present but the symbol D3DKMTEnumAdapters2 is missing");
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
||||
@@ -17,9 +17,7 @@
|
||||
package dxcore
|
||||
|
||||
/*
|
||||
#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup
|
||||
|
||||
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
#include <dxcore.h>
|
||||
*/
|
||||
import "C"
|
||||
|
||||
@@ -17,10 +17,9 @@
|
||||
package edits
|
||||
|
||||
import (
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
"tags.cncf.io/container-device-interface/specs-go"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
)
|
||||
|
||||
type device discover.Device
|
||||
|
||||
@@ -20,10 +20,9 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"tags.cncf.io/container-device-interface/specs-go"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDeviceToSpec(t *testing.T) {
|
||||
|
||||
@@ -19,13 +19,12 @@ package edits
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
ociSpecs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
"tags.cncf.io/container-device-interface/specs-go"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
ociSpecs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
type edits struct {
|
||||
|
||||
@@ -19,9 +19,8 @@ package edits
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestFromDiscovererAllowsMountsToIterate(t *testing.T) {
|
||||
|
||||
@@ -17,10 +17,9 @@
|
||||
package edits
|
||||
|
||||
import (
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
"tags.cncf.io/container-device-interface/specs-go"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
)
|
||||
|
||||
type hook discover.Hook
|
||||
|
||||
@@ -17,10 +17,9 @@
|
||||
package edits
|
||||
|
||||
import (
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
"tags.cncf.io/container-device-interface/specs-go"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
)
|
||||
|
||||
type mount discover.Mount
|
||||
|
||||
@@ -20,9 +20,9 @@ import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvml"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// additionalInfo allows for the info.Interface to be extened to implement the infoInterface.
|
||||
@@ -52,9 +52,7 @@ func (i additionalInfo) UsesNVGPUModule() (uses bool, reason string) {
|
||||
if ret != nvml.SUCCESS {
|
||||
return false, fmt.Sprintf("failed to initialize nvml: %v", ret)
|
||||
}
|
||||
defer func() {
|
||||
_ = i.nvmllib.Shutdown()
|
||||
}()
|
||||
defer i.nvmllib.Shutdown()
|
||||
|
||||
var names []string
|
||||
|
||||
|
||||
@@ -19,9 +19,9 @@ package info
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvml"
|
||||
"github.com/stretchr/testify/require"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
func TestUsesNVGPUModule(t *testing.T) {
|
||||
|
||||
@@ -17,12 +17,12 @@
|
||||
package info
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvml"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/parser"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// infoInterface provides an alias for mocking.
|
||||
@@ -70,7 +70,7 @@ func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
|
||||
r.logger.Infof("Auto-detected mode as '%v'", rmode)
|
||||
}()
|
||||
|
||||
if image.OnlyFullyQualifiedCDIDevices() {
|
||||
if onlyFullyQualifiedCDIDevices(image) {
|
||||
return "cdi"
|
||||
}
|
||||
|
||||
@@ -89,3 +89,14 @@ func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
|
||||
|
||||
return "legacy"
|
||||
}
|
||||
|
||||
func onlyFullyQualifiedCDIDevices(image image.CUDA) bool {
|
||||
var hasCDIdevice bool
|
||||
for _, device := range image.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
|
||||
if !cdi.IsQualifiedName(device) {
|
||||
return false
|
||||
}
|
||||
hasCDIdevice = true
|
||||
}
|
||||
return hasCDIdevice
|
||||
}
|
||||
|
||||
@@ -19,11 +19,9 @@ package info
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
)
|
||||
|
||||
func TestResolveAutoMode(t *testing.T) {
|
||||
@@ -34,8 +32,7 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
mode string
|
||||
expectedMode string
|
||||
info map[string]bool
|
||||
envmap map[string]string
|
||||
mounts []string
|
||||
image image.CUDA
|
||||
}{
|
||||
{
|
||||
description: "non-auto resolves to input",
|
||||
@@ -122,7 +119,7 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
description: "cdi devices resolves to cdi",
|
||||
mode: "auto",
|
||||
expectedMode: "cdi",
|
||||
envmap: map[string]string{
|
||||
image: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=all",
|
||||
},
|
||||
},
|
||||
@@ -130,14 +127,14 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
description: "multiple cdi devices resolves to cdi",
|
||||
mode: "auto",
|
||||
expectedMode: "cdi",
|
||||
envmap: map[string]string{
|
||||
image: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,nvidia.com/gpu=1",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "at least one non-cdi device resolves to legacy",
|
||||
mode: "auto",
|
||||
envmap: map[string]string{
|
||||
image: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
|
||||
},
|
||||
info: map[string]bool{
|
||||
@@ -150,7 +147,7 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
{
|
||||
description: "at least one non-cdi device resolves to csv",
|
||||
mode: "auto",
|
||||
envmap: map[string]string{
|
||||
image: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
|
||||
},
|
||||
info: map[string]bool{
|
||||
@@ -160,44 +157,6 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
},
|
||||
expectedMode: "csv",
|
||||
},
|
||||
{
|
||||
description: "cdi mount devices resolves to CDI",
|
||||
mode: "auto",
|
||||
mounts: []string{
|
||||
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
||||
},
|
||||
expectedMode: "cdi",
|
||||
},
|
||||
{
|
||||
description: "cdi mount and non-CDI devices resolves to legacy",
|
||||
mode: "auto",
|
||||
mounts: []string{
|
||||
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
||||
"/var/run/nvidia-container-devices/all",
|
||||
},
|
||||
info: map[string]bool{
|
||||
"nvml": true,
|
||||
"tegra": false,
|
||||
"nvgpu": false,
|
||||
},
|
||||
expectedMode: "legacy",
|
||||
},
|
||||
{
|
||||
description: "cdi mount and non-CDI envvar resolves to legacy",
|
||||
mode: "auto",
|
||||
envmap: map[string]string{
|
||||
"NVIDIA_VISIBLE_DEVICES": "0",
|
||||
},
|
||||
mounts: []string{
|
||||
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
||||
},
|
||||
info: map[string]bool{
|
||||
"nvml": true,
|
||||
"tegra": false,
|
||||
"nvgpu": false,
|
||||
},
|
||||
expectedMode: "legacy",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
@@ -218,20 +177,7 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
logger: logger,
|
||||
info: info,
|
||||
}
|
||||
|
||||
var mounts []specs.Mount
|
||||
for _, d := range tc.mounts {
|
||||
mount := specs.Mount{
|
||||
Source: "/dev/null",
|
||||
Destination: d,
|
||||
}
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(tc.envmap),
|
||||
image.WithMounts(mounts),
|
||||
)
|
||||
mode := r.resolveMode(tc.mode, image)
|
||||
mode := r.resolveMode(tc.mode, tc.image)
|
||||
require.EqualValues(t, tc.expectedMode, mode)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package devices
|
||||
|
||||
type builder struct {
|
||||
asMap devices
|
||||
filter func(string) bool
|
||||
}
|
||||
|
||||
// New creates a new devices struct with the specified options.
|
||||
func New(opts ...Option) Devices {
|
||||
b := &builder{}
|
||||
for _, opt := range opts {
|
||||
opt(b)
|
||||
}
|
||||
|
||||
if b.filter == nil {
|
||||
b.filter = func(string) bool { return false }
|
||||
}
|
||||
|
||||
devices := make(devices)
|
||||
for k, v := range b.asMap {
|
||||
if b.filter(string(k)) {
|
||||
continue
|
||||
}
|
||||
devices[k] = v
|
||||
}
|
||||
return devices
|
||||
}
|
||||
|
||||
type Option func(*builder)
|
||||
|
||||
// WithDeviceToMajor specifies an explicit device name to major number map.
|
||||
func WithDeviceToMajor(deviceToMajor map[string]int) Option {
|
||||
return func(b *builder) {
|
||||
b.asMap = make(devices)
|
||||
for name, major := range deviceToMajor {
|
||||
b.asMap[Name(name)] = Major(major)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WithFilter specifies a filter to exclude devices.
|
||||
func WithFilter(filter func(string) bool) Option {
|
||||
return func(b *builder) {
|
||||
b.filter = filter
|
||||
}
|
||||
}
|
||||
@@ -33,7 +33,7 @@ const (
|
||||
NVIDIAModesetMinor = 254
|
||||
|
||||
NVIDIAFrontend = Name("nvidia-frontend")
|
||||
NVIDIAGPU = Name("nvidia")
|
||||
NVIDIAGPU = NVIDIAFrontend
|
||||
NVIDIACaps = Name("nvidia-caps")
|
||||
NVIDIAUVM = Name("nvidia-uvm")
|
||||
|
||||
@@ -53,43 +53,22 @@ type Major int
|
||||
type Devices interface {
|
||||
Exists(Name) bool
|
||||
Get(Name) (Major, bool)
|
||||
Count() int
|
||||
}
|
||||
|
||||
type devices map[Name]Major
|
||||
|
||||
var _ Devices = devices(nil)
|
||||
|
||||
// Count returns the number of devices defined.
|
||||
func (d devices) Count() int {
|
||||
return len(d)
|
||||
}
|
||||
|
||||
// Exists checks if a Device with a given name exists or not
|
||||
func (d devices) Exists(name Name) bool {
|
||||
_, exists := d.Get(name)
|
||||
_, exists := d[name]
|
||||
return exists
|
||||
}
|
||||
|
||||
// Get a Device from Devices. It also has fallback logic to ensure device name changes in /proc/devices are handled
|
||||
// For e.g:- For GPU drivers 550.40.x or greater, the gpu device has been renamed from "nvidia-frontend" to "nvidia".
|
||||
// Get a Device from Devices
|
||||
func (d devices) Get(name Name) (Major, bool) {
|
||||
for _, n := range name.getWithFallback() {
|
||||
device, exists := d[n]
|
||||
if exists {
|
||||
return device, true
|
||||
}
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// getWithFallback returns a prioritised list of device names for a specific name.
|
||||
// This allows multiple names to be associated with a single name to support various driver versions.
|
||||
func (n Name) getWithFallback() []Name {
|
||||
if n == NVIDIAGPU || n == NVIDIAFrontend {
|
||||
return []Name{NVIDIAGPU, NVIDIAFrontend}
|
||||
}
|
||||
return []Name{n}
|
||||
device, exists := d[name]
|
||||
return device, exists
|
||||
}
|
||||
|
||||
// GetNVIDIADevices returns the set of NVIDIA Devices on the machine
|
||||
@@ -115,23 +94,27 @@ func nvidiaDevices(devicesPath string) (Devices, error) {
|
||||
|
||||
var errNoNvidiaDevices = errors.New("no NVIDIA devices found")
|
||||
|
||||
func nvidiaDeviceFrom(reader io.Reader) (Devices, error) {
|
||||
func nvidiaDeviceFrom(reader io.Reader) (devices, error) {
|
||||
allDevices := devicesFrom(reader)
|
||||
nvidiaDevices := make(devices)
|
||||
|
||||
nvidiaDevices := New(
|
||||
WithDeviceToMajor(allDevices),
|
||||
WithFilter(func(n string) bool {
|
||||
return !strings.HasPrefix(n, nvidiaDevicePrefix)
|
||||
}),
|
||||
)
|
||||
if nvidiaDevices.Count() == 0 {
|
||||
var hasNvidiaDevices bool
|
||||
for n, d := range allDevices {
|
||||
if !strings.HasPrefix(string(n), nvidiaDevicePrefix) {
|
||||
continue
|
||||
}
|
||||
nvidiaDevices[n] = d
|
||||
hasNvidiaDevices = true
|
||||
}
|
||||
|
||||
if !hasNvidiaDevices {
|
||||
return nil, errNoNvidiaDevices
|
||||
}
|
||||
return nvidiaDevices, nil
|
||||
}
|
||||
|
||||
func devicesFrom(reader io.Reader) map[string]int {
|
||||
allDevices := make(map[string]int)
|
||||
func devicesFrom(reader io.Reader) devices {
|
||||
allDevices := make(devices)
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
device, major, err := processProcDeviceLine(scanner.Text())
|
||||
@@ -143,11 +126,11 @@ func devicesFrom(reader io.Reader) map[string]int {
|
||||
return allDevices
|
||||
}
|
||||
|
||||
func processProcDeviceLine(line string) (string, int, error) {
|
||||
func processProcDeviceLine(line string) (Name, Major, error) {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
|
||||
var name string
|
||||
var major int
|
||||
var name Name
|
||||
var major Major
|
||||
|
||||
n, _ := fmt.Sscanf(trimmed, "%d %s", &major, &name)
|
||||
if n == 2 {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user