mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
25 Commits
v1.15.0-rc
...
v1.14.5
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9ea3360701 | ||
|
|
b6987c526a | ||
|
|
3604927034 | ||
|
|
640bd6ee3f | ||
|
|
6593f3c2e4 | ||
|
|
d52a237c12 | ||
|
|
9d9260db8c | ||
|
|
888fe458ae | ||
|
|
d167812ce3 | ||
|
|
7ff23999e8 | ||
|
|
a9b01a43bc | ||
|
|
ccff00bc30 | ||
|
|
f7d54200c6 | ||
|
|
29fd206f3a | ||
|
|
cfe0d5d07e | ||
|
|
9ab640b2be | ||
|
|
9d2e4b48bc | ||
|
|
c050bcf081 | ||
|
|
27d0fa4ee2 | ||
|
|
e0e22fdceb | ||
|
|
c1eae0deda | ||
|
|
68f0203a49 | ||
|
|
cc688f7c75 | ||
|
|
7566eb124a | ||
|
|
eb5d50abc4 |
@@ -19,6 +19,7 @@ default:
|
||||
|
||||
variables:
|
||||
GIT_SUBMODULE_STRATEGY: recursive
|
||||
BUILDIMAGE: "${CI_REGISTRY_IMAGE}/build:${CI_COMMIT_SHORT_SHA}"
|
||||
BUILD_MULTI_ARCH_IMAGES: "true"
|
||||
|
||||
stages:
|
||||
|
||||
20
.github/dependabot.yml
vendored
20
.github/dependabot.yml
vendored
@@ -1,20 +0,0 @@
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "gomod"
|
||||
target-branch: main
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "sunday"
|
||||
ignore:
|
||||
- dependency-name: k8s.io/*
|
||||
labels:
|
||||
- dependencies
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
56
.github/workflows/golang.yaml
vendored
56
.github/workflows/golang.yaml
vendored
@@ -1,56 +0,0 @@
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: Golang
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Lint
|
||||
uses: golangci/golangci-lint-action@v3
|
||||
with:
|
||||
version: latest
|
||||
args: -v --timeout 5m
|
||||
skip-cache: true
|
||||
test:
|
||||
name: Unit test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.20'
|
||||
- run: make test
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Build
|
||||
run: make docker-build
|
||||
108
.github/workflows/image.yaml
vendored
108
.github/workflows/image.yaml
vendored
@@ -1,108 +0,0 @@
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Run this workflow on pull requests
|
||||
name: image
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- release-*
|
||||
|
||||
jobs:
|
||||
packages:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
package:
|
||||
- ubuntu18.04-arm64
|
||||
- ubuntu18.04-amd64
|
||||
- ubuntu18.04-ppc64le
|
||||
- centos7-aarch64
|
||||
- centos7-x86_64
|
||||
- centos8-ppc64le
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Check out code
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: build ${{ matrix.package }} packages
|
||||
run: |
|
||||
sudo apt-get install -y coreutils build-essential sed git bash make
|
||||
echo "Building packages"
|
||||
./scripts/build-packages.sh ${{ matrix.package }}
|
||||
- name: 'Upload Artifacts'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
compression-level: 0
|
||||
name: toolkit-container-${{ matrix.package }}-${{ github.run_id }}
|
||||
path: ${{ github.workspace }}/dist/*
|
||||
|
||||
image:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
image: [ubuntu20.04, centos7, ubi8, packaging]
|
||||
needs: packages
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Check out code
|
||||
- name: Calculate build vars
|
||||
id: vars
|
||||
run: |
|
||||
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
|
||||
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
|
||||
REPO_FULL_NAME="${{ github.event.pull_request.head.repo.full_name }}"
|
||||
echo "${REPO_FULL_NAME}"
|
||||
echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> $GITHUB_ENV
|
||||
|
||||
GENERATE_ARTIFACTS="false"
|
||||
if [[ "${{ github.actor }}" == "dependabot[bot]" ]]; then
|
||||
GENERATE_ARTIFACTS="false"
|
||||
elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
|
||||
GENERATE_ARTIFACTS="true"
|
||||
elif [[ "${{ github.event_name }}" == "push" ]]; then
|
||||
GENERATE_ARTIFACTS="true"
|
||||
fi
|
||||
echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
|
||||
echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Get built packages
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: ${{ github.workspace }}/dist/
|
||||
pattern: toolkit-container-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Build image
|
||||
env:
|
||||
IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/container-toolkit
|
||||
VERSION: ${COMMIT_SHORT_SHA}
|
||||
run: |
|
||||
echo "${VERSION}"
|
||||
make -f build/container/Makefile build-${{ matrix.image }}
|
||||
22
.github/workflows/pre-sanity.yml
vendored
Normal file
22
.github/workflows/pre-sanity.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: Run pre sanity
|
||||
|
||||
# run this workflow for each commit
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Build dev image
|
||||
run: make .build-image
|
||||
|
||||
- name: Build
|
||||
run: make docker-build
|
||||
|
||||
- name: Tests
|
||||
run: make docker-coverage
|
||||
|
||||
- name: Checks
|
||||
run: make docker-check
|
||||
@@ -15,6 +15,68 @@
|
||||
include:
|
||||
- .common-ci.yml
|
||||
|
||||
build-dev-image:
|
||||
stage: image
|
||||
script:
|
||||
- apk --no-cache add make bash
|
||||
- make .build-image
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
- make .push-build-image
|
||||
|
||||
.requires-build-image:
|
||||
image: "${BUILDIMAGE}"
|
||||
|
||||
.go-check:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: go-checks
|
||||
|
||||
fmt:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make assert-fmt
|
||||
|
||||
vet:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make vet
|
||||
|
||||
lint:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make lint
|
||||
allow_failure: true
|
||||
|
||||
ineffassign:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make ineffassign
|
||||
allow_failure: true
|
||||
|
||||
misspell:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make misspell
|
||||
|
||||
go-build:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: go-build
|
||||
script:
|
||||
- make build
|
||||
|
||||
unit-tests:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: unit-tests
|
||||
script:
|
||||
- make coverage
|
||||
|
||||
# Define the package build helpers
|
||||
.multi-arch-build:
|
||||
before_script:
|
||||
@@ -76,24 +138,12 @@ package-centos7-x86_64:
|
||||
- .dist-centos7
|
||||
- .arch-x86_64
|
||||
|
||||
package-centos8-aarch64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos8
|
||||
- .arch-aarch64
|
||||
|
||||
package-centos8-ppc64le:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos8
|
||||
- .arch-ppc64le
|
||||
|
||||
package-centos8-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos8
|
||||
- .arch-x86_64
|
||||
|
||||
package-ubuntu18.04-amd64:
|
||||
extends:
|
||||
- .package-build
|
||||
@@ -176,8 +226,6 @@ image-packaging:
|
||||
- .package-artifacts
|
||||
- .dist-packaging
|
||||
needs:
|
||||
- job: package-centos8-aarch64
|
||||
- job: package-centos8-x86_64
|
||||
- job: package-ubuntu18.04-amd64
|
||||
- job: package-ubuntu18.04-arm64
|
||||
- job: package-amazonlinux2-aarch64
|
||||
@@ -254,3 +302,4 @@ test-docker-ubuntu20.04:
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
|
||||
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,4 +1,4 @@
|
||||
[submodule "third_party/libnvidia-container"]
|
||||
path = third_party/libnvidia-container
|
||||
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
|
||||
branch = main
|
||||
branch = release-1.14
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
run:
|
||||
deadline: 10m
|
||||
|
||||
linters:
|
||||
enable:
|
||||
- contextcheck
|
||||
- gocritic
|
||||
- gofmt
|
||||
- goimports
|
||||
- gosec
|
||||
- gosimple
|
||||
- govet
|
||||
- ineffassign
|
||||
- misspell
|
||||
- staticcheck
|
||||
- unconvert
|
||||
|
||||
linters-settings:
|
||||
goimports:
|
||||
local-prefixes: github.com/NVIDIA/nvidia-container-toolkit
|
||||
|
||||
issues:
|
||||
exclude-rules:
|
||||
# Exclude the gocritic dupSubExpr issue for cgo files.
|
||||
- path: internal/dxcore/dxcore.go
|
||||
linters:
|
||||
- gocritic
|
||||
text: dupSubExpr
|
||||
# Exclude the checks for usage of returns to config.Delete(Path) in the crio and containerd config packages.
|
||||
- path: pkg/config/engine/
|
||||
linters:
|
||||
- errcheck
|
||||
text: config.Delete
|
||||
39
CHANGELOG.md
39
CHANGELOG.md
@@ -1,36 +1,23 @@
|
||||
# NVIDIA Container Toolkit Changelog
|
||||
|
||||
## v1.15.0-rc.3
|
||||
* Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied.
|
||||
## v1.14.5
|
||||
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker. This was incorrectly setting `experimental = true` instead
|
||||
of setting `features.cdi = true`.
|
||||
|
||||
## v1.15.0-rc.2
|
||||
* Extend the `runtime.nvidia.com/gpu` CDI kind to support full-GPUs and MIG devices specified by index or UUID.
|
||||
* Fix bug when specifying `--dev-root` for Tegra-based systems.
|
||||
* Log explicitly requested runtime mode.
|
||||
* Remove package dependency on libseccomp.
|
||||
* Added detection of libnvdxgdmal.so.1 on WSL2
|
||||
* Use devRoot to resolve MIG device nodes.
|
||||
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
|
||||
* Add `crun` to the list of configured low-level runtimes.
|
||||
* Added support for `--ldconfig-path` to `nvidia-ctk cdi generate` command.
|
||||
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker.
|
||||
* Add discovery of the GDRCopy device (`gdrdrv`) if the `NVIDIA_GDRCOPY` environment variable of the container is set to `enabled`
|
||||
|
||||
* [toolkit-container] Bump CUDA base image version to 12.3.1.
|
||||
|
||||
## v1.15.0-rc.1
|
||||
* Skip update of ldcache in containers without ldconfig. The .so.SONAME symlinks are still created.
|
||||
* Normalize ldconfig path on use. This automatically adjust the ldconfig setting applied to ldconfig.real on systems where this exists.
|
||||
## v1.14.4
|
||||
* Include `nvidia/nvoptix.bin` in list of graphics mounts.
|
||||
* Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts.
|
||||
* Add support for `--library-search-paths` to `nvidia-ctk cdi generate` command.
|
||||
* Add support for injecting /dev/nvidia-nvswitch* devices if the NVIDIA_NVSWITCH=enabled envvar is specified.
|
||||
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
|
||||
* Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly.
|
||||
* Add `--relative-to` option to `nvidia-ctk transform root` command. This controls whether the root transformation is applied to host or container paths.
|
||||
* Added automatic CDI spec generation when the `runtime.nvidia.com/gpu=all` device is requested by a container.
|
||||
* Log explicitly requested runtime mode.
|
||||
* Remove package dependency on libseccomp.
|
||||
* Added detection of libnvdxgdmal.so.1 on WSL2.
|
||||
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
|
||||
* Add `crun` to the list of configured low-level runtimes.
|
||||
* Add `--cdi.enabled` option to `nvidia-ctk runtime configure` command to enable CDI in containerd.
|
||||
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
|
||||
|
||||
* [libnvidia-container] Fix device permission check when using cgroupv2 (fixes #227)
|
||||
* [toolkit-container] Bump CUDA base image version to 12.3.1.
|
||||
* [libnvidia-container] Added detection of libnvdxgdmal.so.1 on WSL2.
|
||||
|
||||
## v1.14.3
|
||||
* [toolkit-container] Bump CUDA base image version to 12.2.2.
|
||||
|
||||
@@ -19,7 +19,7 @@ where `TARGET` is a make target that is valid for each of the sub-components.
|
||||
|
||||
These include:
|
||||
* `ubuntu18.04-amd64`
|
||||
* `centos8-x86_64`
|
||||
* `centos7-x86_64`
|
||||
|
||||
If no `TARGET` is specified, all valid release targets are built.
|
||||
|
||||
|
||||
68
Makefile
68
Makefile
@@ -38,7 +38,7 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
|
||||
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
|
||||
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
|
||||
|
||||
CHECK_TARGETS := golangci-lint
|
||||
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
|
||||
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
|
||||
|
||||
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
|
||||
@@ -78,13 +78,30 @@ fmt:
|
||||
go list -f '{{.Dir}}' $(MODULE)/... \
|
||||
| xargs gofmt -s -l -w
|
||||
|
||||
# Apply goimports -local github.com/NVIDIA/container-toolkit to the codebase
|
||||
goimports:
|
||||
go list -f {{.Dir}} $(MODULE)/... \
|
||||
| xargs goimports -local $(MODULE) -w
|
||||
assert-fmt:
|
||||
go list -f '{{.Dir}}' $(MODULE)/... \
|
||||
| xargs gofmt -s -l > fmt.out
|
||||
@if [ -s fmt.out ]; then \
|
||||
echo "\nERROR: The following files are not formatted:\n"; \
|
||||
cat fmt.out; \
|
||||
rm fmt.out; \
|
||||
exit 1; \
|
||||
else \
|
||||
rm fmt.out; \
|
||||
fi
|
||||
|
||||
golangci-lint:
|
||||
golangci-lint run ./...
|
||||
ineffassign:
|
||||
ineffassign $(MODULE)/...
|
||||
|
||||
lint:
|
||||
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
|
||||
go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status
|
||||
|
||||
misspell:
|
||||
misspell $(MODULE)/...
|
||||
|
||||
vet:
|
||||
go vet $(MODULE)/...
|
||||
|
||||
licenses:
|
||||
go-licenses csv $(MODULE)/...
|
||||
@@ -100,14 +117,32 @@ coverage: test
|
||||
generate:
|
||||
go generate $(MODULE)/...
|
||||
|
||||
$(DOCKER_TARGETS): docker-%:
|
||||
@echo "Running 'make $(*)' in container image $(BUILDIMAGE)"
|
||||
# Generate an image for containerized builds
|
||||
# Note: This image is local only
|
||||
.PHONY: .build-image .pull-build-image .push-build-image
|
||||
.build-image: docker/Dockerfile.devel
|
||||
if [ x"$(SKIP_IMAGE_BUILD)" = x"" ]; then \
|
||||
$(DOCKER) build \
|
||||
--progress=plain \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--tag $(BUILDIMAGE) \
|
||||
-f $(^) \
|
||||
docker; \
|
||||
fi
|
||||
|
||||
.pull-build-image:
|
||||
$(DOCKER) pull $(BUILDIMAGE)
|
||||
|
||||
.push-build-image:
|
||||
$(DOCKER) push $(BUILDIMAGE)
|
||||
|
||||
$(DOCKER_TARGETS): docker-%: .build-image
|
||||
@echo "Running 'make $(*)' in docker container $(BUILDIMAGE)"
|
||||
$(DOCKER) run \
|
||||
--rm \
|
||||
-e GOCACHE=/tmp/.cache/go \
|
||||
-e GOMODCACHE=/tmp/.cache/gomod \
|
||||
-v $(PWD):/work \
|
||||
-w /work \
|
||||
-e GOCACHE=/tmp/.cache \
|
||||
-v $(PWD):$(PWD) \
|
||||
-w $(PWD) \
|
||||
--user $$(id -u):$$(id -g) \
|
||||
$(BUILDIMAGE) \
|
||||
make $(*)
|
||||
@@ -118,9 +153,8 @@ PHONY: .shell
|
||||
$(DOCKER) run \
|
||||
--rm \
|
||||
-ti \
|
||||
-e GOCACHE=/tmp/.cache/go \
|
||||
-e GOMODCACHE=/tmp/.cache/gomod \
|
||||
-v $(PWD):/work \
|
||||
-w /work \
|
||||
-e GOCACHE=/tmp/.cache \
|
||||
-v $(PWD):$(PWD) \
|
||||
-w $(PWD) \
|
||||
--user $$(id -u):$$(id -g) \
|
||||
$(BUILDIMAGE)
|
||||
|
||||
@@ -145,9 +145,7 @@ test-packaging:
|
||||
@echo "Testing package image contents"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/aarch64" || echo "Missing centos7/aarch64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/x86_64" || echo "Missing centos7/x86_64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/aarch64" || echo "Missing centos8/aarch64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/ppc64le" || echo "Missing centos8/ppc64le"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/x86_64" || echo "Missing centos8/x86_64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/amd64" || echo "Missing ubuntu18.04/amd64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/arm64" || echo "Missing ubuntu18.04/arm64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/ppc64le" || echo "Missing ubuntu18.04/ppc64le"
|
||||
|
||||
@@ -9,10 +9,9 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/mod/semver"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -175,7 +174,7 @@ func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *stri
|
||||
// if specified.
|
||||
var hasSwarmEnvvar bool
|
||||
for _, envvar := range swarmResourceEnvvars {
|
||||
if image.HasEnvvar(envvar) {
|
||||
if _, exists := image[envvar]; exists {
|
||||
hasSwarmEnvvar = true
|
||||
break
|
||||
}
|
||||
@@ -258,31 +257,28 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMigConfigDevices(image image.CUDA) *string {
|
||||
return getMigDevices(image, envNVMigConfigDevices)
|
||||
}
|
||||
|
||||
func getMigMonitorDevices(image image.CUDA) *string {
|
||||
return getMigDevices(image, envNVMigMonitorDevices)
|
||||
}
|
||||
|
||||
func getMigDevices(image image.CUDA, envvar string) *string {
|
||||
if !image.HasEnvvar(envvar) {
|
||||
return nil
|
||||
func getMigConfigDevices(env map[string]string) *string {
|
||||
if devices, ok := env[envNVMigConfigDevices]; ok {
|
||||
return &devices
|
||||
}
|
||||
devices := image.Getenv(envvar)
|
||||
return &devices
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
|
||||
func getMigMonitorDevices(env map[string]string) *string {
|
||||
if devices, ok := env[envNVMigMonitorDevices]; ok {
|
||||
return &devices
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities {
|
||||
// We use the default driver capabilities by default. This is filtered to only include the
|
||||
// supported capabilities
|
||||
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
|
||||
|
||||
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
||||
|
||||
capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities)
|
||||
capsEnv := cudaImage.Getenv(envNVDriverCapabilities)
|
||||
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
|
||||
|
||||
if !capsEnvSpecified && legacyImage {
|
||||
// Environment variable unset with legacy image: set all capabilities.
|
||||
|
||||
@@ -5,9 +5,8 @@ import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetNvidiaConfig(t *testing.T) {
|
||||
@@ -466,9 +465,6 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(tc.env),
|
||||
)
|
||||
// Wrap the call to getNvidiaConfig() in a closure.
|
||||
var config *nvidiaConfig
|
||||
getConfig := func() {
|
||||
@@ -477,7 +473,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
defaultConfig, _ := getDefaultHookConfig()
|
||||
hookConfig = &defaultConfig
|
||||
}
|
||||
config = getNvidiaConfig(hookConfig, image, nil, tc.privileged)
|
||||
config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged)
|
||||
}
|
||||
|
||||
// For any tests that are expected to panic, make sure they do.
|
||||
@@ -682,17 +678,13 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
||||
// Wrap the call to getDevices() in a closure.
|
||||
var devices *string
|
||||
getDevices := func() {
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(
|
||||
map[string]string{
|
||||
envNVVisibleDevices: tc.envvarDevices,
|
||||
},
|
||||
),
|
||||
)
|
||||
env := map[string]string{
|
||||
envNVVisibleDevices: tc.envvarDevices,
|
||||
}
|
||||
hookConfig, _ := getDefaultHookConfig()
|
||||
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||
devices = getDevices(&hookConfig, image, tc.mountDevices, tc.privileged)
|
||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
|
||||
}
|
||||
|
||||
// For all other tests, just grab the devices and check the results
|
||||
@@ -913,10 +905,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
|
||||
for i, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(tc.env),
|
||||
)
|
||||
devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
|
||||
devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars)
|
||||
if tc.expectedDevices == nil {
|
||||
require.Nil(t, devices, "%d: %v", i, tc)
|
||||
return
|
||||
@@ -1026,17 +1015,14 @@ func TestGetDriverCapabilities(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
var capabilities string
|
||||
var capabilites string
|
||||
|
||||
c := HookConfig{
|
||||
SupportedDriverCapabilities: tc.supportedCapabilities,
|
||||
}
|
||||
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(tc.env),
|
||||
)
|
||||
getDriverCapabilities := func() {
|
||||
capabilities = c.getDriverCapabilities(image, tc.legacyImage).String()
|
||||
capabilites = c.getDriverCapabilities(tc.env, tc.legacyImage).String()
|
||||
}
|
||||
|
||||
if tc.expectedPanic {
|
||||
@@ -1045,7 +1031,7 @@ func TestGetDriverCapabilities(t *testing.T) {
|
||||
}
|
||||
|
||||
getDriverCapabilities()
|
||||
require.EqualValues(t, tc.expectedCapabilities, capabilities)
|
||||
require.EqualValues(t, tc.expectedCapabilities, capabilites)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,8 @@ const (
|
||||
driverPath = "/run/nvidia/driver"
|
||||
)
|
||||
|
||||
var defaultPaths = [...]string{}
|
||||
|
||||
// HookConfig : options for the nvidia-container-runtime-hook.
|
||||
type HookConfig config.Config
|
||||
|
||||
|
||||
@@ -21,9 +21,8 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetHookConfig(t *testing.T) {
|
||||
|
||||
@@ -111,8 +111,8 @@ func doPrestart() {
|
||||
}
|
||||
args = append(args, "configure")
|
||||
|
||||
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
|
||||
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
|
||||
if cli.Ldconfig != "" {
|
||||
args = append(args, fmt.Sprintf("--ldconfig=%s", cli.Ldconfig))
|
||||
}
|
||||
if cli.NoCgroups {
|
||||
args = append(args, "--no-cgroups")
|
||||
@@ -142,7 +142,6 @@ func doPrestart() {
|
||||
args = append(args, rootfs)
|
||||
|
||||
env := append(os.Environ(), cli.Environment...)
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection?
|
||||
err = syscall.Exec(args[0], args, env)
|
||||
log.Panicln("exec failed:", err)
|
||||
}
|
||||
|
||||
@@ -85,126 +85,3 @@ Alternatively the NVIDIA Container Runtime can be set as the default runtime for
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Environment variables (OCI spec)
|
||||
|
||||
Each environment variable maps to an command-line argument for `nvidia-container-cli` from [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
|
||||
These variables are already set in our [official CUDA images](https://hub.docker.com/r/nvidia/cuda/).
|
||||
|
||||
### `NVIDIA_VISIBLE_DEVICES`
|
||||
This variable controls which GPUs will be made accessible inside the container.
|
||||
|
||||
#### Possible values
|
||||
* `0,1,2`, `GPU-fef8089b` …: a comma-separated list of GPU UUID(s) or index(es).
|
||||
* `all`: all GPUs will be accessible, this is the default value in our container images.
|
||||
* `none`: no GPU will be accessible, but driver capabilities will be enabled.
|
||||
* `void` or *empty* or *unset*: `nvidia-container-runtime` will have the same behavior as `runc`.
|
||||
|
||||
**Note**: When running on a MIG capable device, the following values will also be available:
|
||||
* `0:0,0:1,1:0`, `MIG-GPU-fef8089b/0/1` …: a comma-separated list of MIG Device UUID(s) or index(es).
|
||||
|
||||
Where the MIG device indices have the form `<GPU Device Index>:<MIG Device Index>` as seen in the example output:
|
||||
```
|
||||
$ nvidia-smi -L
|
||||
GPU 0: Graphics Device (UUID: GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5)
|
||||
MIG Device 0: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/0)
|
||||
MIG Device 1: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/1)
|
||||
MIG Device 2: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/11/0)
|
||||
```
|
||||
|
||||
### `NVIDIA_MIG_CONFIG_DEVICES`
|
||||
This variable controls which of the visible GPUs can have their MIG
|
||||
configuration managed from within the container. This includes enabling and
|
||||
disabling MIG mode, creating and destroying GPU Instances and Compute
|
||||
Instances, etc.
|
||||
|
||||
#### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG configurations managed.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/config` file on the host.
|
||||
|
||||
### `NVIDIA_MIG_MONITOR_DEVICES`
|
||||
This variable controls which of the visible GPUs can have aggregate information
|
||||
about all of their MIG devices monitored from within the container. This
|
||||
includes inspecting the aggregate memory usage, listing the aggregate running
|
||||
processes, etc.
|
||||
|
||||
#### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG devices monitored.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/monitor` file on the host.
|
||||
|
||||
### `NVIDIA_DRIVER_CAPABILITIES`
|
||||
This option controls which driver libraries/binaries will be mounted inside the container.
|
||||
|
||||
#### Possible values
|
||||
* `compute,video`, `graphics,utility` …: a comma-separated list of driver features the container needs.
|
||||
* `all`: enable all available driver capabilities.
|
||||
* *empty* or *unset*: use default driver capability: `utility,compute`.
|
||||
|
||||
#### Supported driver capabilities
|
||||
* `compute`: required for CUDA and OpenCL applications.
|
||||
* `compat32`: required for running 32-bit applications.
|
||||
* `graphics`: required for running OpenGL and Vulkan applications.
|
||||
* `utility`: required for using `nvidia-smi` and NVML.
|
||||
* `video`: required for using the Video Codec SDK.
|
||||
* `display`: required for leveraging X11 display.
|
||||
|
||||
### `NVIDIA_REQUIRE_*`
|
||||
A logical expression to define constraints on the configurations supported by the container.
|
||||
|
||||
#### Supported constraints
|
||||
* `cuda`: constraint on the CUDA driver version.
|
||||
* `driver`: constraint on the driver version.
|
||||
* `arch`: constraint on the compute architectures of the selected GPUs.
|
||||
* `brand`: constraint on the brand of the selected GPUs (e.g. GeForce, Tesla, GRID).
|
||||
|
||||
#### Expressions
|
||||
Multiple constraints can be expressed in a single environment variable: space-separated constraints are ORed, comma-separated constraints are ANDed.
|
||||
Multiple environment variables of the form `NVIDIA_REQUIRE_*` are ANDed together.
|
||||
|
||||
### `NVIDIA_DISABLE_REQUIRE`
|
||||
Single switch to disable all the constraints of the form `NVIDIA_REQUIRE_*`.
|
||||
|
||||
### `NVIDIA_REQUIRE_CUDA`
|
||||
|
||||
The version of the CUDA toolkit used by the container. It is an instance of the generic `NVIDIA_REQUIRE_*` case and it is set by official CUDA images.
|
||||
If the version of the NVIDIA driver is insufficient to run this version of CUDA, the container will not be started.
|
||||
|
||||
#### Possible values
|
||||
* `cuda>=7.5`, `cuda>=8.0`, `cuda>=9.0` …: any valid CUDA version in the form `major.minor`.
|
||||
|
||||
### `CUDA_VERSION`
|
||||
Similar to `NVIDIA_REQUIRE_CUDA`, for legacy CUDA images.
|
||||
In addition, if `NVIDIA_REQUIRE_CUDA` is not set, `NVIDIA_VISIBLE_DEVICES` and `NVIDIA_DRIVER_CAPABILITIES` will default to `all`.
|
||||
|
||||
## Usage example
|
||||
|
||||
**NOTE:** The use of the `nvidia-container-runtime` as CLI replacement for `runc` is uncommon and is only provided for completeness.
|
||||
|
||||
Although the `nvidia-container-runtime` is typically configured as a replacement for `runc` or `crun` in various container engines, it can also be
|
||||
invoked from the command line as `runc` would. For example:
|
||||
|
||||
```sh
|
||||
# Setup a rootfs based on Ubuntu 16.04
|
||||
cd $(mktemp -d) && mkdir rootfs
|
||||
curl -sS http://cdimage.ubuntu.com/ubuntu-base/releases/16.04/release/ubuntu-base-16.04-core-amd64.tar.gz | tar --exclude 'dev/*' -C rootfs -xz
|
||||
|
||||
# Create an OCI runtime spec
|
||||
nvidia-container-runtime spec
|
||||
sed -i 's;"sh";"nvidia-smi";' config.json
|
||||
sed -i 's;\("TERM=xterm"\);\1, "NVIDIA_VISIBLE_DEVICES=0";' config.json
|
||||
|
||||
# Run the container
|
||||
sudo nvidia-container-runtime run nvidia_smi
|
||||
```
|
||||
|
||||
@@ -3,7 +3,7 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
@@ -11,12 +11,11 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -87,7 +86,6 @@ func TestBadInput(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
err = cmdCreate.Run()
|
||||
@@ -105,7 +103,6 @@ func TestGoodInput(t *testing.T) {
|
||||
t.Fatalf("error generating runtime spec: %v", err)
|
||||
}
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle", cfg.bundlePath(), "testcontainer")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
|
||||
output, err := cmdRun.CombinedOutput()
|
||||
@@ -116,7 +113,6 @@ func TestGoodInput(t *testing.T) {
|
||||
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
|
||||
require.Empty(t, spec.Hooks, "there should be no hooks in config.json")
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
err = cmdCreate.Run()
|
||||
@@ -162,7 +158,6 @@ func TestDuplicateHook(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test how runtime handles already existing prestart hook in config.json
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
output, err := cmdCreate.CombinedOutput()
|
||||
@@ -193,16 +188,15 @@ func (c testConfig) getRuntimeSpec() (specs.Spec, error) {
|
||||
}
|
||||
defer jsonFile.Close()
|
||||
|
||||
jsonContent, err := io.ReadAll(jsonFile)
|
||||
switch {
|
||||
case err != nil:
|
||||
jsonContent, err := ioutil.ReadAll(jsonFile)
|
||||
if err != nil {
|
||||
return spec, err
|
||||
case json.Valid(jsonContent):
|
||||
} else if json.Valid(jsonContent) {
|
||||
err = json.Unmarshal(jsonContent, &spec)
|
||||
if err != nil {
|
||||
return spec, err
|
||||
}
|
||||
default:
|
||||
} else {
|
||||
err = json.NewDecoder(bytes.NewReader(jsonContent)).Decode(&spec)
|
||||
if err != nil {
|
||||
return spec, err
|
||||
@@ -232,7 +226,6 @@ func (c testConfig) generateNewRuntimeSpec() error {
|
||||
return err
|
||||
}
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
cmd := exec.Command("cp", c.unmodifiedSpecFile(), c.specFilePath())
|
||||
err = cmd.Run()
|
||||
if err != nil {
|
||||
|
||||
@@ -43,7 +43,7 @@ By default, all commands output to `STDOUT`, but specifying the `--output` flag
|
||||
|
||||
### Generate CDI specifications
|
||||
|
||||
The [Container Device Interface (CDI)](https://tags.cncf.io/container-device-interface) provides
|
||||
The [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface) provides
|
||||
a vendor-agnostic mechanism to make arbitrary devices accessible in containerized environments. To allow NVIDIA devices to be
|
||||
used in these environments, the NVIDIA Container Toolkit CLI includes functionality to generate a CDI specification for the
|
||||
available NVIDIA GPUs in a system.
|
||||
|
||||
@@ -17,12 +17,11 @@
|
||||
package cdi
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/list"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -23,7 +23,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
cdi "tags.cncf.io/container-device-interface/pkg/parser"
|
||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
@@ -46,9 +46,7 @@ type options struct {
|
||||
format string
|
||||
deviceNameStrategy string
|
||||
driverRoot string
|
||||
devRoot string
|
||||
nvidiaCTKPath string
|
||||
ldconfigPath string
|
||||
mode string
|
||||
vendor string
|
||||
class string
|
||||
@@ -104,11 +102,6 @@ func (m command) build() *cli.Command {
|
||||
Value: nvcdi.ModeAuto,
|
||||
Destination: &opts.mode,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "dev-root",
|
||||
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
|
||||
Destination: &opts.devRoot,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "device-name-strategy",
|
||||
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
|
||||
@@ -130,11 +123,6 @@ func (m command) build() *cli.Command {
|
||||
Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.",
|
||||
Destination: &opts.nvidiaCTKPath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "ldconfig-path",
|
||||
Usage: "Specify the path to use for ldconfig in the generated CDI specification",
|
||||
Destination: &opts.ldconfigPath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "vendor",
|
||||
Aliases: []string{"cdi-vendor"},
|
||||
@@ -249,11 +237,9 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
|
||||
cdilib, err := nvcdi.New(
|
||||
nvcdi.WithLogger(m.logger),
|
||||
nvcdi.WithDriverRoot(opts.driverRoot),
|
||||
nvcdi.WithDevRoot(opts.devRoot),
|
||||
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
|
||||
nvcdi.WithLdconfigPath(opts.ldconfigPath),
|
||||
nvcdi.WithDeviceNamer(deviceNamer),
|
||||
nvcdi.WithMode(opts.mode),
|
||||
nvcdi.WithMode(string(opts.mode)),
|
||||
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
|
||||
nvcdi.WithCSVFiles(opts.csv.files.Value()),
|
||||
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
|
||||
|
||||
@@ -26,9 +26,14 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
|
||||
)
|
||||
|
||||
type loadSaver interface {
|
||||
Load() (spec.Interface, error)
|
||||
Save(spec.Interface) error
|
||||
}
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
@@ -40,9 +45,8 @@ type transformOptions struct {
|
||||
|
||||
type options struct {
|
||||
transformOptions
|
||||
from string
|
||||
to string
|
||||
relativeTo string
|
||||
from string
|
||||
to string
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
@@ -69,11 +73,6 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "from",
|
||||
Usage: "specify the root to be transformed",
|
||||
Destination: &opts.from,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "input",
|
||||
Usage: "Specify the file to read the CDI specification from. If this is '-' the specification is read from STDIN",
|
||||
@@ -86,10 +85,9 @@ func (m command) build() *cli.Command {
|
||||
Destination: &opts.output,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "relative-to",
|
||||
Usage: "specify whether the transform is relative to the host or to the container. One of [ host | container ]",
|
||||
Value: "host",
|
||||
Destination: &opts.relativeTo,
|
||||
Name: "from",
|
||||
Usage: "specify the root to be transformed",
|
||||
Destination: &opts.from,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "to",
|
||||
@@ -103,12 +101,6 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
func (m command) validateFlags(c *cli.Context, opts *options) error {
|
||||
switch opts.relativeTo {
|
||||
case "host":
|
||||
case "container":
|
||||
default:
|
||||
return fmt.Errorf("invalid --relative-to value: %v", opts.relativeTo)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -118,10 +110,9 @@ func (m command) run(c *cli.Context, opts *options) error {
|
||||
return fmt.Errorf("failed to load CDI specification: %w", err)
|
||||
}
|
||||
|
||||
err = transformroot.New(
|
||||
transformroot.WithRoot(opts.from),
|
||||
transformroot.WithTargetRoot(opts.to),
|
||||
transformroot.WithRelativeTo(opts.relativeTo),
|
||||
err = transform.NewRootTransformer(
|
||||
opts.from,
|
||||
opts.to,
|
||||
).Transform(spec.Raw())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to transform CDI specification: %w", err)
|
||||
|
||||
@@ -17,10 +17,9 @@
|
||||
package transform
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -121,10 +121,10 @@ func run(c *cli.Context, opts *options) error {
|
||||
}
|
||||
defer output.Close()
|
||||
|
||||
if _, err := cfgToml.Save(output); err != nil {
|
||||
return fmt.Errorf("failed to save config: %v", err)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cfgToml.Save(output)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -19,11 +19,10 @@ package defaultsubcommand
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -86,7 +85,8 @@ func (m command) run(c *cli.Context, opts *flags.Options) error {
|
||||
}
|
||||
defer output.Close()
|
||||
|
||||
if _, err = cfgToml.Save(output); err != nil {
|
||||
_, err = cfgToml.Save(output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write output: %v", err)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,18 +17,16 @@
|
||||
package chmod
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -37,8 +35,7 @@ type command struct {
|
||||
|
||||
type config struct {
|
||||
paths cli.StringSlice
|
||||
modeStr string
|
||||
mode fs.FileMode
|
||||
mode string
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
@@ -69,13 +66,13 @@ func (m command) build() *cli.Command {
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "path",
|
||||
Usage: "Specify a path to apply the specified mode to",
|
||||
Usage: "Specifiy a path to apply the specified mode to",
|
||||
Destination: &cfg.paths,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "mode",
|
||||
Usage: "Specify the file mode",
|
||||
Destination: &cfg.modeStr,
|
||||
Destination: &cfg.mode,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
@@ -88,16 +85,10 @@ func (m command) build() *cli.Command {
|
||||
}
|
||||
|
||||
func validateFlags(c *cli.Context, cfg *config) error {
|
||||
if strings.TrimSpace(cfg.modeStr) == "" {
|
||||
if strings.TrimSpace(cfg.mode) == "" {
|
||||
return fmt.Errorf("a non-empty mode must be specified")
|
||||
}
|
||||
|
||||
modeInt, err := strconv.ParseUint(cfg.modeStr, 8, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse mode as octal: %v", err)
|
||||
}
|
||||
cfg.mode = fs.FileMode(modeInt)
|
||||
|
||||
for _, p := range cfg.paths.Value() {
|
||||
if strings.TrimSpace(p) == "" {
|
||||
return fmt.Errorf("paths must not be empty")
|
||||
@@ -121,38 +112,33 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
||||
return fmt.Errorf("empty container root detected")
|
||||
}
|
||||
|
||||
paths := m.getPaths(containerRoot, cfg.paths.Value(), cfg.mode)
|
||||
paths := m.getPaths(containerRoot, cfg.paths.Value())
|
||||
if len(paths) == 0 {
|
||||
m.logger.Debugf("No paths specified; exiting")
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, path := range paths {
|
||||
err = os.Chmod(path, cfg.mode)
|
||||
// in some cases this is not an issue (e.g. whole /dev mounted), see #143
|
||||
if errors.Is(err, fs.ErrPermission) {
|
||||
m.logger.Debugf("Ignoring permission error with chmod: %v", err)
|
||||
err = nil
|
||||
}
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
targets, err := locator.Locate("chmod")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to locate chmod: %v", err)
|
||||
}
|
||||
chmodPath := targets[0]
|
||||
|
||||
return err
|
||||
args := append([]string{filepath.Base(chmodPath), cfg.mode}, paths...)
|
||||
|
||||
return syscall.Exec(chmodPath, args, nil)
|
||||
}
|
||||
|
||||
// getPaths updates the specified paths relative to the root.
|
||||
func (m command) getPaths(root string, paths []string, desiredMode fs.FileMode) []string {
|
||||
func (m command) getPaths(root string, paths []string) []string {
|
||||
var pathsInRoot []string
|
||||
for _, f := range paths {
|
||||
path := filepath.Join(root, f)
|
||||
stat, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
m.logger.Debugf("Skipping path %q: %v", path, err)
|
||||
continue
|
||||
}
|
||||
if (stat.Mode()&(fs.ModePerm|fs.ModeSetuid|fs.ModeSetgid|fs.ModeSticky))^desiredMode == 0 {
|
||||
m.logger.Debugf("Skipping path %q: already desired mode", path)
|
||||
continue
|
||||
}
|
||||
pathsInRoot = append(pathsInRoot, path)
|
||||
}
|
||||
|
||||
|
||||
@@ -22,13 +22,12 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -57,7 +56,7 @@ func (m command) build() *cli.Command {
|
||||
// Create the '' command
|
||||
c := cli.Command{
|
||||
Name: "create-symlinks",
|
||||
Usage: "A hook to create symlinks in the container. This can be used to process CSV mount specs",
|
||||
Usage: "A hook to create symlinks in the container. This can be used to proces CSV mount specs",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
|
||||
@@ -20,10 +20,9 @@ import (
|
||||
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type hookCommand struct {
|
||||
|
||||
@@ -17,27 +17,22 @@
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
type options struct {
|
||||
type config struct {
|
||||
folders cli.StringSlice
|
||||
ldconfigPath string
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
@@ -51,15 +46,12 @@ func NewCommand(logger logger.Interface) *cli.Command {
|
||||
|
||||
// build the update-ldcache command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := options{}
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'update-ldcache' command
|
||||
c := cli.Command{
|
||||
Name: "update-ldcache",
|
||||
Usage: "Update ldcache in a container by running ldconfig",
|
||||
Before: func(c *cli.Context) error {
|
||||
return m.validateFlags(c, &cfg)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
@@ -68,15 +60,9 @@ func (m command) build() *cli.Command {
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "folder",
|
||||
Usage: "Specify a folder to add to /etc/ld.so.conf before updating the ld cache",
|
||||
Usage: "Specifiy a folder to add to /etc/ld.so.conf before updating the ld cache",
|
||||
Destination: &cfg.folders,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "ldconfig-path",
|
||||
Usage: "Specify the path to the ldconfig program",
|
||||
Destination: &cfg.ldconfigPath,
|
||||
Value: "/sbin/ldconfig",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
@@ -87,14 +73,7 @@ func (m command) build() *cli.Command {
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(c *cli.Context, cfg *options) error {
|
||||
if cfg.ldconfigPath == "" {
|
||||
return errors.New("ldconfig-path must be specified")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *options) error {
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
@@ -105,52 +84,23 @@ func (m command) run(c *cli.Context, cfg *options) error {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
ldconfigPath := m.resolveLDConfigPath(cfg.ldconfigPath)
|
||||
args := []string{filepath.Base(ldconfigPath)}
|
||||
_, err = os.Stat(filepath.Join(containerRoot, "/etc/ld.so.cache"))
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
m.logger.Debugf("No ld.so.cache found, skipping update")
|
||||
return nil
|
||||
}
|
||||
|
||||
err = m.createConfig(containerRoot, cfg.folders.Value())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf: %v", err)
|
||||
}
|
||||
|
||||
args := []string{"/sbin/ldconfig"}
|
||||
if containerRoot != "" {
|
||||
args = append(args, "-r", containerRoot)
|
||||
}
|
||||
|
||||
if root(containerRoot).hasPath("/etc/ld.so.cache") {
|
||||
args = append(args, "-C", "/etc/ld.so.cache")
|
||||
} else {
|
||||
m.logger.Debugf("No ld.so.cache found, skipping update")
|
||||
args = append(args, "-N")
|
||||
}
|
||||
|
||||
folders := cfg.folders.Value()
|
||||
if root(containerRoot).hasPath("/etc/ld.so.conf.d") {
|
||||
err := m.createConfig(containerRoot, folders)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
|
||||
}
|
||||
} else {
|
||||
args = append(args, folders...)
|
||||
}
|
||||
|
||||
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
|
||||
// be configured to use a different config file by default.
|
||||
args = append(args, "-f", "/etc/ld.so.conf")
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(ldconfigPath, args, nil)
|
||||
}
|
||||
|
||||
type root string
|
||||
|
||||
func (r root) hasPath(path string) bool {
|
||||
_, err := os.Stat(filepath.Join(string(r), path))
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// resolveLDConfigPath determines the LDConfig path to use for the system.
|
||||
// On systems such as Ubuntu where `/sbin/ldconfig` is a wrapper around
|
||||
// /sbin/ldconfig.real, the latter is returned.
|
||||
func (m command) resolveLDConfigPath(path string) string {
|
||||
return strings.TrimPrefix(config.NormalizeLDConfigPath("@"+path), "@")
|
||||
return syscall.Exec(args[0], args, nil)
|
||||
}
|
||||
|
||||
// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf in the container
|
||||
|
||||
@@ -17,9 +17,8 @@
|
||||
package info
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -19,8 +19,6 @@ package main
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
|
||||
@@ -28,6 +26,7 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
cli "github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
@@ -309,11 +309,9 @@ func enableCDI(config *config, cfg engine.Interface) error {
|
||||
}
|
||||
switch config.runtime {
|
||||
case "containerd":
|
||||
cfg.Set("enable_cdi", true)
|
||||
return cfg.Set("enable_cdi", true)
|
||||
case "docker":
|
||||
cfg.Set("features", map[string]bool{"cdi": true})
|
||||
default:
|
||||
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
|
||||
return cfg.Set("features", map[string]bool{"cdi": true})
|
||||
}
|
||||
return nil
|
||||
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
|
||||
}
|
||||
|
||||
@@ -17,10 +17,9 @@
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type runtimeCommand struct {
|
||||
|
||||
@@ -20,11 +20,10 @@ import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci"
|
||||
)
|
||||
|
||||
type allPossible struct {
|
||||
|
||||
@@ -24,12 +24,11 @@ import (
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
|
||||
@@ -20,10 +20,9 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type nodeLister interface {
|
||||
@@ -64,13 +63,20 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
|
||||
if m.nodeIsBlocked(d) {
|
||||
continue
|
||||
}
|
||||
|
||||
var stat unix.Stat_t
|
||||
err := unix.Stat(d, &stat)
|
||||
if err != nil {
|
||||
m.logger.Warningf("Could not stat device: %v", err)
|
||||
continue
|
||||
}
|
||||
deviceNodes = append(deviceNodes, newDeviceNode(d, stat))
|
||||
deviceNode := deviceNode{
|
||||
path: d,
|
||||
major: unix.Major(uint64(stat.Rdev)),
|
||||
minor: unix.Minor(uint64(stat.Rdev)),
|
||||
}
|
||||
|
||||
deviceNodes = append(deviceNodes, deviceNode)
|
||||
}
|
||||
|
||||
return deviceNodes, nil
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package devchar
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
|
||||
deviceNode := deviceNode{
|
||||
path: d,
|
||||
major: unix.Major(stat.Rdev),
|
||||
minor: unix.Minor(stat.Rdev),
|
||||
}
|
||||
return deviceNode
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
//go:build !linux
|
||||
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package devchar
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
|
||||
deviceNode := deviceNode{
|
||||
path: d,
|
||||
major: unix.Major(uint64(stat.Rdev)),
|
||||
minor: unix.Minor(uint64(stat.Rdev)),
|
||||
}
|
||||
return deviceNode
|
||||
}
|
||||
@@ -19,11 +19,10 @@ package createdevicenodes
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -19,10 +19,9 @@ package createdevicenodes
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -17,12 +17,11 @@
|
||||
package system
|
||||
|
||||
import (
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
devchar "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks"
|
||||
devicenodes "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-device-nodes"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/print-ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
ARG GOLANGCI_LINT_VERSION=v1.54.1
|
||||
FROM golang:${GOLANG_VERSION}
|
||||
|
||||
RUN go install golang.org/x/lint/golint@6edffad5e6160f5949cdefc81710b2706fbcd4f6
|
||||
@@ -20,8 +19,3 @@ RUN go install github.com/matryer/moq@latest
|
||||
RUN go install github.com/gordonklaus/ineffassign@d2c82e48359b033cde9cf1307f6d5550b8d61321
|
||||
RUN go install github.com/client9/misspell/cmd/misspell@latest
|
||||
RUN go install github.com/google/go-licenses@latest
|
||||
RUN curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin ${GOLANGCI_LINT_VERSION}
|
||||
|
||||
# We need to set the /work directory as a safe directory.
|
||||
# This allows git commands to run in the container.
|
||||
RUN git config --file=/.gitconfig --add safe.directory /work
|
||||
|
||||
7
go.mod
7
go.mod
@@ -3,14 +3,14 @@ module github.com/NVIDIA/nvidia-container-toolkit
|
||||
go 1.20
|
||||
|
||||
require (
|
||||
github.com/NVIDIA/go-nvlib v0.0.0-20231212194527-f3264c8a6a7a
|
||||
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f
|
||||
github.com/NVIDIA/go-nvml v0.12.0-1
|
||||
github.com/fsnotify/fsnotify v1.5.4
|
||||
github.com/opencontainers/runtime-spec v1.1.0
|
||||
github.com/pelletier/go-toml v1.9.4
|
||||
github.com/sirupsen/logrus v1.9.0
|
||||
github.com/stretchr/testify v1.8.4
|
||||
github.com/stretchr/testify v1.8.1
|
||||
github.com/urfave/cli/v2 v2.3.0
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884
|
||||
golang.org/x/mod v0.5.0
|
||||
golang.org/x/sys v0.7.0
|
||||
tags.cncf.io/container-device-interface v0.6.2
|
||||
@@ -20,7 +20,6 @@ require (
|
||||
require (
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/google/uuid v1.4.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/kr/pretty v0.3.1 // indirect
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
|
||||
|
||||
14
go.sum
14
go.sum
@@ -1,8 +1,6 @@
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/NVIDIA/go-nvlib v0.0.0-20231212194527-f3264c8a6a7a h1:aHaNKihxpWzWnV3yoVkit3bhOF7cg2ScCbzW+gepQ/E=
|
||||
github.com/NVIDIA/go-nvlib v0.0.0-20231212194527-f3264c8a6a7a/go.mod h1:U82N6/xKp6OnoqpALBH0C5SO59Buu4sX1Z3rQtBsBKQ=
|
||||
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f h1:FTblgO87K1vPB8tcwM5EOFpFf6UpsrlDpErPm25mFWE=
|
||||
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0=
|
||||
github.com/NVIDIA/go-nvml v0.12.0-1 h1:6mdjtlFo+17dWL7VFPfuRMtf0061TF4DKls9pkSw6uM=
|
||||
github.com/NVIDIA/go-nvml v0.12.0-1/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
|
||||
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
|
||||
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
@@ -15,8 +13,6 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
|
||||
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
|
||||
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
|
||||
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
|
||||
github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
|
||||
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
@@ -61,8 +57,8 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
|
||||
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
|
||||
@@ -75,6 +71,8 @@ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHo
|
||||
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
|
||||
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
|
||||
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884 h1:V0LUbfm4kVA1CPG8FgG9AGZqa3ykE5U12Gd3PZgoItA=
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884/go.mod h1:/x5Ky1ZJNyCjDkgSL1atII0EFKQF5WaIHKeP5nkaQfk=
|
||||
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
|
||||
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
|
||||
@@ -16,11 +16,6 @@
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ContainerCLIConfig stores the options for the nvidia-container-cli
|
||||
type ContainerCLIConfig struct {
|
||||
Root string `toml:"root"`
|
||||
@@ -36,27 +31,3 @@ type ContainerCLIConfig struct {
|
||||
User string `toml:"user"`
|
||||
Ldconfig string `toml:"ldconfig"`
|
||||
}
|
||||
|
||||
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
|
||||
// This is only done for host LDConfigs and is required to handle systems where
|
||||
// /sbin/ldconfig is a wrapper around /sbin/ldconfig.real.
|
||||
func (c *ContainerCLIConfig) NormalizeLDConfigPath() string {
|
||||
return NormalizeLDConfigPath(c.Ldconfig)
|
||||
}
|
||||
|
||||
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
|
||||
// This is only done for host LDConfigs and is required to handle systems where
|
||||
// /sbin/ldconfig is a wrapper around /sbin/ldconfig.real.
|
||||
func NormalizeLDConfigPath(path string) string {
|
||||
if !strings.HasPrefix(path, "@") {
|
||||
return path
|
||||
}
|
||||
|
||||
trimmedPath := strings.TrimSuffix(strings.TrimPrefix(path, "@"), ".real")
|
||||
// If the .real path exists, we return that.
|
||||
if _, err := os.Stat(trimmedPath + ".real"); err == nil {
|
||||
return "@" + trimmedPath + ".real"
|
||||
}
|
||||
// If the .real path does not exists (or cannot be read) we return the non-.real path.
|
||||
return "@" + trimmedPath
|
||||
}
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
/**
|
||||
# Copyright 2023 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNormalizeLDConfigPath(t *testing.T) {
|
||||
testDir := t.TempDir()
|
||||
|
||||
f, err := os.Create(filepath.Join(testDir, "exists.real"))
|
||||
require.NoError(t, err)
|
||||
_ = f.Close()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
ldconfig string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
description: "empty input",
|
||||
},
|
||||
{
|
||||
description: "non-host with .real suffix returns as is",
|
||||
ldconfig: "/some/path/ldconfig.real",
|
||||
expected: "/some/path/ldconfig.real",
|
||||
},
|
||||
{
|
||||
description: "non-host without .real suffix returns as is",
|
||||
ldconfig: "/some/path/ldconfig",
|
||||
expected: "/some/path/ldconfig",
|
||||
},
|
||||
{
|
||||
description: "host .real file exists is returned",
|
||||
ldconfig: "@" + filepath.Join(testDir, "exists.real"),
|
||||
expected: "@" + filepath.Join(testDir, "exists.real"),
|
||||
},
|
||||
{
|
||||
description: "host resolves .real file",
|
||||
ldconfig: "@" + filepath.Join(testDir, "exists"),
|
||||
expected: "@" + filepath.Join(testDir, "exists.real"),
|
||||
},
|
||||
{
|
||||
description: "host .real file not exists strips suffix",
|
||||
ldconfig: "@/does/not/exist.real",
|
||||
expected: "@/does/not/exist",
|
||||
},
|
||||
{
|
||||
description: "host file returned as is if no .real file exsits",
|
||||
ldconfig: "@/does/not/exist",
|
||||
expected: "@/does/not/exist",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
c := ContainerCLIConfig{
|
||||
Ldconfig: tc.ldconfig,
|
||||
}
|
||||
|
||||
require.Equal(t, tc.expected, c.NormalizeLDConfigPath())
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -124,7 +124,10 @@ func GetDefault() (*Config, error) {
|
||||
}
|
||||
|
||||
func getLdConfigPath() string {
|
||||
return NormalizeLDConfigPath("@/sbin/ldconfig")
|
||||
if _, err := os.Stat("/sbin/ldconfig.real"); err == nil {
|
||||
return "@/sbin/ldconfig.real"
|
||||
}
|
||||
return "@/sbin/ldconfig"
|
||||
}
|
||||
|
||||
func getUserGroup() string {
|
||||
|
||||
@@ -35,7 +35,7 @@ func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, os.WriteFile(filename, contents, 0600))
|
||||
require.NoError(t, os.WriteFile(filename, contents, 0766))
|
||||
|
||||
cfg, err := GetConfig()
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -19,13 +19,10 @@ package image
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
type builder struct {
|
||||
env map[string]string
|
||||
mounts []specs.Mount
|
||||
env []string
|
||||
disableRequire bool
|
||||
}
|
||||
|
||||
@@ -33,12 +30,7 @@ type builder struct {
|
||||
func New(opt ...Option) (CUDA, error) {
|
||||
b := &builder{}
|
||||
for _, o := range opt {
|
||||
if err := o(b); err != nil {
|
||||
return CUDA{}, err
|
||||
}
|
||||
}
|
||||
if b.env == nil {
|
||||
b.env = make(map[string]string)
|
||||
o(b)
|
||||
}
|
||||
|
||||
return b.build()
|
||||
@@ -46,57 +38,36 @@ func New(opt ...Option) (CUDA, error) {
|
||||
|
||||
// build creates a CUDA image from the builder.
|
||||
func (b builder) build() (CUDA, error) {
|
||||
if b.disableRequire {
|
||||
b.env[envNVDisableRequire] = "true"
|
||||
c := make(CUDA)
|
||||
|
||||
for _, e := range b.env {
|
||||
parts := strings.SplitN(e, "=", 2)
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("invalid environment variable: %v", e)
|
||||
}
|
||||
c[parts[0]] = parts[1]
|
||||
}
|
||||
|
||||
c := CUDA{
|
||||
env: b.env,
|
||||
mounts: b.mounts,
|
||||
if b.disableRequire {
|
||||
c[envNVDisableRequire] = "true"
|
||||
}
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Option is a functional option for creating a CUDA image.
|
||||
type Option func(*builder) error
|
||||
type Option func(*builder)
|
||||
|
||||
// WithDisableRequire sets the disable require option.
|
||||
func WithDisableRequire(disableRequire bool) Option {
|
||||
return func(b *builder) error {
|
||||
return func(b *builder) {
|
||||
b.disableRequire = disableRequire
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithEnv sets the environment variables to use when creating the CUDA image.
|
||||
// Note that this also overwrites the values set with WithEnvMap.
|
||||
func WithEnv(env []string) Option {
|
||||
return func(b *builder) error {
|
||||
envmap := make(map[string]string)
|
||||
for _, e := range env {
|
||||
parts := strings.SplitN(e, "=", 2)
|
||||
if len(parts) != 2 {
|
||||
return fmt.Errorf("invalid environment variable: %v", e)
|
||||
}
|
||||
envmap[parts[0]] = parts[1]
|
||||
}
|
||||
return WithEnvMap(envmap)(b)
|
||||
}
|
||||
}
|
||||
|
||||
// WithEnvMap sets the environment variable map to use when creating the CUDA image.
|
||||
// Note that this also overwrites the values set with WithEnv.
|
||||
func WithEnvMap(env map[string]string) Option {
|
||||
return func(b *builder) error {
|
||||
return func(b *builder) {
|
||||
b.env = env
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithMounts sets the mounts associated with the CUDA image.
|
||||
func WithMounts(mounts []specs.Mount) Option {
|
||||
return func(b *builder) error {
|
||||
b.mounts = mounts
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,7 +73,7 @@ func (c DriverCapabilities) Has(capability DriverCapability) bool {
|
||||
return c[capability]
|
||||
}
|
||||
|
||||
// Any checks whether any of the specified capabilities are set
|
||||
// Any checks whether any of the specified capabilites are set
|
||||
func (c DriverCapabilities) Any(capabilities ...DriverCapability) bool {
|
||||
if c.IsAll() {
|
||||
return true
|
||||
|
||||
@@ -18,13 +18,11 @@ package image
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/mod/semver"
|
||||
"tags.cncf.io/container-device-interface/pkg/parser"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -39,10 +37,7 @@ const (
|
||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||
// a map of environment variable to values that can be used to perform lookups
|
||||
// such as requirements.
|
||||
type CUDA struct {
|
||||
env map[string]string
|
||||
mounts []specs.Mount
|
||||
}
|
||||
type CUDA map[string]string
|
||||
|
||||
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
||||
// The process environment is read (if present) to construc the CUDA Image.
|
||||
@@ -52,10 +47,7 @@ func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
|
||||
env = spec.Process.Env
|
||||
}
|
||||
|
||||
return New(
|
||||
WithEnv(env),
|
||||
WithMounts(spec.Mounts),
|
||||
)
|
||||
return New(WithEnv(env))
|
||||
}
|
||||
|
||||
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
||||
@@ -64,24 +56,12 @@ func NewCUDAImageFromEnv(env []string) (CUDA, error) {
|
||||
return New(WithEnv(env))
|
||||
}
|
||||
|
||||
// Getenv returns the value of the specified environment variable.
|
||||
// If the environment variable is not specified, an empty string is returned.
|
||||
func (i CUDA) Getenv(key string) string {
|
||||
return i.env[key]
|
||||
}
|
||||
|
||||
// HasEnvvar checks whether the specified envvar is defined in the image.
|
||||
func (i CUDA) HasEnvvar(key string) bool {
|
||||
_, exists := i.env[key]
|
||||
return exists
|
||||
}
|
||||
|
||||
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
|
||||
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
||||
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
||||
func (i CUDA) IsLegacy() bool {
|
||||
legacyCudaVersion := i.env[envCUDAVersion]
|
||||
cudaRequire := i.env[envNVRequireCUDA]
|
||||
legacyCudaVersion := i[envCUDAVersion]
|
||||
cudaRequire := i[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
@@ -94,7 +74,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
||||
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range i.env {
|
||||
for name, value := range i {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
@@ -113,7 +93,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
||||
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
||||
// to a valid (true) boolean value this can be used to disable the requirement checks
|
||||
func (i CUDA) HasDisableRequire() bool {
|
||||
if disable, exists := i.env[envNVDisableRequire]; exists {
|
||||
if disable, exists := i[envNVDisableRequire]; exists {
|
||||
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
||||
d, _ := strconv.ParseBool(disable)
|
||||
return d
|
||||
@@ -124,12 +104,12 @@ func (i CUDA) HasDisableRequire() bool {
|
||||
|
||||
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
// We concantenate all the devices from the specified env.
|
||||
// We concantenate all the devices from the specified envvars.
|
||||
var isSet bool
|
||||
var devices []string
|
||||
requested := make(map[string]bool)
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := i.env[envVar]; ok {
|
||||
if devs, ok := i[envVar]; ok {
|
||||
isSet = true
|
||||
for _, d := range strings.Split(devs, ",") {
|
||||
trimmed := strings.TrimSpace(d)
|
||||
@@ -157,18 +137,18 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
|
||||
// GetDriverCapabilities returns the requested driver capabilities.
|
||||
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||
env := i.env[envNVDriverCapabilities]
|
||||
env := i[envNVDriverCapabilities]
|
||||
|
||||
capabilities := make(DriverCapabilities)
|
||||
capabilites := make(DriverCapabilities)
|
||||
for _, c := range strings.Split(env, ",") {
|
||||
capabilities[DriverCapability(c)] = true
|
||||
capabilites[DriverCapability(c)] = true
|
||||
}
|
||||
|
||||
return capabilities
|
||||
return capabilites
|
||||
}
|
||||
|
||||
func (i CUDA) legacyVersion() (string, error) {
|
||||
cudaVersion := i.env[envCUDAVersion]
|
||||
cudaVersion := i[envCUDAVersion]
|
||||
majorMinor, err := parseMajorMinorVersion(cudaVersion)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
|
||||
@@ -198,79 +178,3 @@ func parseMajorMinorVersion(version string) (string, error) {
|
||||
}
|
||||
return majorMinor, nil
|
||||
}
|
||||
|
||||
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
|
||||
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
||||
var hasCDIdevice bool
|
||||
for _, device := range i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
|
||||
if !parser.IsQualifiedName(device) {
|
||||
return false
|
||||
}
|
||||
hasCDIdevice = true
|
||||
}
|
||||
|
||||
for _, device := range i.DevicesFromMounts() {
|
||||
if !strings.HasPrefix(device, "cdi/") {
|
||||
return false
|
||||
}
|
||||
hasCDIdevice = true
|
||||
}
|
||||
return hasCDIdevice
|
||||
}
|
||||
|
||||
const (
|
||||
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
||||
)
|
||||
|
||||
// DevicesFromMounts returns a list of device specified as mounts.
|
||||
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
|
||||
func (i CUDA) DevicesFromMounts() []string {
|
||||
root := filepath.Clean(deviceListAsVolumeMountsRoot)
|
||||
seen := make(map[string]bool)
|
||||
var devices []string
|
||||
for _, m := range i.mounts {
|
||||
source := filepath.Clean(m.Source)
|
||||
// Only consider mounts who's host volume is /dev/null
|
||||
if source != "/dev/null" {
|
||||
continue
|
||||
}
|
||||
|
||||
destination := filepath.Clean(m.Destination)
|
||||
if seen[destination] {
|
||||
continue
|
||||
}
|
||||
seen[destination] = true
|
||||
|
||||
// Only consider container mount points that begin with 'root'
|
||||
if !strings.HasPrefix(destination, root) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Grab the full path beyond 'root' and add it to the list of devices
|
||||
device := strings.Trim(strings.TrimPrefix(destination, root), "/")
|
||||
if len(device) == 0 {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, device)
|
||||
}
|
||||
return devices
|
||||
}
|
||||
|
||||
// CDIDevicesFromMounts returns a list of CDI devices specified as mounts on the image.
|
||||
func (i CUDA) CDIDevicesFromMounts() []string {
|
||||
var devices []string
|
||||
for _, mountDevice := range i.DevicesFromMounts() {
|
||||
if !strings.HasPrefix(mountDevice, "cdi/") {
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(strings.TrimPrefix(mountDevice, "cdi/"), "/", 3)
|
||||
if len(parts) != 3 {
|
||||
continue
|
||||
}
|
||||
vendor := parts[0]
|
||||
class := parts[1]
|
||||
device := parts[2]
|
||||
devices = append(devices, fmt.Sprintf("%s/%s=%s", vendor, class, device))
|
||||
}
|
||||
return devices
|
||||
}
|
||||
|
||||
@@ -126,6 +126,7 @@ func TestGetRequirements(t *testing.T) {
|
||||
requirements, err := image.GetRequirements()
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, tc.requirements, requirements)
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
@@ -154,7 +154,10 @@ func (t Toml) contents() ([]byte, error) {
|
||||
// format fixes the comments for the config to ensure that they start in column
|
||||
// 1 and are not followed by a space.
|
||||
func (t Toml) format(contents []byte) ([]byte, error) {
|
||||
r := regexp.MustCompile(`(\n*)\s*?#\s*(\S.*)`)
|
||||
r, err := regexp.Compile(`(\n*)\s*?#\s*(\S.*)`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to compile regexp: %v", err)
|
||||
}
|
||||
replaced := r.ReplaceAll(contents, []byte("$1#$2"))
|
||||
|
||||
return replaced, nil
|
||||
|
||||
@@ -27,13 +27,20 @@ type charDevices mounts
|
||||
var _ Discover = (*charDevices)(nil)
|
||||
|
||||
// NewCharDeviceDiscoverer creates a discoverer which locates the specified set of device nodes.
|
||||
func NewCharDeviceDiscoverer(logger logger.Interface, devRoot string, devices []string) Discover {
|
||||
func NewCharDeviceDiscoverer(logger logger.Interface, devices []string, root string) Discover {
|
||||
locator := lookup.NewCharDeviceLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(devRoot),
|
||||
lookup.WithRoot(root),
|
||||
)
|
||||
|
||||
return (*charDevices)(newMounts(logger, locator, devRoot, devices))
|
||||
return NewDeviceDiscoverer(logger, locator, root, devices)
|
||||
}
|
||||
|
||||
// NewDeviceDiscoverer creates a discoverer which locates the specified set of device nodes using the specified locator.
|
||||
func NewDeviceDiscoverer(logger logger.Interface, locator lookup.Locator, root string, devices []string) Discover {
|
||||
m := NewMounts(logger, locator, root, devices).(*mounts)
|
||||
|
||||
return (*charDevices)(m)
|
||||
}
|
||||
|
||||
// Mounts returns the discovered mounts for the charDevices.
|
||||
|
||||
@@ -20,10 +20,9 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
func TestCharDevices(t *testing.T) {
|
||||
|
||||
@@ -30,8 +30,8 @@ type filtered struct {
|
||||
filter Filter
|
||||
}
|
||||
|
||||
// newFilteredDiscoverer creates a discoverer that applies the specified filter to the returned entities of the discoverer
|
||||
func newFilteredDiscoverer(logger logger.Interface, applyTo Discover, filter Filter) Discover {
|
||||
// newFilteredDisoverer creates a discoverer that applies the specified filter to the returned entities of the discoverer
|
||||
func newFilteredDisoverer(logger logger.Interface, applyTo Discover, filter Filter) Discover {
|
||||
return filtered{
|
||||
Discover: applyTo,
|
||||
logger: logger,
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
func NewGDRCopyDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
|
||||
return NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
devRoot,
|
||||
[]string{"/dev/gdrdrv"},
|
||||
), nil
|
||||
}
|
||||
@@ -29,17 +29,17 @@ type gdsDeviceDiscoverer struct {
|
||||
}
|
||||
|
||||
// NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
|
||||
func NewGDSDiscoverer(logger logger.Interface, driverRoot string, devRoot string) (Discover, error) {
|
||||
func NewGDSDiscoverer(logger logger.Interface, root string) (Discover, error) {
|
||||
devices := NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
devRoot,
|
||||
[]string{"/dev/nvidia-fs*"},
|
||||
root,
|
||||
)
|
||||
|
||||
udev := NewMounts(
|
||||
logger,
|
||||
lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(driverRoot)),
|
||||
driverRoot,
|
||||
lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(root)),
|
||||
root,
|
||||
[]string{"/run/udev"},
|
||||
)
|
||||
|
||||
@@ -47,9 +47,9 @@ func NewGDSDiscoverer(logger logger.Interface, driverRoot string, devRoot string
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithRoot(root),
|
||||
),
|
||||
driverRoot,
|
||||
root,
|
||||
[]string{"/etc/cufile.json"},
|
||||
)
|
||||
|
||||
|
||||
@@ -28,34 +28,42 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||
)
|
||||
|
||||
// NewDRMNodesDiscoverer returns a discoverer for the DRM device nodes associated with the specified visible devices.
|
||||
//
|
||||
// TODO: The logic for creating DRM devices should be consolidated between this
|
||||
// and the logic for generating CDI specs for a single device. This is only used
|
||||
// when applying OCI spec modifications to an incoming spec in "legacy" mode.
|
||||
func NewDRMNodesDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string, nvidiaCTKPath string) (Discover, error) {
|
||||
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, devRoot)
|
||||
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
|
||||
func NewGraphicsDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string, nvidiaCTKPath string) (Discover, error) {
|
||||
mounts, err := NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
|
||||
}
|
||||
|
||||
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, driverRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
|
||||
}
|
||||
|
||||
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCTKPath)
|
||||
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, driverRoot, nvidiaCTKPath)
|
||||
|
||||
discover := Merge(
|
||||
Merge(drmDeviceNodes, drmByPathSymlinks),
|
||||
mounts,
|
||||
)
|
||||
|
||||
discover := Merge(drmDeviceNodes, drmByPathSymlinks)
|
||||
return discover, nil
|
||||
}
|
||||
|
||||
// NewGraphicsMountsDiscoverer creates a discoverer for the mounts required by graphics tools such as vulkan.
|
||||
func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
|
||||
func NewGraphicsMountsDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) (Discover, error) {
|
||||
locator, err := lookup.NewLibraryLocator(logger, driverRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct library locator: %v", err)
|
||||
}
|
||||
libraries := NewMounts(
|
||||
logger,
|
||||
driver.Libraries(),
|
||||
driver.Root,
|
||||
locator,
|
||||
driverRoot,
|
||||
[]string{
|
||||
"libnvidia-egl-gbm.so.*",
|
||||
"libnvidia-egl-gbm.so",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -63,10 +71,10 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, n
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driver.Root),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithSearchPaths("/etc", "/usr/share"),
|
||||
),
|
||||
driver.Root,
|
||||
driverRoot,
|
||||
[]string{
|
||||
"glvnd/egl_vendor.d/10_nvidia.json",
|
||||
"vulkan/icd.d/nvidia_icd.json",
|
||||
@@ -78,7 +86,7 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, n
|
||||
},
|
||||
)
|
||||
|
||||
xorg := optionalXorgDiscoverer(logger, driver, nvidiaCTKPath)
|
||||
xorg := optionalXorgDiscoverer(logger, driverRoot, nvidiaCTKPath)
|
||||
|
||||
discover := Merge(
|
||||
libraries,
|
||||
@@ -93,16 +101,16 @@ type drmDevicesByPath struct {
|
||||
None
|
||||
logger logger.Interface
|
||||
nvidiaCTKPath string
|
||||
devRoot string
|
||||
driverRoot string
|
||||
devicesFrom Discover
|
||||
}
|
||||
|
||||
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
|
||||
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCTKPath string) Discover {
|
||||
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, driverRoot string, nvidiaCTKPath string) Discover {
|
||||
d := drmDevicesByPath{
|
||||
logger: logger,
|
||||
nvidiaCTKPath: nvidiaCTKPath,
|
||||
devRoot: devRoot,
|
||||
driverRoot: driverRoot,
|
||||
devicesFrom: devices,
|
||||
}
|
||||
|
||||
@@ -140,7 +148,7 @@ func (d drmDevicesByPath) Hooks() ([]Hook, error) {
|
||||
return []Hook{hook}, nil
|
||||
}
|
||||
|
||||
// getSpecificLinkArgs returns the required specific links that need to be created
|
||||
// getSpecificLinkArgs returns the required specic links that need to be created
|
||||
func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error) {
|
||||
selectedDevices := make(map[string]bool)
|
||||
for _, d := range devices {
|
||||
@@ -149,7 +157,7 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
|
||||
|
||||
linkLocator := lookup.NewFileLocator(
|
||||
lookup.WithLogger(d.logger),
|
||||
lookup.WithRoot(d.devRoot),
|
||||
lookup.WithRoot(d.driverRoot),
|
||||
)
|
||||
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
|
||||
if err != nil {
|
||||
@@ -175,23 +183,27 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
|
||||
}
|
||||
|
||||
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
|
||||
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Discover, error) {
|
||||
allDevices := NewCharDeviceDiscoverer(
|
||||
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Discover, error) {
|
||||
allDevices := NewDeviceDiscoverer(
|
||||
logger,
|
||||
devRoot,
|
||||
lookup.NewCharDeviceLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
),
|
||||
driverRoot,
|
||||
[]string{
|
||||
"/dev/dri/card*",
|
||||
"/dev/dri/renderD*",
|
||||
},
|
||||
)
|
||||
|
||||
filter, err := newDRMDeviceFilter(devices, devRoot)
|
||||
filter, err := newDRMDeviceFilter(logger, devices, driverRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
|
||||
}
|
||||
|
||||
// We return a discoverer that applies the DRM device filter created above to all discovered DRM device nodes.
|
||||
d := newFilteredDiscoverer(
|
||||
d := newFilteredDisoverer(
|
||||
logger,
|
||||
allDevices,
|
||||
filter,
|
||||
@@ -201,8 +213,8 @@ func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevice
|
||||
}
|
||||
|
||||
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
|
||||
func newDRMDeviceFilter(devices image.VisibleDevices, devRoot string) (Filter, error) {
|
||||
gpuInformationPaths, err := proc.GetInformationFilePaths(devRoot)
|
||||
func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Filter, error) {
|
||||
gpuInformationPaths, err := proc.GetInformationFilePaths(driverRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read GPU information: %v", err)
|
||||
}
|
||||
@@ -229,7 +241,7 @@ func newDRMDeviceFilter(devices image.VisibleDevices, devRoot string) (Filter, e
|
||||
return nil, fmt.Errorf("failed to determine DRM devices for %v: %v", busID, err)
|
||||
}
|
||||
for _, drmDeviceNode := range drmDeviceNodes {
|
||||
filter[drmDeviceNode] = true
|
||||
filter[filepath.Join(drmDeviceNode)] = true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -246,8 +258,8 @@ var _ Discover = (*xorgHooks)(nil)
|
||||
|
||||
// optionalXorgDiscoverer creates a discoverer for Xorg libraries.
|
||||
// If the creation of the discoverer fails, a None discoverer is returned.
|
||||
func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) Discover {
|
||||
xorg, err := newXorgDiscoverer(logger, driver, nvidiaCTKPath)
|
||||
func optionalXorgDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) Discover {
|
||||
xorg, err := newXorgDiscoverer(logger, driverRoot, nvidiaCTKPath)
|
||||
if err != nil {
|
||||
logger.Warningf("Failed to create Xorg discoverer: %v; skipping xorg libraries", err)
|
||||
return None{}
|
||||
@@ -255,9 +267,10 @@ func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidia
|
||||
return xorg
|
||||
}
|
||||
|
||||
func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
|
||||
func newXorgDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) (Discover, error) {
|
||||
libCudaPaths, err := cuda.New(
|
||||
driver.Libraries(),
|
||||
cuda.WithLogger(logger),
|
||||
cuda.WithDriverRoot(driverRoot),
|
||||
).Locate(".*.*")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
|
||||
@@ -274,11 +287,11 @@ func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPa
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driver.Root),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithSearchPaths(libRoot, "/usr/lib/x86_64-linux-gnu"),
|
||||
lookup.WithCount(1),
|
||||
),
|
||||
driver.Root,
|
||||
driverRoot,
|
||||
[]string{
|
||||
"nvidia/xorg/nvidia_drv.so",
|
||||
fmt.Sprintf("nvidia/xorg/libglxserver_nvidia.so.%s", version),
|
||||
@@ -290,20 +303,20 @@ func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPa
|
||||
nvidiaCTKPath: nvidiaCTKPath,
|
||||
}
|
||||
|
||||
xorgConfig := NewMounts(
|
||||
xorgConfg := NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driver.Root),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithSearchPaths("/usr/share"),
|
||||
),
|
||||
driver.Root,
|
||||
driverRoot,
|
||||
[]string{"X11/xorg.conf.d/10-nvidia.conf"},
|
||||
)
|
||||
|
||||
d := Merge(
|
||||
xorgLibs,
|
||||
xorgConfig,
|
||||
xorgConfg,
|
||||
xorgHooks,
|
||||
)
|
||||
|
||||
|
||||
@@ -19,10 +19,9 @@ package discover
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
func TestIPCMounts(t *testing.T) {
|
||||
|
||||
@@ -70,7 +70,7 @@ func (d *ipcMounts) Mounts() ([]Mount, error) {
|
||||
var modifiedMounts []Mount
|
||||
for _, m := range mounts {
|
||||
mount := m
|
||||
mount.Options = append(mount.Options, "noexec")
|
||||
mount.Options = append(m.Options, "noexec")
|
||||
modifiedMounts = append(modifiedMounts, mount)
|
||||
}
|
||||
|
||||
|
||||
@@ -25,11 +25,10 @@ import (
|
||||
)
|
||||
|
||||
// NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. A logger can also be specified
|
||||
func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath, ldconfigPath string) (Discover, error) {
|
||||
func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath string) (Discover, error) {
|
||||
d := ldconfig{
|
||||
logger: logger,
|
||||
nvidiaCTKPath: nvidiaCTKPath,
|
||||
ldconfigPath: ldconfigPath,
|
||||
mountsFrom: mounts,
|
||||
}
|
||||
|
||||
@@ -40,7 +39,6 @@ type ldconfig struct {
|
||||
None
|
||||
logger logger.Interface
|
||||
nvidiaCTKPath string
|
||||
ldconfigPath string
|
||||
mountsFrom Discover
|
||||
}
|
||||
|
||||
@@ -52,20 +50,14 @@ func (d ldconfig) Hooks() ([]Hook, error) {
|
||||
}
|
||||
h := CreateLDCacheUpdateHook(
|
||||
d.nvidiaCTKPath,
|
||||
d.ldconfigPath,
|
||||
getLibraryPaths(mounts),
|
||||
)
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// CreateLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache
|
||||
func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []string) Hook {
|
||||
func CreateLDCacheUpdateHook(executable string, libraries []string) Hook {
|
||||
var args []string
|
||||
|
||||
if ldconfig != "" {
|
||||
args = append(args, "--ldconfig-path", ldconfig)
|
||||
}
|
||||
|
||||
for _, f := range uniqueFolders(libraries) {
|
||||
args = append(args, "--folder", f)
|
||||
}
|
||||
@@ -77,6 +69,7 @@ func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []str
|
||||
)
|
||||
|
||||
return hook
|
||||
|
||||
}
|
||||
|
||||
// getLibraryPaths extracts the library dirs from the specified mounts
|
||||
@@ -93,6 +86,7 @@ func getLibraryPaths(mounts []Mount) []string {
|
||||
|
||||
// isLibName checks if the specified filename is a library (i.e. ends in `.so*`)
|
||||
func isLibName(filename string) bool {
|
||||
|
||||
base := filepath.Base(filename)
|
||||
|
||||
isLib, err := filepath.Match("lib?*.so*", base)
|
||||
|
||||
@@ -26,7 +26,6 @@ import (
|
||||
|
||||
const (
|
||||
testNvidiaCTKPath = "/foo/bar/nvidia-ctk"
|
||||
testLdconfigPath = "/bar/baz/ldconfig"
|
||||
)
|
||||
|
||||
func TestLDCacheUpdateHook(t *testing.T) {
|
||||
@@ -34,7 +33,6 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
ldconfigPath string
|
||||
mounts []Mount
|
||||
mountError error
|
||||
expectedError error
|
||||
@@ -77,11 +75,6 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
||||
},
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib"},
|
||||
},
|
||||
{
|
||||
description: "explicit ldconfig path is passed",
|
||||
ldconfigPath: testLdconfigPath,
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--ldconfig-path", testLdconfigPath},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
@@ -97,7 +90,7 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
||||
Lifecycle: "createContainer",
|
||||
}
|
||||
|
||||
d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath, tc.ldconfigPath)
|
||||
d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath)
|
||||
require.NoError(t, err)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
@@ -121,8 +114,10 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, mounts)
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestIsLibName(t *testing.T) {
|
||||
|
||||
@@ -27,7 +27,7 @@ type list struct {
|
||||
|
||||
var _ Discover = (*list)(nil)
|
||||
|
||||
// Merge creates a discoverer that is the composite of a list of discoverers.
|
||||
// Merge creates a discoverer that is the composite of a list of discoveres.
|
||||
func Merge(d ...Discover) Discover {
|
||||
l := list{
|
||||
discoverers: d,
|
||||
|
||||
@@ -19,14 +19,14 @@ package discover
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
// NewMOFEDDiscoverer creates a discoverer for MOFED devices.
|
||||
func NewMOFEDDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
|
||||
func NewMOFEDDiscoverer(logger logger.Interface, root string) (Discover, error) {
|
||||
devices := NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
devRoot,
|
||||
[]string{
|
||||
"/dev/infiniband/uverbs*",
|
||||
"/dev/infiniband/rdma_cm",
|
||||
},
|
||||
root,
|
||||
)
|
||||
|
||||
return devices, nil
|
||||
|
||||
@@ -20,9 +20,8 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
)
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
// NewNvSwitchDiscoverer creates a discoverer for NVSWITCH devices.
|
||||
func NewNvSwitchDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
|
||||
devices := NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
devRoot,
|
||||
[]string{
|
||||
"/dev/nvidia-nvswitchctl",
|
||||
"/dev/nvidia-nvswitch*",
|
||||
},
|
||||
)
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
@@ -16,6 +16,15 @@
|
||||
|
||||
package dxcore
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||
)
|
||||
|
||||
const (
|
||||
libraryName = "libdxcore.so"
|
||||
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
|
||||
)
|
||||
|
||||
// dxcore stores a reference the dxcore dynamic library
|
||||
var dxcore *context
|
||||
|
||||
|
||||
@@ -19,9 +19,8 @@ package edits
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestFromDiscovererAllowsMountsToIterate(t *testing.T) {
|
||||
|
||||
@@ -20,9 +20,9 @@ import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvml"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// additionalInfo allows for the info.Interface to be extened to implement the infoInterface.
|
||||
@@ -52,9 +52,7 @@ func (i additionalInfo) UsesNVGPUModule() (uses bool, reason string) {
|
||||
if ret != nvml.SUCCESS {
|
||||
return false, fmt.Sprintf("failed to initialize nvml: %v", ret)
|
||||
}
|
||||
defer func() {
|
||||
_ = i.nvmllib.Shutdown()
|
||||
}()
|
||||
defer i.nvmllib.Shutdown()
|
||||
|
||||
var names []string
|
||||
|
||||
|
||||
@@ -19,9 +19,9 @@ package info
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvml"
|
||||
"github.com/stretchr/testify/require"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
func TestUsesNVGPUModule(t *testing.T) {
|
||||
|
||||
@@ -17,9 +17,10 @@
|
||||
package info
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvml"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
cdi "tags.cncf.io/container-device-interface/pkg/parser"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
@@ -70,7 +71,7 @@ func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
|
||||
r.logger.Infof("Auto-detected mode as '%v'", rmode)
|
||||
}()
|
||||
|
||||
if image.OnlyFullyQualifiedCDIDevices() {
|
||||
if onlyFullyQualifiedCDIDevices(image) {
|
||||
return "cdi"
|
||||
}
|
||||
|
||||
@@ -89,3 +90,14 @@ func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
|
||||
|
||||
return "legacy"
|
||||
}
|
||||
|
||||
func onlyFullyQualifiedCDIDevices(image image.CUDA) bool {
|
||||
var hasCDIdevice bool
|
||||
for _, device := range image.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
|
||||
if !cdi.IsQualifiedName(device) {
|
||||
return false
|
||||
}
|
||||
hasCDIdevice = true
|
||||
}
|
||||
return hasCDIdevice
|
||||
}
|
||||
|
||||
@@ -19,11 +19,9 @@ package info
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
)
|
||||
|
||||
func TestResolveAutoMode(t *testing.T) {
|
||||
@@ -34,8 +32,7 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
mode string
|
||||
expectedMode string
|
||||
info map[string]bool
|
||||
envmap map[string]string
|
||||
mounts []string
|
||||
image image.CUDA
|
||||
}{
|
||||
{
|
||||
description: "non-auto resolves to input",
|
||||
@@ -122,7 +119,7 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
description: "cdi devices resolves to cdi",
|
||||
mode: "auto",
|
||||
expectedMode: "cdi",
|
||||
envmap: map[string]string{
|
||||
image: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=all",
|
||||
},
|
||||
},
|
||||
@@ -130,14 +127,14 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
description: "multiple cdi devices resolves to cdi",
|
||||
mode: "auto",
|
||||
expectedMode: "cdi",
|
||||
envmap: map[string]string{
|
||||
image: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,nvidia.com/gpu=1",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "at least one non-cdi device resolves to legacy",
|
||||
mode: "auto",
|
||||
envmap: map[string]string{
|
||||
image: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
|
||||
},
|
||||
info: map[string]bool{
|
||||
@@ -150,7 +147,7 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
{
|
||||
description: "at least one non-cdi device resolves to csv",
|
||||
mode: "auto",
|
||||
envmap: map[string]string{
|
||||
image: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
|
||||
},
|
||||
info: map[string]bool{
|
||||
@@ -160,44 +157,6 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
},
|
||||
expectedMode: "csv",
|
||||
},
|
||||
{
|
||||
description: "cdi mount devices resolves to CDI",
|
||||
mode: "auto",
|
||||
mounts: []string{
|
||||
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
||||
},
|
||||
expectedMode: "cdi",
|
||||
},
|
||||
{
|
||||
description: "cdi mount and non-CDI devices resolves to legacy",
|
||||
mode: "auto",
|
||||
mounts: []string{
|
||||
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
||||
"/var/run/nvidia-container-devices/all",
|
||||
},
|
||||
info: map[string]bool{
|
||||
"nvml": true,
|
||||
"tegra": false,
|
||||
"nvgpu": false,
|
||||
},
|
||||
expectedMode: "legacy",
|
||||
},
|
||||
{
|
||||
description: "cdi mount and non-CDI envvar resolves to legacy",
|
||||
mode: "auto",
|
||||
envmap: map[string]string{
|
||||
"NVIDIA_VISIBLE_DEVICES": "0",
|
||||
},
|
||||
mounts: []string{
|
||||
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
||||
},
|
||||
info: map[string]bool{
|
||||
"nvml": true,
|
||||
"tegra": false,
|
||||
"nvgpu": false,
|
||||
},
|
||||
expectedMode: "legacy",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
@@ -218,20 +177,7 @@ func TestResolveAutoMode(t *testing.T) {
|
||||
logger: logger,
|
||||
info: info,
|
||||
}
|
||||
|
||||
var mounts []specs.Mount
|
||||
for _, d := range tc.mounts {
|
||||
mount := specs.Mount{
|
||||
Source: "/dev/null",
|
||||
Destination: d,
|
||||
}
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(tc.envmap),
|
||||
image.WithMounts(mounts),
|
||||
)
|
||||
mode := r.resolveMode(tc.mode, image)
|
||||
mode := r.resolveMode(tc.mode, tc.image)
|
||||
require.EqualValues(t, tc.expectedMode, mode)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ func ParseGPUInformationFile(path string) (GPUInfo, error) {
|
||||
}
|
||||
|
||||
// gpuInfoFrom parses a GPUInfo struct from the specified reader
|
||||
// An information file has the following structure:
|
||||
// An information file has the following strucutre:
|
||||
// $ cat /proc/driver/nvidia/gpus/0000\:06\:00.0/information
|
||||
// Model: Tesla V100-SXM2-16GB
|
||||
// IRQ: 408
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
|
||||
type empty struct {
|
||||
logger logger.Interface
|
||||
path string
|
||||
}
|
||||
|
||||
var _ LDCache = (*empty)(nil)
|
||||
|
||||
// List always returns nil for an empty ldcache
|
||||
func (e *empty) List() ([]string, []string) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Lookup logs a debug message and returns nil for an empty ldcache
|
||||
func (e *empty) Lookup(prefixes ...string) ([]string, []string) {
|
||||
e.logger.Debugf("Calling Lookup(%v) on empty ldcache: %v", prefixes, e.path)
|
||||
return nil, nil
|
||||
}
|
||||
@@ -81,8 +81,6 @@ type entry2 struct {
|
||||
}
|
||||
|
||||
// LDCache represents the interface for performing lookups into the LDCache
|
||||
//
|
||||
//go:generate moq -out ldcache_mock.go . LDCache
|
||||
type LDCache interface {
|
||||
List() ([]string, []string)
|
||||
Lookup(...string) ([]string, []string)
|
||||
@@ -105,14 +103,7 @@ func New(logger logger.Interface, root string) (LDCache, error) {
|
||||
|
||||
logger.Debugf("Opening ld.conf at %v", path)
|
||||
f, err := os.Open(path)
|
||||
if os.IsNotExist(err) {
|
||||
logger.Warningf("Could not find ld.so.cache at %v; creating empty cache", path)
|
||||
e := &empty{
|
||||
logger: logger,
|
||||
path: path,
|
||||
}
|
||||
return e, nil
|
||||
} else if err != nil {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
@@ -243,7 +234,7 @@ func (c *ldcache) getEntries(selected func(string) bool) []entry {
|
||||
return entries
|
||||
}
|
||||
|
||||
// List creates a list of libraries in the ldcache.
|
||||
// List creates a list of libraires in the ldcache.
|
||||
// The 32-bit and 64-bit libraries are returned separately.
|
||||
func (c *ldcache) List() ([]string, []string) {
|
||||
all := func(s string) bool { return true }
|
||||
@@ -296,7 +287,7 @@ func (c *ldcache) resolveSelected(selected func(string) bool) ([]string, []strin
|
||||
func (c *ldcache) resolve(target string) (string, error) {
|
||||
name := filepath.Join(c.root, target)
|
||||
|
||||
c.logger.Debugf("checking %v", name)
|
||||
c.logger.Debugf("checking %v", string(name))
|
||||
|
||||
link, err := symlinks.Resolve(name)
|
||||
if err != nil {
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that LDCacheMock does implement LDCache.
|
||||
// If this is not the case, regenerate this file with moq.
|
||||
var _ LDCache = &LDCacheMock{}
|
||||
|
||||
// LDCacheMock is a mock implementation of LDCache.
|
||||
//
|
||||
// func TestSomethingThatUsesLDCache(t *testing.T) {
|
||||
//
|
||||
// // make and configure a mocked LDCache
|
||||
// mockedLDCache := &LDCacheMock{
|
||||
// ListFunc: func() ([]string, []string) {
|
||||
// panic("mock out the List method")
|
||||
// },
|
||||
// LookupFunc: func(strings ...string) ([]string, []string) {
|
||||
// panic("mock out the Lookup method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedLDCache in code that requires LDCache
|
||||
// // and then make assertions.
|
||||
//
|
||||
// }
|
||||
type LDCacheMock struct {
|
||||
// ListFunc mocks the List method.
|
||||
ListFunc func() ([]string, []string)
|
||||
|
||||
// LookupFunc mocks the Lookup method.
|
||||
LookupFunc func(strings ...string) ([]string, []string)
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// List holds details about calls to the List method.
|
||||
List []struct {
|
||||
}
|
||||
// Lookup holds details about calls to the Lookup method.
|
||||
Lookup []struct {
|
||||
// Strings is the strings argument value.
|
||||
Strings []string
|
||||
}
|
||||
}
|
||||
lockList sync.RWMutex
|
||||
lockLookup sync.RWMutex
|
||||
}
|
||||
|
||||
// List calls ListFunc.
|
||||
func (mock *LDCacheMock) List() ([]string, []string) {
|
||||
if mock.ListFunc == nil {
|
||||
panic("LDCacheMock.ListFunc: method is nil but LDCache.List was just called")
|
||||
}
|
||||
callInfo := struct {
|
||||
}{}
|
||||
mock.lockList.Lock()
|
||||
mock.calls.List = append(mock.calls.List, callInfo)
|
||||
mock.lockList.Unlock()
|
||||
return mock.ListFunc()
|
||||
}
|
||||
|
||||
// ListCalls gets all the calls that were made to List.
|
||||
// Check the length with:
|
||||
//
|
||||
// len(mockedLDCache.ListCalls())
|
||||
func (mock *LDCacheMock) ListCalls() []struct {
|
||||
} {
|
||||
var calls []struct {
|
||||
}
|
||||
mock.lockList.RLock()
|
||||
calls = mock.calls.List
|
||||
mock.lockList.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// Lookup calls LookupFunc.
|
||||
func (mock *LDCacheMock) Lookup(strings ...string) ([]string, []string) {
|
||||
if mock.LookupFunc == nil {
|
||||
panic("LDCacheMock.LookupFunc: method is nil but LDCache.Lookup was just called")
|
||||
}
|
||||
callInfo := struct {
|
||||
Strings []string
|
||||
}{
|
||||
Strings: strings,
|
||||
}
|
||||
mock.lockLookup.Lock()
|
||||
mock.calls.Lookup = append(mock.calls.Lookup, callInfo)
|
||||
mock.lockLookup.Unlock()
|
||||
return mock.LookupFunc(strings...)
|
||||
}
|
||||
|
||||
// LookupCalls gets all the calls that were made to Lookup.
|
||||
// Check the length with:
|
||||
//
|
||||
// len(mockedLDCache.LookupCalls())
|
||||
func (mock *LDCacheMock) LookupCalls() []struct {
|
||||
Strings []string
|
||||
} {
|
||||
var calls []struct {
|
||||
Strings []string
|
||||
}
|
||||
mock.lockLookup.RLock()
|
||||
calls = mock.calls.Lookup
|
||||
mock.lockLookup.RUnlock()
|
||||
return calls
|
||||
}
|
||||
@@ -17,23 +17,88 @@
|
||||
package cuda
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
type cudaLocator struct {
|
||||
lookup.Locator
|
||||
logger logger.Interface
|
||||
driverRoot string
|
||||
}
|
||||
|
||||
// Options is a function that configures a cudaLocator.
|
||||
type Options func(*cudaLocator)
|
||||
|
||||
// WithLogger is an option that configures the logger used by the locator.
|
||||
func WithLogger(logger logger.Interface) Options {
|
||||
return func(c *cudaLocator) {
|
||||
c.logger = logger
|
||||
}
|
||||
}
|
||||
|
||||
// WithDriverRoot is an option that configures the driver root used by the locator.
|
||||
func WithDriverRoot(driverRoot string) Options {
|
||||
return func(c *cudaLocator) {
|
||||
c.driverRoot = driverRoot
|
||||
}
|
||||
}
|
||||
|
||||
// New creates a new CUDA library locator.
|
||||
func New(libraries lookup.Locator) lookup.Locator {
|
||||
c := cudaLocator{
|
||||
Locator: libraries,
|
||||
func New(opts ...Options) lookup.Locator {
|
||||
c := &cudaLocator{}
|
||||
for _, opt := range opts {
|
||||
opt(c)
|
||||
}
|
||||
return &c
|
||||
|
||||
if c.logger == nil {
|
||||
c.logger = logger.New()
|
||||
}
|
||||
if c.driverRoot == "" {
|
||||
c.driverRoot = "/"
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// Locate returns the path to the libcuda.so.RMVERSION file.
|
||||
// libcuda.so is prefixed to the specified pattern.
|
||||
func (l *cudaLocator) Locate(pattern string) ([]string, error) {
|
||||
return l.Locator.Locate("libcuda.so" + pattern)
|
||||
ldcacheLocator, err := lookup.NewLibraryLocator(
|
||||
l.logger,
|
||||
l.driverRoot,
|
||||
)
|
||||
if err != nil {
|
||||
l.logger.Debugf("Failed to create LDCache locator: %v", err)
|
||||
}
|
||||
|
||||
fullPattern := "libcuda.so" + pattern
|
||||
|
||||
candidates, err := ldcacheLocator.Locate("libcuda.so")
|
||||
if err == nil {
|
||||
for _, c := range candidates {
|
||||
if match, err := filepath.Match(fullPattern, filepath.Base(c)); err != nil || !match {
|
||||
l.logger.Debugf("Skipping non-matching candidate %v: %v", c, err)
|
||||
continue
|
||||
}
|
||||
return []string{c}, nil
|
||||
}
|
||||
}
|
||||
l.logger.Debugf("Could not locate %q in LDCache: Checking predefined library paths.", pattern)
|
||||
|
||||
pathLocator := lookup.NewFileLocator(
|
||||
lookup.WithLogger(l.logger),
|
||||
lookup.WithRoot(l.driverRoot),
|
||||
lookup.WithSearchPaths(
|
||||
"/usr/lib64",
|
||||
"/usr/lib/x86_64-linux-gnu",
|
||||
"/usr/lib/aarch64-linux-gnu",
|
||||
"/usr/lib/x86_64-linux-gnu/nvidia/current",
|
||||
"/usr/lib/aarch64-linux-gnu/nvidia/current",
|
||||
),
|
||||
lookup.WithCount(1),
|
||||
)
|
||||
|
||||
return pathLocator.Locate(fullPattern)
|
||||
}
|
||||
|
||||
@@ -1,105 +0,0 @@
|
||||
/**
|
||||
# Copyright 2023 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cuda
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
func TestLocate(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
libcudaPath string
|
||||
expected []string
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
description: "no libcuda does not resolve library",
|
||||
libcudaPath: "",
|
||||
expected: []string{},
|
||||
expectedError: lookup.ErrNotFound,
|
||||
},
|
||||
{
|
||||
description: "no-ldcache searches /usr/lib64",
|
||||
libcudaPath: "/usr/lib64/libcuda.so.123.34",
|
||||
expected: []string{"/usr/lib64/libcuda.so.123.34"},
|
||||
expectedError: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
driverRoot, err := setupDriverRoot(t, tc.libcudaPath)
|
||||
require.NoError(t, err)
|
||||
|
||||
l := New(
|
||||
lookup.NewLibraryLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
),
|
||||
)
|
||||
|
||||
candidates, err := l.Locate(".*")
|
||||
require.ErrorIs(t, err, tc.expectedError)
|
||||
|
||||
var strippedCandidates []string
|
||||
for _, c := range candidates {
|
||||
// NOTE: We need to strip `/private` on MacOs due to symlink resolution
|
||||
strippedCandidates = append(strippedCandidates, strings.TrimPrefix(c, "/private"))
|
||||
}
|
||||
var expectedWithRoot []string
|
||||
for _, e := range tc.expected {
|
||||
expectedWithRoot = append(expectedWithRoot, filepath.Join(driverRoot, e))
|
||||
}
|
||||
|
||||
require.EqualValues(t, expectedWithRoot, strippedCandidates)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// setupDriverRoot creates a folder that can be used to represent a driver root.
|
||||
// The path to libcuda can be specified and an empty file is created at this location in the driver root.
|
||||
func setupDriverRoot(t *testing.T, libCudaPath string) (string, error) {
|
||||
driverRoot := t.TempDir()
|
||||
|
||||
if libCudaPath == "" {
|
||||
return driverRoot, nil
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(filepath.Join(driverRoot, filepath.Dir(libCudaPath)), 0755); err != nil {
|
||||
return "", fmt.Errorf("falied to create required driver root folder: %w", err)
|
||||
}
|
||||
|
||||
libCuda, err := os.Create(filepath.Join(driverRoot, libCudaPath))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create dummy libcuda.so: %w", err)
|
||||
}
|
||||
defer libCuda.Close()
|
||||
|
||||
return driverRoot, nil
|
||||
}
|
||||
@@ -27,55 +27,49 @@ import (
|
||||
// file can be used to locate file (or file-like elements) at a specified set of
|
||||
// prefixes. The validity of a file is determined by a filter function.
|
||||
type file struct {
|
||||
builder
|
||||
prefixes []string
|
||||
logger logger.Interface
|
||||
root string
|
||||
prefixes []string
|
||||
filter func(string) error
|
||||
count int
|
||||
isOptional bool
|
||||
}
|
||||
|
||||
// builder defines the builder for a file locator.
|
||||
type builder struct {
|
||||
logger logger.Interface
|
||||
root string
|
||||
searchPaths []string
|
||||
filter func(string) error
|
||||
count int
|
||||
isOptional bool
|
||||
}
|
||||
|
||||
// Option defines a function for passing builder to the NewFileLocator() call
|
||||
type Option func(*builder)
|
||||
// Option defines a function for passing options to the NewFileLocator() call
|
||||
type Option func(*file)
|
||||
|
||||
// WithRoot sets the root for the file locator
|
||||
func WithRoot(root string) Option {
|
||||
return func(f *builder) {
|
||||
return func(f *file) {
|
||||
f.root = root
|
||||
}
|
||||
}
|
||||
|
||||
// WithLogger sets the logger for the file locator
|
||||
func WithLogger(logger logger.Interface) Option {
|
||||
return func(f *builder) {
|
||||
return func(f *file) {
|
||||
f.logger = logger
|
||||
}
|
||||
}
|
||||
|
||||
// WithSearchPaths sets the search paths for the file locator.
|
||||
func WithSearchPaths(paths ...string) Option {
|
||||
return func(f *builder) {
|
||||
f.searchPaths = paths
|
||||
return func(f *file) {
|
||||
f.prefixes = paths
|
||||
}
|
||||
}
|
||||
|
||||
// WithFilter sets the filter for the file locator
|
||||
// The filter is called for each candidate file and candidates that return nil are considered.
|
||||
func WithFilter(assert func(string) error) Option {
|
||||
return func(f *builder) {
|
||||
return func(f *file) {
|
||||
f.filter = assert
|
||||
}
|
||||
}
|
||||
|
||||
// WithCount sets the maximum number of candidates to discover
|
||||
func WithCount(count int) Option {
|
||||
return func(f *builder) {
|
||||
return func(f *file) {
|
||||
f.count = count
|
||||
}
|
||||
}
|
||||
@@ -83,42 +77,32 @@ func WithCount(count int) Option {
|
||||
// WithOptional sets the optional flag for the file locator
|
||||
// If the optional flag is set, the locator will not return an error if the file is not found.
|
||||
func WithOptional(optional bool) Option {
|
||||
return func(f *builder) {
|
||||
return func(f *file) {
|
||||
f.isOptional = optional
|
||||
}
|
||||
}
|
||||
|
||||
func newBuilder(opts ...Option) *builder {
|
||||
o := &builder{}
|
||||
for _, opt := range opts {
|
||||
opt(o)
|
||||
}
|
||||
if o.logger == nil {
|
||||
o.logger = logger.New()
|
||||
}
|
||||
if o.filter == nil {
|
||||
o.filter = assertFile
|
||||
}
|
||||
return o
|
||||
}
|
||||
|
||||
func (o builder) build() *file {
|
||||
f := file{
|
||||
builder: o,
|
||||
// Since the `Locate` implementations rely on the root already being specified we update
|
||||
// the prefixes to include the root.
|
||||
prefixes: getSearchPrefixes(o.root, o.searchPaths...),
|
||||
}
|
||||
return &f
|
||||
}
|
||||
|
||||
// NewFileLocator creates a Locator that can be used to find files with the specified builder.
|
||||
// NewFileLocator creates a Locator that can be used to find files with the specified options.
|
||||
func NewFileLocator(opts ...Option) Locator {
|
||||
return newFileLocator(opts...)
|
||||
}
|
||||
|
||||
func newFileLocator(opts ...Option) *file {
|
||||
return newBuilder(opts...).build()
|
||||
f := &file{}
|
||||
for _, opt := range opts {
|
||||
opt(f)
|
||||
}
|
||||
if f.logger == nil {
|
||||
f.logger = logger.New()
|
||||
}
|
||||
if f.filter == nil {
|
||||
f.filter = assertFile
|
||||
}
|
||||
// Since the `Locate` implementations rely on the root already being specified we update
|
||||
// the prefixes to include the root.
|
||||
f.prefixes = getSearchPrefixes(f.root, f.prefixes...)
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
// getSearchPrefixes generates a list of unique paths to be searched by a file locator.
|
||||
@@ -160,7 +144,6 @@ var _ Locator = (*file)(nil)
|
||||
func (p file) Locate(pattern string) ([]string, error) {
|
||||
var filenames []string
|
||||
|
||||
p.logger.Debugf("Locating %q in %v", pattern, p.prefixes)
|
||||
visit:
|
||||
for _, prefix := range p.prefixes {
|
||||
pathPattern := filepath.Join(prefix, pattern)
|
||||
@@ -185,7 +168,7 @@ visit:
|
||||
}
|
||||
|
||||
if !p.isOptional && len(filenames) == 0 {
|
||||
return nil, fmt.Errorf("pattern %v %w", pattern, ErrNotFound)
|
||||
return nil, fmt.Errorf("pattern %v not found", pattern)
|
||||
}
|
||||
return filenames, nil
|
||||
}
|
||||
|
||||
@@ -18,83 +18,51 @@ package lookup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
)
|
||||
|
||||
type ldcacheLocator struct {
|
||||
logger logger.Interface
|
||||
cache ldcache.LDCache
|
||||
type library struct {
|
||||
logger logger.Interface
|
||||
symlink Locator
|
||||
cache ldcache.LDCache
|
||||
}
|
||||
|
||||
var _ Locator = (*ldcacheLocator)(nil)
|
||||
var _ Locator = (*library)(nil)
|
||||
|
||||
// NewLibraryLocator creates a library locator using the specified options.
|
||||
func NewLibraryLocator(opts ...Option) Locator {
|
||||
b := newBuilder(opts...)
|
||||
|
||||
// If search paths are already specified, we return a locator for the specified search paths.
|
||||
if len(b.searchPaths) > 0 {
|
||||
return NewSymlinkLocator(
|
||||
WithLogger(b.logger),
|
||||
WithSearchPaths(b.searchPaths...),
|
||||
WithRoot("/"),
|
||||
)
|
||||
}
|
||||
|
||||
opts = append(opts,
|
||||
WithSearchPaths([]string{
|
||||
"/",
|
||||
"/usr/lib64",
|
||||
"/usr/lib/x86_64-linux-gnu",
|
||||
"/usr/lib/aarch64-linux-gnu",
|
||||
"/usr/lib/x86_64-linux-gnu/nvidia/current",
|
||||
"/usr/lib/aarch64-linux-gnu/nvidia/current",
|
||||
"/lib64",
|
||||
"/lib/x86_64-linux-gnu",
|
||||
"/lib/aarch64-linux-gnu",
|
||||
"/lib/x86_64-linux-gnu/nvidia/current",
|
||||
"/lib/aarch64-linux-gnu/nvidia/current",
|
||||
}...),
|
||||
)
|
||||
// We construct a symlink locator for expected library locations.
|
||||
symlinkLocator := NewSymlinkLocator(opts...)
|
||||
|
||||
l := First(
|
||||
symlinkLocator,
|
||||
newLdcacheLocator(opts...),
|
||||
)
|
||||
return l
|
||||
}
|
||||
|
||||
func newLdcacheLocator(opts ...Option) Locator {
|
||||
b := newBuilder(opts...)
|
||||
|
||||
cache, err := ldcache.New(b.logger, b.root)
|
||||
// NewLibraryLocator creates a library locator using the specified logger.
|
||||
func NewLibraryLocator(logger logger.Interface, root string) (Locator, error) {
|
||||
cache, err := ldcache.New(logger, root)
|
||||
if err != nil {
|
||||
// If we failed to open the LDCache, we default to a symlink locator.
|
||||
b.logger.Warningf("Failed to load ldcache: %v", err)
|
||||
return nil
|
||||
return nil, fmt.Errorf("error loading ldcache: %v", err)
|
||||
}
|
||||
|
||||
return &ldcacheLocator{
|
||||
logger: b.logger,
|
||||
cache: cache,
|
||||
l := library{
|
||||
logger: logger,
|
||||
symlink: NewSymlinkLocator(WithLogger(logger), WithRoot(root)),
|
||||
cache: cache,
|
||||
}
|
||||
|
||||
return &l, nil
|
||||
}
|
||||
|
||||
// Locate finds the specified libraryname.
|
||||
// If the input is a library name, the ldcache is searched otherwise the
|
||||
// provided path is resolved as a symlink.
|
||||
func (l ldcacheLocator) Locate(libname string) ([]string, error) {
|
||||
func (l library) Locate(libname string) ([]string, error) {
|
||||
if strings.Contains(libname, "/") {
|
||||
return l.symlink.Locate(libname)
|
||||
}
|
||||
|
||||
paths32, paths64 := l.cache.Lookup(libname)
|
||||
if len(paths32) > 0 {
|
||||
l.logger.Warningf("Ignoring 32-bit libraries for %v: %v", libname, paths32)
|
||||
}
|
||||
|
||||
if len(paths64) == 0 {
|
||||
return nil, fmt.Errorf("64-bit library %v: %w", libname, ErrNotFound)
|
||||
return nil, fmt.Errorf("64-bit library %v not found", libname)
|
||||
}
|
||||
|
||||
return paths64, nil
|
||||
|
||||
@@ -1,212 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
)
|
||||
|
||||
func TestLDCacheLocator(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testDir := t.TempDir()
|
||||
symlinkDir := filepath.Join(testDir, "/lib/symlink")
|
||||
require.NoError(t, os.MkdirAll(symlinkDir, 0755))
|
||||
|
||||
versionLib := filepath.Join(symlinkDir, "libcuda.so.1.2.3")
|
||||
soLink := filepath.Join(symlinkDir, "libcuda.so")
|
||||
sonameLink := filepath.Join(symlinkDir, "libcuda.so.1")
|
||||
|
||||
_, err := os.Create(versionLib)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, os.Symlink(versionLib, sonameLink))
|
||||
require.NoError(t, os.Symlink(sonameLink, soLink))
|
||||
|
||||
lut := newLdcacheLocator(
|
||||
WithLogger(logger),
|
||||
WithRoot(testDir),
|
||||
)
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
libname string
|
||||
ldcacheMap map[string]string
|
||||
expected []string
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
description: "lib only resolves in LDCache",
|
||||
libname: "libcuda.so",
|
||||
ldcacheMap: map[string]string{
|
||||
"libcuda.so": "/lib/from/ldcache/libcuda.so.4.5.6",
|
||||
},
|
||||
expected: []string{"/lib/from/ldcache/libcuda.so.4.5.6"},
|
||||
},
|
||||
{
|
||||
description: "lib only not in LDCache returns error",
|
||||
libname: "libnotcuda.so",
|
||||
expectedError: ErrNotFound,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
// We override the LDCache with a mock implementation
|
||||
l := lut.(*ldcacheLocator)
|
||||
l.cache = &ldcache.LDCacheMock{
|
||||
LookupFunc: func(strings ...string) ([]string, []string) {
|
||||
var result []string
|
||||
for _, s := range strings {
|
||||
if v, ok := tc.ldcacheMap[s]; ok {
|
||||
result = append(result, v)
|
||||
}
|
||||
}
|
||||
return nil, result
|
||||
},
|
||||
}
|
||||
|
||||
candidates, err := lut.Locate(tc.libname)
|
||||
require.ErrorIs(t, err, tc.expectedError)
|
||||
|
||||
var cleanedCandidates []string
|
||||
for _, c := range candidates {
|
||||
// On MacOS /var and /tmp symlink to /private/var and /private/tmp which is included in the resolved path.
|
||||
cleanedCandidates = append(cleanedCandidates, strings.TrimPrefix(c, "/private"))
|
||||
}
|
||||
require.EqualValues(t, tc.expected, cleanedCandidates)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLibraryLocator(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testDir := t.TempDir()
|
||||
symlinkDir := filepath.Join(testDir, "/lib/symlink")
|
||||
require.NoError(t, os.MkdirAll(symlinkDir, 0755))
|
||||
|
||||
versionLib := filepath.Join(symlinkDir, "libcuda.so.1.2.3")
|
||||
soLink := filepath.Join(symlinkDir, "libcuda.so")
|
||||
sonameLink := filepath.Join(symlinkDir, "libcuda.so.1")
|
||||
|
||||
f, err := os.Create(versionLib)
|
||||
require.NoError(t, err)
|
||||
f.Close()
|
||||
require.NoError(t, os.Symlink(versionLib, sonameLink))
|
||||
require.NoError(t, os.Symlink(sonameLink, soLink))
|
||||
|
||||
// We create a set of symlinks for duplicate resolution
|
||||
libTarget1 := filepath.Join(symlinkDir, "libtarget.so.1.2.3")
|
||||
source1 := filepath.Join(symlinkDir, "libsource1.so")
|
||||
source2 := filepath.Join(symlinkDir, "libsource2.so")
|
||||
|
||||
target1, err := os.Create(libTarget1)
|
||||
require.NoError(t, err)
|
||||
target1.Close()
|
||||
require.NoError(t, os.Symlink(libTarget1, source1))
|
||||
require.NoError(t, os.Symlink(source1, source2))
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
libname string
|
||||
librarySearchPaths []string
|
||||
expected []string
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
description: "slash in path resoves symlink",
|
||||
libname: "/lib/symlink/libcuda.so",
|
||||
expected: []string{filepath.Join(testDir, "/lib/symlink/libcuda.so.1.2.3")},
|
||||
},
|
||||
{
|
||||
description: "slash in path resoves symlink",
|
||||
libname: "/lib/symlink/libcuda.so.1",
|
||||
expected: []string{filepath.Join(testDir, "/lib/symlink/libcuda.so.1.2.3")},
|
||||
},
|
||||
{
|
||||
description: "slash in path with pattern resolves symlinks",
|
||||
libname: "/lib/symlink/libcuda.so.*",
|
||||
expected: []string{filepath.Join(testDir, "/lib/symlink/libcuda.so.1.2.3")},
|
||||
},
|
||||
{
|
||||
description: "library not found returns error",
|
||||
libname: "/lib/symlink/libnotcuda.so",
|
||||
expectedError: ErrNotFound,
|
||||
},
|
||||
{
|
||||
description: "slash in path with pattern resoves symlink",
|
||||
libname: "/lib/symlink/libcuda.so.*.*.*",
|
||||
expected: []string{filepath.Join(testDir, "/lib/symlink/libcuda.so.1.2.3")},
|
||||
},
|
||||
{
|
||||
description: "symlinks are deduplicated",
|
||||
libname: "/lib/symlink/libsource*.so",
|
||||
expected: []string{filepath.Join(testDir, "/lib/symlink/libtarget.so.1.2.3")},
|
||||
},
|
||||
{
|
||||
description: "pattern resolves to multiple targets",
|
||||
libname: "/lib/symlink/lib*.so.1.2.3",
|
||||
expected: []string{
|
||||
filepath.Join(testDir, "/lib/symlink/libcuda.so.1.2.3"),
|
||||
filepath.Join(testDir, "/lib/symlink/libtarget.so.1.2.3"),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "search paths are searched",
|
||||
libname: "lib*.so.1.2.3",
|
||||
librarySearchPaths: []string{filepath.Join(testDir, "/lib/symlink")},
|
||||
expected: []string{
|
||||
filepath.Join(testDir, "/lib/symlink/libcuda.so.1.2.3"),
|
||||
filepath.Join(testDir, "/lib/symlink/libtarget.so.1.2.3"),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "search paths are absolute to root",
|
||||
libname: "lib*.so.1.2.3",
|
||||
librarySearchPaths: []string{"/lib/symlink"},
|
||||
expectedError: ErrNotFound,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
lut := NewLibraryLocator(
|
||||
WithLogger(logger),
|
||||
WithRoot(testDir),
|
||||
WithSearchPaths(tc.librarySearchPaths...),
|
||||
)
|
||||
|
||||
candidates, err := lut.Locate(tc.libname)
|
||||
require.ErrorIs(t, err, tc.expectedError)
|
||||
|
||||
var cleanedCandidates []string
|
||||
for _, c := range candidates {
|
||||
// On MacOS /var and /tmp symlink to /private/var and /private/tmp which is included in the resolved path.
|
||||
cleanedCandidates = append(cleanedCandidates, strings.TrimPrefix(c, "/private"))
|
||||
}
|
||||
require.EqualValues(t, tc.expected, cleanedCandidates)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -16,14 +16,9 @@
|
||||
|
||||
package lookup
|
||||
|
||||
import "errors"
|
||||
|
||||
//go:generate moq -stub -out locator_mock.go . Locator
|
||||
|
||||
// Locator defines the interface for locating files on a system.
|
||||
type Locator interface {
|
||||
Locate(string) ([]string, error)
|
||||
}
|
||||
|
||||
// ErrNotFound indicates that a specified pattern or file could not be found.
|
||||
var ErrNotFound = errors.New("not found")
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
/**
|
||||
# Copyright 2023 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package lookup
|
||||
|
||||
import "errors"
|
||||
|
||||
type first []Locator
|
||||
|
||||
// First returns a locator that returns the first non-empty match
|
||||
func First(locators ...Locator) Locator {
|
||||
var f first
|
||||
for _, l := range locators {
|
||||
if l == nil {
|
||||
continue
|
||||
}
|
||||
f = append(f, l)
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
// Locate returns the results for the first locator that returns a non-empty non-error result.
|
||||
func (f first) Locate(pattern string) ([]string, error) {
|
||||
var allErrors []error
|
||||
for _, l := range f {
|
||||
if l == nil {
|
||||
continue
|
||||
}
|
||||
candidates, err := l.Locate(pattern)
|
||||
if err != nil {
|
||||
allErrors = append(allErrors, err)
|
||||
continue
|
||||
}
|
||||
if len(candidates) > 0 {
|
||||
return candidates, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, errors.Join(allErrors...)
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
/**
|
||||
# Copyright 2023 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package root
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
// Driver represents a filesystem in which a set of drivers or devices is defined.
|
||||
type Driver struct {
|
||||
logger logger.Interface
|
||||
// Root represents the root from the perspective of the driver libraries and binaries.
|
||||
Root string
|
||||
// librarySearchPaths specifies explicit search paths for discovering libraries.
|
||||
librarySearchPaths []string
|
||||
}
|
||||
|
||||
// New creates a new Driver root at the specified path.
|
||||
// TODO: Use functional options here.
|
||||
func New(logger logger.Interface, path string, librarySearchPaths []string) *Driver {
|
||||
return &Driver{
|
||||
logger: logger,
|
||||
Root: path,
|
||||
librarySearchPaths: normalizeSearchPaths(librarySearchPaths...),
|
||||
}
|
||||
}
|
||||
|
||||
// Drivers returns a Locator for driver libraries.
|
||||
func (r *Driver) Libraries() lookup.Locator {
|
||||
return lookup.NewLibraryLocator(
|
||||
lookup.WithLogger(r.logger),
|
||||
lookup.WithRoot(r.Root),
|
||||
lookup.WithSearchPaths(r.librarySearchPaths...),
|
||||
)
|
||||
}
|
||||
|
||||
// normalizeSearchPaths takes a list of paths and normalized these.
|
||||
// Each of the elements in the list is expanded if it is a path list and the
|
||||
// resultant list is returned.
|
||||
// This allows, for example, for the contents of `PATH` or `LD_LIBRARY_PATH` to
|
||||
// be passed as a search path directly.
|
||||
func normalizeSearchPaths(paths ...string) []string {
|
||||
var normalized []string
|
||||
for _, path := range paths {
|
||||
normalized = append(normalized, filepath.SplitList(path)...)
|
||||
}
|
||||
return normalized
|
||||
}
|
||||
@@ -103,19 +103,14 @@ func (p symlink) Locate(pattern string) ([]string, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var targets []string
|
||||
seen := make(map[string]bool)
|
||||
for _, candidate := range candidates {
|
||||
target, err := filepath.EvalSymlinks(candidate)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to resolve link: %w", err)
|
||||
}
|
||||
if seen[target] {
|
||||
continue
|
||||
}
|
||||
seen[target] = true
|
||||
targets = append(targets, target)
|
||||
if len(candidates) != 1 {
|
||||
return nil, fmt.Errorf("failed to uniquely resolve symlink %v: %v", pattern, candidates)
|
||||
}
|
||||
return targets, err
|
||||
|
||||
target, err := filepath.EvalSymlinks(candidates[0])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to resolve link: %v", err)
|
||||
}
|
||||
|
||||
return []string{target}, err
|
||||
}
|
||||
|
||||
@@ -27,12 +27,16 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
)
|
||||
|
||||
type cdiModifier struct {
|
||||
logger logger.Interface
|
||||
specDirs []string
|
||||
devices []string
|
||||
}
|
||||
|
||||
// NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the
|
||||
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is
|
||||
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES enviroment variable is
|
||||
// used to select the devices to include.
|
||||
func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||
devices, err := getDevicesFromSpec(logger, ociSpec, cfg)
|
||||
@@ -45,19 +49,6 @@ func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spe
|
||||
}
|
||||
logger.Debugf("Creating CDI modifier for devices: %v", devices)
|
||||
|
||||
automaticDevices := filterAutomaticDevices(devices)
|
||||
if len(automaticDevices) != len(devices) && len(automaticDevices) > 0 {
|
||||
return nil, fmt.Errorf("requesting a CDI device with vendor 'runtime.nvidia.com' is not supported when requesting other CDI devices")
|
||||
}
|
||||
if len(automaticDevices) > 0 {
|
||||
automaticModifier, err := newAutomaticCDISpecModifier(logger, cfg, automaticDevices)
|
||||
if err == nil {
|
||||
return automaticModifier, nil
|
||||
}
|
||||
logger.Warningf("Failed to create the automatic CDI modifier: %w", err)
|
||||
logger.Debugf("Falling back to the standard CDI modifier")
|
||||
}
|
||||
|
||||
return cdi.New(
|
||||
cdi.WithLogger(logger),
|
||||
cdi.WithDevices(devices...),
|
||||
@@ -83,13 +74,6 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if cfg.AcceptDeviceListAsVolumeMounts {
|
||||
mountDevices := container.CDIDevicesFromMounts()
|
||||
if len(mountDevices) > 0 {
|
||||
return mountDevices, nil
|
||||
}
|
||||
}
|
||||
|
||||
envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar)
|
||||
|
||||
var devices []string
|
||||
@@ -149,71 +133,3 @@ func getAnnotationDevices(prefixes []string, annotations map[string]string) ([]s
|
||||
|
||||
return annotationDevices, nil
|
||||
}
|
||||
|
||||
// filterAutomaticDevices searches for "automatic" device names in the input slice.
|
||||
// "Automatic" devices are a well-defined list of CDI device names which, when requested,
|
||||
// trigger the generation of a CDI spec at runtime. This removes the need to generate a
|
||||
// CDI spec on the system a-priori as well as keep it up-to-date.
|
||||
func filterAutomaticDevices(devices []string) []string {
|
||||
var automatic []string
|
||||
for _, device := range devices {
|
||||
vendor, class, _ := parser.ParseDevice(device)
|
||||
if vendor == "runtime.nvidia.com" && class == "gpu" {
|
||||
automatic = append(automatic, device)
|
||||
}
|
||||
}
|
||||
return automatic
|
||||
}
|
||||
|
||||
func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, devices []string) (oci.SpecModifier, error) {
|
||||
logger.Debugf("Generating in-memory CDI specs for devices %v", devices)
|
||||
spec, err := generateAutomaticCDISpec(logger, cfg, devices)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate CDI spec: %w", err)
|
||||
}
|
||||
cdiModifier, err := cdi.New(
|
||||
cdi.WithLogger(logger),
|
||||
cdi.WithSpec(spec.Raw()),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct CDI modifier: %w", err)
|
||||
}
|
||||
|
||||
return cdiModifier, nil
|
||||
}
|
||||
|
||||
func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devices []string) (spec.Interface, error) {
|
||||
cdilib, err := nvcdi.New(
|
||||
nvcdi.WithLogger(logger),
|
||||
nvcdi.WithNVIDIACTKPath(cfg.NVIDIACTKConfig.Path),
|
||||
nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
|
||||
nvcdi.WithVendor("runtime.nvidia.com"),
|
||||
nvcdi.WithClass("gpu"),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct CDI library: %w", err)
|
||||
}
|
||||
|
||||
identifiers := []string{}
|
||||
for _, device := range devices {
|
||||
_, _, id := parser.ParseDevice(device)
|
||||
identifiers = append(identifiers, id)
|
||||
}
|
||||
|
||||
deviceSpecs, err := cdilib.GetDeviceSpecsByID(identifiers...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get CDI device specs: %w", err)
|
||||
}
|
||||
|
||||
commonEdits, err := cdilib.GetCommonEdits()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get common CDI spec edits: %w", err)
|
||||
}
|
||||
|
||||
return spec.New(
|
||||
spec.WithDeviceSpecs(deviceSpecs),
|
||||
spec.WithEdits(*commonEdits.ContainerEdits),
|
||||
spec.WithVendor("runtime.nvidia.com"),
|
||||
spec.WithClass("gpu"),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -35,7 +35,6 @@ var _ oci.SpecModifier = (*fromCDISpec)(nil)
|
||||
// Modify applies the mofiications defined by the raw CDI spec to the incomming OCI spec.
|
||||
func (m fromCDISpec) Modify(spec *specs.Spec) error {
|
||||
for _, device := range m.cdiSpec.Devices {
|
||||
device := device
|
||||
cdiDevice := cdi.Device{
|
||||
Device: &device,
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/cuda"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
@@ -30,6 +31,12 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
||||
)
|
||||
|
||||
// csvMode represents the modifications as performed by the csv runtime mode
|
||||
type csvMode struct {
|
||||
logger logger.Interface
|
||||
discoverer discover.Discover
|
||||
}
|
||||
|
||||
const (
|
||||
visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES"
|
||||
visibleDevicesVoid = "void"
|
||||
@@ -55,7 +62,7 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, image image.CUD
|
||||
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
||||
}
|
||||
|
||||
if image.Getenv(nvidiaRequireJetpackEnvvar) != "csv-mounts=all" {
|
||||
if nvidiaRequireJetpack, _ := image[nvidiaRequireJetpackEnvvar]; nvidiaRequireJetpack != "csv-mounts=all" {
|
||||
csvFiles = csv.BaseFilesOnly(csvFiles)
|
||||
}
|
||||
|
||||
|
||||
@@ -19,12 +19,11 @@ package modifier
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
)
|
||||
|
||||
func TestNewCSVModifier(t *testing.T) {
|
||||
@@ -33,33 +32,30 @@ func TestNewCSVModifier(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
cfg *config.Config
|
||||
envmap map[string]string
|
||||
image image.CUDA
|
||||
expectedError error
|
||||
expectedNil bool
|
||||
}{
|
||||
{
|
||||
description: "visible devices not set returns nil",
|
||||
envmap: map[string]string{},
|
||||
image: image.CUDA{},
|
||||
expectedNil: true,
|
||||
},
|
||||
{
|
||||
description: "visible devices empty returns nil",
|
||||
envmap: map[string]string{"NVIDIA_VISIBLE_DEVICES": ""},
|
||||
image: image.CUDA{"NVIDIA_VISIBLE_DEVICES": ""},
|
||||
expectedNil: true,
|
||||
},
|
||||
{
|
||||
description: "visible devices 'void' returns nil",
|
||||
envmap: map[string]string{"NVIDIA_VISIBLE_DEVICES": "void"},
|
||||
image: image.CUDA{"NVIDIA_VISIBLE_DEVICES": "void"},
|
||||
expectedNil: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(tc.envmap),
|
||||
)
|
||||
m, err := NewCSVModifier(logger, tc.cfg, image)
|
||||
m, err := NewCSVModifier(logger, tc.cfg, tc.image)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
|
||||
@@ -19,12 +19,11 @@ package modifier
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
type discoverModifier struct {
|
||||
|
||||
@@ -20,11 +20,10 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
)
|
||||
|
||||
func TestDiscoverModifier(t *testing.T) {
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaGDSEnvvar = "NVIDIA_GDS"
|
||||
nvidiaMOFEDEnvvar = "NVIDIA_MOFED"
|
||||
nvidiaNVSWITCHEnvvar = "NVIDIA_NVSWITCH"
|
||||
nvidiaGDRCOPYEnvvar = "NVIDIA_GDRCOPY"
|
||||
)
|
||||
|
||||
// NewFeatureGatedModifier creates the modifiers for optional features.
|
||||
// These include:
|
||||
//
|
||||
// NVIDIA_GDS=enabled
|
||||
// NVIDIA_MOFED=enabled
|
||||
// NVIDIA_NVSWITCH=enabled
|
||||
// NVIDIA_GDRCOPY=enabled
|
||||
//
|
||||
// If not devices are selected, no changes are made.
|
||||
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
|
||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
|
||||
logger.Infof("No modification required; no devices requested")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var discoverers []discover.Discover
|
||||
|
||||
driverRoot := cfg.NVIDIAContainerCLIConfig.Root
|
||||
devRoot := cfg.NVIDIAContainerCLIConfig.Root
|
||||
|
||||
if image.Getenv(nvidiaGDSEnvvar) == "enabled" {
|
||||
d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err)
|
||||
}
|
||||
discoverers = append(discoverers, d)
|
||||
}
|
||||
|
||||
if image.Getenv(nvidiaMOFEDEnvvar) == "enabled" {
|
||||
d, err := discover.NewMOFEDDiscoverer(logger, devRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer for MOFED devices: %w", err)
|
||||
}
|
||||
discoverers = append(discoverers, d)
|
||||
}
|
||||
|
||||
if image.Getenv(nvidiaNVSWITCHEnvvar) == "enabled" {
|
||||
d, err := discover.NewNvSwitchDiscoverer(logger, devRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer for NVSWITCH devices: %w", err)
|
||||
}
|
||||
discoverers = append(discoverers, d)
|
||||
}
|
||||
|
||||
if image.Getenv(nvidiaGDRCOPYEnvvar) == "enabled" {
|
||||
d, err := discover.NewGDRCopyDiscoverer(logger, devRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer for GDRCopy devices: %w", err)
|
||||
}
|
||||
discoverers = append(discoverers, d)
|
||||
}
|
||||
|
||||
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
|
||||
}
|
||||
51
internal/modifier/gds.go
Normal file
51
internal/modifier/gds.go
Normal file
@@ -0,0 +1,51 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaGDSEnvvar = "NVIDIA_GDS"
|
||||
)
|
||||
|
||||
// NewGDSModifier creates the modifiers for GDS devices.
|
||||
// If the spec does not contain the NVIDIA_GDS=enabled environment variable no changes are made.
|
||||
func NewGDSModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
|
||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
|
||||
logger.Infof("No modification required; no devices requested")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if gds, _ := image[nvidiaGDSEnvvar]; gds != "enabled" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
d, err := discover.NewGDSDiscoverer(logger, cfg.NVIDIAContainerCLIConfig.Root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %v", err)
|
||||
}
|
||||
|
||||
return NewModifierFromDiscoverer(logger, d)
|
||||
}
|
||||
@@ -23,7 +23,6 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
@@ -35,35 +34,16 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// TODO: We should not just pass `nil` as the search path here.
|
||||
driver := root.New(logger, cfg.NVIDIAContainerCLIConfig.Root, nil)
|
||||
nvidiaCTKPath := cfg.NVIDIACTKConfig.Path
|
||||
|
||||
mounts, err := discover.NewGraphicsMountsDiscoverer(
|
||||
logger,
|
||||
driver,
|
||||
nvidiaCTKPath,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
|
||||
}
|
||||
|
||||
// In standard usage, the devRoot is the same as the driver.Root.
|
||||
devRoot := driver.Root
|
||||
drmNodes, err := discover.NewDRMNodesDiscoverer(
|
||||
d, err := discover.NewGraphicsDiscoverer(
|
||||
logger,
|
||||
image.DevicesFromEnvvars(visibleDevicesEnvvar),
|
||||
devRoot,
|
||||
nvidiaCTKPath,
|
||||
cfg.NVIDIAContainerCLIConfig.Root,
|
||||
cfg.NVIDIACTKConfig.Path,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer: %v", err)
|
||||
}
|
||||
|
||||
d := discover.Merge(
|
||||
drmNodes,
|
||||
mounts,
|
||||
)
|
||||
return NewModifierFromDiscoverer(logger, d)
|
||||
}
|
||||
|
||||
|
||||
@@ -19,15 +19,14 @@ package modifier
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGraphicsModifier(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
envmap map[string]string
|
||||
cudaImage image.CUDA
|
||||
expectedRequired bool
|
||||
}{
|
||||
{
|
||||
@@ -35,20 +34,20 @@ func TestGraphicsModifier(t *testing.T) {
|
||||
},
|
||||
{
|
||||
description: "devices with no capabilities does not create modifier",
|
||||
envmap: map[string]string{
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "devices with no non-graphics does not create modifier",
|
||||
envmap: map[string]string{
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "compute",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "devices with all capabilities creates modifier",
|
||||
envmap: map[string]string{
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "all",
|
||||
},
|
||||
@@ -56,7 +55,7 @@ func TestGraphicsModifier(t *testing.T) {
|
||||
},
|
||||
{
|
||||
description: "devices with graphics capability creates modifier",
|
||||
envmap: map[string]string{
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "graphics",
|
||||
},
|
||||
@@ -64,7 +63,7 @@ func TestGraphicsModifier(t *testing.T) {
|
||||
},
|
||||
{
|
||||
description: "devices with compute,graphics capability creates modifier",
|
||||
envmap: map[string]string{
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "compute,graphics",
|
||||
},
|
||||
@@ -72,7 +71,7 @@ func TestGraphicsModifier(t *testing.T) {
|
||||
},
|
||||
{
|
||||
description: "devices with display capability creates modifier",
|
||||
envmap: map[string]string{
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "display",
|
||||
},
|
||||
@@ -80,7 +79,7 @@ func TestGraphicsModifier(t *testing.T) {
|
||||
},
|
||||
{
|
||||
description: "devices with display,graphics capability creates modifier",
|
||||
envmap: map[string]string{
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "display,graphics",
|
||||
},
|
||||
@@ -90,10 +89,7 @@ func TestGraphicsModifier(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
image, _ := image.New(
|
||||
image.WithEnvMap(tc.envmap),
|
||||
)
|
||||
required, _ := requiresGraphicsModifier(image)
|
||||
required, _ := requiresGraphicsModifier(tc.cudaImage)
|
||||
require.EqualValues(t, tc.expectedRequired, required)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -19,11 +19,10 @@ package modifier
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
// nvidiaContainerRuntimeHookRemover is a spec modifer that detects and removes inserted nvidia-container-runtime hooks
|
||||
@@ -50,7 +49,6 @@ func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error {
|
||||
var newPrestart []specs.Hook
|
||||
|
||||
for _, hook := range spec.Hooks.Prestart {
|
||||
hook := hook
|
||||
if isNVIDIAContainerRuntimeHook(&hook) {
|
||||
m.logger.Debugf("Removing hook %v", hook)
|
||||
continue
|
||||
|
||||
@@ -17,9 +17,8 @@
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
type list struct {
|
||||
|
||||
51
internal/modifier/mofed.go
Normal file
51
internal/modifier/mofed.go
Normal file
@@ -0,0 +1,51 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaMOFEDEnvvar = "NVIDIA_MOFED"
|
||||
)
|
||||
|
||||
// NewMOFEDModifier creates the modifiers for MOFED devices.
|
||||
// If the spec does not contain the NVIDIA_MOFED=enabled environment variable no changes are made.
|
||||
func NewMOFEDModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
|
||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
|
||||
logger.Infof("No modification required; no devices requested")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if mofed, _ := image[nvidiaMOFEDEnvvar]; mofed != "enabled" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
d, err := discover.NewMOFEDDiscoverer(logger, cfg.NVIDIAContainerCLIConfig.Root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer for MOFED devices: %v", err)
|
||||
}
|
||||
|
||||
return NewModifierFromDiscoverer(logger, d)
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user