mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
171 Commits
v1.11.0-rc
...
v1.12.0-rc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
217a135eb1 | ||
|
|
22e65b320b | ||
|
|
53bb940b30 | ||
|
|
1c1ad8098a | ||
|
|
203db4390c | ||
|
|
b6d9c2c1ad | ||
|
|
429ef4d4e9 | ||
|
|
25759ca933 | ||
|
|
74abea07e2 | ||
|
|
7955bb1a84 | ||
|
|
75b11eb80a | ||
|
|
c958817eef | ||
|
|
80f8c2a418 | ||
|
|
08640a6f64 | ||
|
|
9db31f7506 | ||
|
|
7fd40632fe | ||
|
|
6ef19d2925 | ||
|
|
83ce83239b | ||
|
|
30fb486e44 | ||
|
|
0022661565 | ||
|
|
28e882f26f | ||
|
|
71fbe7a812 | ||
|
|
ce3d94af1a | ||
|
|
0bc09665a8 | ||
|
|
205ba098e9 | ||
|
|
877832da69 | ||
|
|
b7ba96a72e | ||
|
|
93c59f2d9c | ||
|
|
5a56b658ba | ||
|
|
99889671b5 | ||
|
|
a2fb017208 | ||
|
|
f7021d84b5 | ||
|
|
c793fc27d8 | ||
|
|
3d2328bdfd | ||
|
|
76b69f45de | ||
|
|
73e65edaa9 | ||
|
|
cd7ee5a435 | ||
|
|
eac4faddc6 | ||
|
|
bc8a73dde4 | ||
|
|
624b9d8ee6 | ||
|
|
9d6e2ff1b0 | ||
|
|
aca0c7bc5a | ||
|
|
db47b58275 | ||
|
|
59bf7607ce | ||
|
|
61ff3fbd7b | ||
|
|
523fc57ab4 | ||
|
|
ae18c5d847 | ||
|
|
4abdc2f35d | ||
|
|
f8748bfa9a | ||
|
|
5fb0ae2c2d | ||
|
|
899fc72014 | ||
|
|
1267c1d9a2 | ||
|
|
9a697e340b | ||
|
|
abe8ca71e0 | ||
|
|
9bbf7dcf96 | ||
|
|
ec1222b58b | ||
|
|
229b46e0ca | ||
|
|
b6a68c4add | ||
|
|
e588bfac7d | ||
|
|
224020533e | ||
|
|
3736bb3aca | ||
|
|
1e72f92b74 | ||
|
|
896f5b2e9f | ||
|
|
c068d4048f | ||
|
|
8796cd76b0 | ||
|
|
1597ede2af | ||
|
|
3dd8020695 | ||
|
|
dfa041991f | ||
|
|
568896742b | ||
|
|
f52973217f | ||
|
|
efd29f1cec | ||
|
|
4b02670049 | ||
|
|
8550874686 | ||
|
|
38513d5a53 | ||
|
|
a35236a8f6 | ||
|
|
0c2e72b7c1 | ||
|
|
f0bdfbebe4 | ||
|
|
a4fa61d05d | ||
|
|
6e23a635c6 | ||
|
|
4dedac6a24 | ||
|
|
8c1b9b33c1 | ||
|
|
d37c17857e | ||
|
|
a0065456d0 | ||
|
|
a34a571d2e | ||
|
|
bb4cfece61 | ||
|
|
b16d263ee7 | ||
|
|
027395bb8a | ||
|
|
3ecd790206 | ||
|
|
52bb9e186b | ||
|
|
68b6d1cab1 | ||
|
|
bdb67b4fba | ||
|
|
d0c39a11d5 | ||
|
|
9de6361938 | ||
|
|
fb016dca86 | ||
|
|
8beb7b4231 | ||
|
|
2b08a79206 | ||
|
|
5885fead8f | ||
|
|
a9fb7a4a88 | ||
|
|
b5dbcaeaf9 | ||
|
|
80a46d4a5c | ||
|
|
febce822d5 | ||
|
|
e8099a713c | ||
|
|
d9de4a09b8 | ||
|
|
2dbcda2619 | ||
|
|
691b93ffb0 | ||
|
|
cb0c94cd40 | ||
|
|
3168718563 | ||
|
|
dc8972a26a | ||
|
|
0a2d8f4d22 | ||
|
|
8d623967ed | ||
|
|
503ed96275 | ||
|
|
d8ba84d427 | ||
|
|
8e8c41a3bc | ||
|
|
e34fe17b45 | ||
|
|
c5b0278c58 | ||
|
|
8daa257b35 | ||
|
|
6329174cfc | ||
|
|
1ec41c1bf1 | ||
|
|
581a76de38 | ||
|
|
5d52ca8909 | ||
|
|
ad7151d394 | ||
|
|
3269a7b0e7 | ||
|
|
6a155cc606 | ||
|
|
a5bbf613e8 | ||
|
|
22427c1359 | ||
|
|
f17121fd6c | ||
|
|
256e37eb3f | ||
|
|
bdfd123b9d | ||
|
|
3f7dce202a | ||
|
|
a6d21abe14 | ||
|
|
d0f1fe2273 | ||
|
|
8de9593209 | ||
|
|
64b2b50470 | ||
|
|
4dc1451c49 | ||
|
|
211081ff25 | ||
|
|
c1c1d5cf8e | ||
|
|
e91ffef258 | ||
|
|
47c8aa3790 | ||
|
|
33b4e7fb0a | ||
|
|
936da0295b | ||
|
|
c2205c14fb | ||
|
|
56935f5743 | ||
|
|
1b3bae790c | ||
|
|
47559a8c87 | ||
|
|
86412ea821 | ||
|
|
b8aa844171 | ||
|
|
f9464c5cf9 | ||
|
|
9df75e1fa3 | ||
|
|
0218e2ebf7 | ||
|
|
a9dc6550d5 | ||
|
|
ffd6ec3c54 | ||
|
|
de3e0df96c | ||
|
|
e5dadf34d9 | ||
|
|
52145f2d73 | ||
|
|
90df3caf62 | ||
|
|
50db66a925 | ||
|
|
8587fa05bd | ||
|
|
8129dade3c | ||
|
|
3610fe7c33 | ||
|
|
90518e0ce5 | ||
|
|
9c060f06ba | ||
|
|
e848aa7813 | ||
|
|
feedc912e4 | ||
|
|
ab3f05cf62 | ||
|
|
35982e51bf | ||
|
|
94e650c518 | ||
|
|
d9edc18bf8 | ||
|
|
f4d01e0a05 | ||
|
|
648cfaba51 | ||
|
|
3a9de13f4e | ||
|
|
1161b21166 |
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -34,51 +34,91 @@ stages:
|
||||
- scan
|
||||
- release
|
||||
|
||||
.main-or-manual:
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main"
|
||||
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
|
||||
- if: $CI_PIPELINE_SOURCE == "schedule"
|
||||
when: manual
|
||||
|
||||
# Define the distribution targets
|
||||
.dist-amazonlinux2:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: amazonlinux2
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-centos7:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: centos7
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-centos8:
|
||||
variables:
|
||||
DIST: centos8
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-debian10:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: debian10
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-debian9:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: debian9
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-fedora35:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: fedora35
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-opensuse-leap15.1:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: opensuse-leap15.1
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-ubi8:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubi8
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-ubuntu16.04:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubuntu16.04
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-ubuntu18.04:
|
||||
variables:
|
||||
DIST: ubuntu18.04
|
||||
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-ubuntu20.04:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubuntu20.04
|
||||
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-packaging:
|
||||
variables:
|
||||
@@ -98,6 +138,8 @@ stages:
|
||||
ARCH: arm64
|
||||
|
||||
.arch-ppc64le:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
ARCH: ppc64le
|
||||
|
||||
@@ -138,7 +180,7 @@ test-packaging:
|
||||
# Download the regctl binary for use in the release steps
|
||||
.regctl-setup:
|
||||
before_script:
|
||||
- export REGCTL_VERSION=v0.3.10
|
||||
- export REGCTL_VERSION=v0.4.5
|
||||
- apk add --no-cache curl
|
||||
- mkdir -p bin
|
||||
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
|
||||
@@ -232,10 +274,7 @@ release:staging-ubuntu20.04:
|
||||
- .release:staging
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- test-toolkit-ubuntu20.04
|
||||
- test-containerd-ubuntu20.04
|
||||
- test-crio-ubuntu20.04
|
||||
- test-docker-ubuntu20.04
|
||||
- image-ubuntu20.04
|
||||
|
||||
release:staging-packaging:
|
||||
extends:
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,6 +4,8 @@ dist
|
||||
/coverage.out*
|
||||
/test/output/
|
||||
/nvidia-container-runtime
|
||||
/nvidia-container-runtime-hook
|
||||
/nvidia-container-toolkit
|
||||
/nvidia-ctk
|
||||
/shared-*
|
||||
/release-*
|
||||
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -94,7 +94,7 @@ unit-tests:
|
||||
- .multi-arch-build
|
||||
- .package-artifacts
|
||||
stage: package-build
|
||||
timeout: 2h 30m
|
||||
timeout: 3h
|
||||
script:
|
||||
- ./scripts/build-packages.sh ${DIST}-${ARCH}
|
||||
|
||||
@@ -158,6 +158,18 @@ package-debian9-amd64:
|
||||
- .dist-debian9
|
||||
- .arch-amd64
|
||||
|
||||
package-fedora35-aarch64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-fedora35
|
||||
- .arch-aarch64
|
||||
|
||||
package-fedora35-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-fedora35
|
||||
- .arch-x86_64
|
||||
|
||||
package-opensuse-leap15.1-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
@@ -250,7 +262,8 @@ image-ubuntu18.04:
|
||||
needs:
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
- job: package-ubuntu18.04-ppc64le
|
||||
optional: true
|
||||
|
||||
image-ubuntu20.04:
|
||||
extends:
|
||||
@@ -269,21 +282,36 @@ image-packaging:
|
||||
- .package-artifacts
|
||||
- .dist-packaging
|
||||
needs:
|
||||
- package-amazonlinux2-aarch64
|
||||
- package-amazonlinux2-x86_64
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-ppc64le
|
||||
- package-centos8-x86_64
|
||||
- package-debian10-amd64
|
||||
- package-debian9-amd64
|
||||
- package-opensuse-leap15.1-x86_64
|
||||
- package-ubuntu16.04-amd64
|
||||
- package-ubuntu16.04-ppc64le
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
- job: package-centos8-aarch64
|
||||
- job: package-centos8-x86_64
|
||||
- job: package-ubuntu18.04-amd64
|
||||
- job: package-ubuntu18.04-arm64
|
||||
- job: package-amazonlinux2-aarch64
|
||||
optional: true
|
||||
- job: package-amazonlinux2-x86_64
|
||||
optional: true
|
||||
- job: package-centos7-ppc64le
|
||||
optional: true
|
||||
- job: package-centos7-x86_64
|
||||
optional: true
|
||||
- job: package-centos8-ppc64le
|
||||
optional: true
|
||||
- job: package-debian10-amd64
|
||||
optional: true
|
||||
- job: package-debian9-amd64
|
||||
optional: true
|
||||
- job: package-fedora35-aarch64
|
||||
optional: true
|
||||
- job: package-fedora35-x86_64
|
||||
optional: true
|
||||
- job: package-opensuse-leap15.1-x86_64
|
||||
optional: true
|
||||
- job: package-ubuntu16.04-amd64
|
||||
optional: true
|
||||
- job: package-ubuntu16.04-ppc64le
|
||||
optional: true
|
||||
- job: package-ubuntu18.04-ppc64le
|
||||
optional: true
|
||||
|
||||
# Define publish test helpers
|
||||
.test:toolkit:
|
||||
@@ -343,31 +371,3 @@ test-docker-ubuntu18.04:
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
test-toolkit-ubuntu20.04:
|
||||
extends:
|
||||
- .test:toolkit
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
test-containerd-ubuntu20.04:
|
||||
extends:
|
||||
- .test:containerd
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
test-crio-ubuntu20.04:
|
||||
extends:
|
||||
- .test:crio
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
test-docker-ubuntu20.04:
|
||||
extends:
|
||||
- .test:docker
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
|
||||
179
.nvidia-ci.yml
179
.nvidia-ci.yml
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -35,6 +35,9 @@ variables:
|
||||
# Define the public staging registry
|
||||
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
|
||||
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
|
||||
ARTIFACTORY_REPO_BASE: "https://urm.nvidia.com/artifactory/sw-gpu-cloudnative"
|
||||
# TODO: We should set the kitmaker release folder here once we have the end-to-end workflow set up
|
||||
KITMAKER_RELEASE_FOLDER: "testing"
|
||||
|
||||
.image-pull:
|
||||
stage: image-build
|
||||
@@ -48,8 +51,9 @@ variables:
|
||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
PUSH_MULTIPLE_TAGS: "false"
|
||||
# We delay the job start to allow the public pipeline to generate the required images.
|
||||
when: delayed
|
||||
start_in: 30 minutes
|
||||
rules:
|
||||
- when: delayed
|
||||
start_in: 30 minutes
|
||||
timeout: 30 minutes
|
||||
retry:
|
||||
max: 2
|
||||
@@ -67,29 +71,132 @@ variables:
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-centos7
|
||||
- .image-pull
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubi8
|
||||
- .image-pull
|
||||
|
||||
image-ubuntu18.04:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubuntu18.04
|
||||
- .image-pull
|
||||
|
||||
image-ubuntu20.04:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubuntu20.04
|
||||
- .image-pull
|
||||
|
||||
# The DIST=packaging target creates an image containing all built packages
|
||||
image-packaging:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-packaging
|
||||
- .image-pull
|
||||
|
||||
# Define the package release targets
|
||||
release:packages:amazonlinux2-aarch64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-amazonlinux2
|
||||
- .arch-aarch64
|
||||
|
||||
release:packages:amazonlinux2-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-amazonlinux2
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:centos7-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos7
|
||||
- .arch-ppc64le
|
||||
|
||||
release:packages:centos7-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos7
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:centos8-aarch64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos8
|
||||
- .arch-aarch64
|
||||
|
||||
release:packages:centos8-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos8
|
||||
- .arch-ppc64le
|
||||
|
||||
release:packages:centos8-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos8
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:debian10-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-debian10
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:debian9-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-debian9
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:fedora35-aarch64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-fedora35
|
||||
- .arch-aarch64
|
||||
|
||||
release:packages:fedora35-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-fedora35
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:opensuse-leap15.1-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-opensuse-leap15.1
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:ubuntu16.04-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu16.04
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:ubuntu16.04-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu16.04
|
||||
- .arch-ppc64le
|
||||
|
||||
release:packages:ubuntu18.04-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:ubuntu18.04-arm64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-arm64
|
||||
|
||||
release:packages:ubuntu18.04-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-ppc64le
|
||||
|
||||
# We skip the integration tests for the internal CI:
|
||||
.integration:
|
||||
@@ -107,9 +214,9 @@ image-packaging:
|
||||
variables:
|
||||
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}-${DIST}"
|
||||
IMAGE_ARCHIVE: "container-toolkit.tar"
|
||||
except:
|
||||
variables:
|
||||
- $SKIP_SCANS && $SKIP_SCANS == "yes"
|
||||
rules:
|
||||
- if: $SKIP_SCANS != "yes"
|
||||
- when: manual
|
||||
before_script:
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
# TODO: We should specify the architecture here and scan all architectures
|
||||
@@ -134,59 +241,59 @@ image-packaging:
|
||||
# Define the scan targets
|
||||
scan-centos7-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos7
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
scan-centos7-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos7
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-centos7
|
||||
- scan-centos7-amd64
|
||||
|
||||
scan-ubuntu18.04-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu18.04
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
scan-ubuntu20.04-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu20.04
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
scan-ubuntu20.04-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu20.04
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
- scan-ubuntu20.04-amd64
|
||||
|
||||
scan-ubi8-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubi8
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubi8
|
||||
|
||||
scan-ubi8-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubi8
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubi8
|
||||
- scan-ubi8-amd64
|
||||
@@ -201,6 +308,27 @@ scan-ubi8-arm64:
|
||||
OUT_REGISTRY: "${NGC_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
||||
|
||||
.release:packages:
|
||||
stage: release
|
||||
needs:
|
||||
- image-packaging
|
||||
variables:
|
||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
PACKAGE_REGISTRY: "${CI_REGISTRY}"
|
||||
PACKAGE_REGISTRY_USER: "${CI_REGISTRY_USER}"
|
||||
PACKAGE_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
||||
PACKAGE_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
PACKAGE_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}-packaging"
|
||||
PACKAGE_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-${PACKAGE_REPO_TYPE}-local"
|
||||
KITMAKER_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-generic-local/${KITMAKER_RELEASE_FOLDER}"
|
||||
script:
|
||||
- !reference [.regctl-setup, before_script]
|
||||
- apk add --no-cache bash
|
||||
- regctl registry login "${PACKAGE_REGISTRY}" -u "${PACKAGE_REGISTRY_USER}" -p "${PACKAGE_REGISTRY_TOKEN}"
|
||||
- ./scripts/extract-packages.sh "${PACKAGE_IMAGE_NAME}:${PACKAGE_IMAGE_TAG}" "${DIST}-${ARCH}"
|
||||
# TODO: ./scripts/release-packages-artifactory.sh "${DIST}-${ARCH}" "${PACKAGE_ARTIFACTORY_REPO}"
|
||||
- ./scripts/release-kitmaker-artifactory.sh "${DIST}-${ARCH}" "${KITMAKER_ARTIFACTORY_REPO}"
|
||||
|
||||
release:staging-ubuntu18.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
@@ -208,31 +336,24 @@ release:staging-ubuntu18.04:
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
release:staging-ubuntu20.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
# Define the external release targets
|
||||
# Release to NGC
|
||||
release:ngc-centos7:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-centos7
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubuntu18.04:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubuntu18.04
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubuntu20.04:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubuntu20.04
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubi8:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubi8
|
||||
- .release:ngc
|
||||
|
||||
41
CHANGELOG.md
41
CHANGELOG.md
@@ -1,5 +1,46 @@
|
||||
# NVIDIA Container Toolkit Changelog
|
||||
|
||||
## v1.12.0-rc.2
|
||||
|
||||
* Inject Direct Rendering Manager (DRM) devices into a container using the NVIDIA Container Runtime
|
||||
* Improve logging of errors from the NVIDIA Container Runtime
|
||||
* Improve CDI specification generation to support rootless podman
|
||||
* Use `nvidia-ctk cdi generate` to generate CDI specifications instead of `nvidia-ctk info generate-cdi`
|
||||
* [libnvidia-container] Skip creation of existing files when these are already mounted
|
||||
|
||||
## v1.12.0-rc.1
|
||||
|
||||
* Add support for multiple Docker Swarm resources
|
||||
* Improve injection of Vulkan configurations and libraries
|
||||
* Add `nvidia-ctk info generate-cdi` command to generated CDI specification for available devices
|
||||
* [libnvidia-container] Include NVVM compiler library in compute libs
|
||||
|
||||
## v1.11.0
|
||||
|
||||
* Promote v1.11.0-rc.3 to v1.11.0
|
||||
|
||||
## v1.11.0-rc.3
|
||||
|
||||
* Build fedora35 packages
|
||||
* Introduce an `nvidia-container-toolkit-base` package for better dependency management
|
||||
* Fix removal of `nvidia-container-runtime-hook` on RPM-based systems
|
||||
* Inject platform files into container on Tegra-based systems
|
||||
* [toolkit container] Update CUDA base images to 11.7.1
|
||||
* [libnvidia-container] Preload libgcc_s.so.1 on arm64 systems
|
||||
|
||||
## v1.11.0-rc.2
|
||||
|
||||
* Allow `accept-nvidia-visible-devices-*` config options to be set by toolkit container
|
||||
* [libnvidia-container] Fix bug where LDCache was not updated when the `--no-pivot-root` option was specified
|
||||
|
||||
## v1.11.0-rc.1
|
||||
|
||||
* Add discovery of GPUDirect Storage (`nvidia-fs*`) devices if the `NVIDIA_GDS` environment variable of the container is set to `enabled`
|
||||
* Add discovery of MOFED Infiniband devices if the `NVIDIA_MOFED` environment variable of the container is set to `enabled`
|
||||
* Fix bug in CSV mode where libraries listed as `sym` entries in mount specification are not added to the LDCache.
|
||||
* Rename `nvidia-container-toolkit` executable to `nvidia-container-runtime-hook` and create `nvidia-container-toolkit` as a symlink to `nvidia-container-runtime-hook` instead.
|
||||
* Add `nvidia-ctk runtime configure` command to configure the Docker config file (e.g. `/etc/docker/daemon.json`) for use with the NVIDIA Container Runtime.
|
||||
|
||||
## v1.10.0
|
||||
|
||||
* Promote v1.10.0-rc.3 to v1.10.0
|
||||
|
||||
3
Makefile
3
Makefile
@@ -51,6 +51,7 @@ CLI_VERSION = $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
else
|
||||
CLI_VERSION = $(VERSION)
|
||||
endif
|
||||
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
|
||||
|
||||
GOOS ?= linux
|
||||
|
||||
@@ -60,7 +61,7 @@ cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
|
||||
endif
|
||||
cmds: $(CMD_TARGETS)
|
||||
$(CMD_TARGETS): cmd-%:
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w -X github.com/NVIDIA/nvidia-container-toolkit/internal/info.gitCommit=$(GIT_COMMIT) -X github.com/NVIDIA/nvidia-container-toolkit/internal/info.version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
|
||||
build:
|
||||
GOOS=$(GOOS) go build ./...
|
||||
|
||||
@@ -69,7 +69,7 @@ RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm
|
||||
yum localinstall -y \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-1.*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-1.*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit-${PACKAGE_VERSION}*.rpm
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*-${PACKAGE_VERSION}*.rpm
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
|
||||
@@ -25,5 +25,7 @@ ARG ARTIFACTS_ROOT
|
||||
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||
|
||||
WORKDIR /artifacts/packages
|
||||
# Create a manifest.txt file with the absolute paths of all deb and rpm packages in the container
|
||||
RUN find /artifacts/packages -iname '*.deb' -o -iname '*.rpm' > /artifacts/manifest.txt
|
||||
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
|
||||
@@ -77,7 +77,7 @@ RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
|
||||
RUN dpkg -i \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit_${PACKAGE_VERSION}*.deb
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*_${PACKAGE_VERSION}*.deb
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@ import (
|
||||
"golang.org/x/mod/semver"
|
||||
)
|
||||
|
||||
var envSwarmGPU *string
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
@@ -165,15 +163,24 @@ func isPrivileged(s *Spec) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func getDevicesFromEnvvar(image image.CUDA) *string {
|
||||
// Build a list of envvars to consider.
|
||||
envVars := []string{envNVVisibleDevices}
|
||||
if envSwarmGPU != nil {
|
||||
// The Swarm envvar has higher precedence.
|
||||
envVars = append([]string{*envSwarmGPU}, envVars...)
|
||||
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
|
||||
// We check if the image has at least one of the Swarm resource envvars defined and use this
|
||||
// if specified.
|
||||
var hasSwarmEnvvar bool
|
||||
for _, envvar := range swarmResourceEnvvars {
|
||||
if _, exists := image[envvar]; exists {
|
||||
hasSwarmEnvvar = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
var devices []string
|
||||
if hasSwarmEnvvar {
|
||||
devices = image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
|
||||
} else {
|
||||
devices = image.DevicesFromEnvvars(envNVVisibleDevices).List()
|
||||
}
|
||||
|
||||
devices := image.DevicesFromEnvvars(envVars...)
|
||||
if len(devices) == 0 {
|
||||
return nil
|
||||
}
|
||||
@@ -230,7 +237,7 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
|
||||
}
|
||||
|
||||
// Fallback to reading from the environment variable if privileges are correct
|
||||
devices := getDevicesFromEnvvar(image)
|
||||
devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
|
||||
if devices == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -348,7 +355,6 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
}
|
||||
|
||||
privileged := isPrivileged(s)
|
||||
envSwarmGPU = hook.SwarmResource
|
||||
return containerConfig{
|
||||
Pid: h.Pid,
|
||||
Rootfs: s.Root.Path,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
@@ -69,7 +70,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
description: "Legacy image, devices 'void', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "",
|
||||
envNVVisibleDevices: "void",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
@@ -226,7 +227,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
description: "Modern image, devices 'void', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "",
|
||||
envNVVisibleDevices: "void",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
@@ -449,6 +450,44 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Hook config set, swarmResource overrides device selection",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
s := "DOCKER_SWARM_RESOURCE"
|
||||
return &s
|
||||
}(),
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "GPU1,GPU2",
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Hook config set, comma separated swarmResource is split and overrides device selection",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
s := "NOT_DOCKER_SWARM_RESOURCE,DOCKER_SWARM_RESOURCE"
|
||||
return &s
|
||||
}(),
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "GPU1,GPU2",
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
@@ -689,12 +728,13 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
|
||||
gpuID := "GPU-12345"
|
||||
anotherGPUID := "GPU-67890"
|
||||
thirdGPUID := "MIG-12345"
|
||||
|
||||
var tests = []struct {
|
||||
description string
|
||||
envSwarmGPU *string
|
||||
env map[string]string
|
||||
expectedDevices *string
|
||||
description string
|
||||
swarmResourceEnvvars []string
|
||||
env map[string]string
|
||||
expectedDevices *string
|
||||
}{
|
||||
{
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
@@ -798,42 +838,42 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
|
||||
// enabled
|
||||
{
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
},
|
||||
{
|
||||
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "void",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "none",
|
||||
},
|
||||
expectedDevices: &empty,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: gpuID,
|
||||
},
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: gpuID,
|
||||
envCUDAVersion: "legacy",
|
||||
@@ -841,28 +881,58 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: func() *string {
|
||||
result := fmt.Sprintf("%s,%s", thirdGPUID, anotherGPUID)
|
||||
return &result
|
||||
}(),
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
envSwarmGPU = tc.envSwarmGPU
|
||||
devices := getDevicesFromEnvvar(image.CUDA(tc.env))
|
||||
devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars)
|
||||
if tc.expectedDevices == nil {
|
||||
require.Nil(t, devices, "%d: %v", i, tc)
|
||||
return
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
@@ -116,3 +117,22 @@ func (c HookConfig) getConfigOption(fieldName string) string {
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
|
||||
func (c *HookConfig) getSwarmResourceEnvvars() []string {
|
||||
if c.SwarmResource == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
candidates := strings.Split(*c.SwarmResource, ",")
|
||||
|
||||
var envvars []string
|
||||
for _, c := range candidates {
|
||||
trimmed := strings.TrimSpace(c)
|
||||
if len(trimmed) > 0 {
|
||||
envvars = append(envvars, trimmed)
|
||||
}
|
||||
}
|
||||
|
||||
return envvars
|
||||
}
|
||||
|
||||
@@ -103,3 +103,59 @@ func TestGetHookConfig(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetSwarmResourceEnvvars(t *testing.T) {
|
||||
testCases := []struct {
|
||||
value string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
value: "nil",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: "",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: " ",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: "single",
|
||||
expected: []string{"single"},
|
||||
},
|
||||
{
|
||||
value: "single ",
|
||||
expected: []string{"single"},
|
||||
},
|
||||
{
|
||||
value: "one,two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
{
|
||||
value: "one ,two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
{
|
||||
value: "one, two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
c := &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
if tc.value == "nil" {
|
||||
return nil
|
||||
}
|
||||
return &tc.value
|
||||
}(),
|
||||
}
|
||||
|
||||
envvars := c.getSwarmResourceEnvvars()
|
||||
require.EqualValues(t, tc.expected, envvars)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ func doPrestart() {
|
||||
cli := hook.NvidiaContainerCLI
|
||||
|
||||
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
|
||||
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.")
|
||||
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
|
||||
}
|
||||
|
||||
container := getContainerConfig(hook)
|
||||
|
||||
@@ -48,7 +48,12 @@ func run(argv []string) (rerr error) {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to set up logger: %v", err)
|
||||
}
|
||||
defer logger.Reset()
|
||||
defer func() {
|
||||
if rerr != nil {
|
||||
logger.Errorf("%v", rerr)
|
||||
}
|
||||
logger.Reset()
|
||||
}()
|
||||
|
||||
logger.Debugf("Command line arguments: %v", argv)
|
||||
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
|
||||
|
||||
@@ -67,6 +67,11 @@ func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec
|
||||
return nil, err
|
||||
}
|
||||
|
||||
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
gdsModifier, err := modifier.NewGDSModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -77,10 +82,17 @@ func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tegraModifier, err := modifier.NewTegraPlatformFiles(logger)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
modifiers := modifier.Merge(
|
||||
modeModifier,
|
||||
graphicsModifier,
|
||||
gdsModifier,
|
||||
mofedModifier,
|
||||
tegraModifier,
|
||||
)
|
||||
return modifiers, nil
|
||||
}
|
||||
|
||||
@@ -15,3 +15,34 @@ nvidia-ctk runtime configure --set-as-default
|
||||
```
|
||||
will ensure that the NVIDIA Container Runtime is added as the default runtime to the default container
|
||||
engine.
|
||||
|
||||
### Generate CDI specifications
|
||||
|
||||
The [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface) provides
|
||||
a vendor-agnostic mechanism to make arbitrary devices accessible in containerized environments. To allow NVIDIA devices to be
|
||||
used in these environments, the NVIDIA Container Toolkit CLI includes functionality to generate a CDI specification for the
|
||||
available NVIDIA GPUs in a system.
|
||||
|
||||
In order to generate the CDI specification for the available devices, run the following command:\
|
||||
```bash
|
||||
nvidia-ctk cdi generate
|
||||
```
|
||||
|
||||
The default is to print the specification to STDOUT and a filename can be specified using the `--output` flag.
|
||||
|
||||
The specification will contain a device entries as follows (where applicable):
|
||||
* An `nvidia.com/gpu=gpu{INDEX}` device for each non-MIG-enabled full GPU in the system
|
||||
* An `nvidia.com/gpu=mig{GPU_INDEX}:{MIG_INDEX}` device for each MIG-device in the system
|
||||
* A special device called `nvidia.com/gpu=all` which represents all available devices.
|
||||
|
||||
For example, to generate the CDI specification in the default location where CDI-enabled tools such as `podman`, `containerd`, `cri-o`, or the NVIDIA Container Runtime can be configured to load it, the following command can be run:
|
||||
|
||||
```bash
|
||||
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
||||
```
|
||||
(Note that `sudo` is used to ensure the correct permissions to write to the `/etc/cdi` folder)
|
||||
|
||||
With the specification generated, a GPU can be requested by specifying the fully-qualified CDI device name. With `podman` as an exmaple:
|
||||
```bash
|
||||
podman run --rm -ti --device=nvidia.com/gpu=gpu0 ubuntu nvidia-smi -L
|
||||
```
|
||||
|
||||
50
cmd/nvidia-ctk/cdi/cdi.go
Normal file
50
cmd/nvidia-ctk/cdi/cdi.go
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cdi
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs an info command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
// Create the 'hook' command
|
||||
hook := cli.Command{
|
||||
Name: "cdi",
|
||||
Usage: "Provide tools for interacting with Container Device Interface specifications",
|
||||
}
|
||||
|
||||
hook.Subcommands = []*cli.Command{
|
||||
generate.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
}
|
||||
559
cmd/nvidia-ctk/cdi/generate/generate.go
Normal file
559
cmd/nvidia-ctk/cdi/generate/generate.go
Normal file
@@ -0,0 +1,559 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaCTKExecutable = "nvidia-ctk"
|
||||
nvidiaCTKDefaultFilePath = "/usr/bin/" + nvidiaCTKExecutable
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
output string
|
||||
jsonMode bool
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build creates the CLI command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'generate-cdi' command
|
||||
c := cli.Command{
|
||||
Name: "generate",
|
||||
Usage: "Generate CDI specifications for use with CDI-enabled runtimes",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "output",
|
||||
Usage: "Specify the file to output the generated CDI specification to. If this is '-' or '' the specification is output to STDOUT",
|
||||
Destination: &cfg.output,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "json",
|
||||
Usage: "Output the generated CDI spec in JSON mode instead of YAML",
|
||||
Destination: &cfg.jsonMode,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
spec, err := m.generateSpec()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec: %v", err)
|
||||
}
|
||||
|
||||
var outputTo io.Writer
|
||||
if cfg.output == "" || cfg.output == "-" {
|
||||
outputTo = os.Stdout
|
||||
} else {
|
||||
outputFile, err := os.Create(cfg.output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create output file: %v", err)
|
||||
}
|
||||
defer outputFile.Close()
|
||||
outputTo = outputFile
|
||||
}
|
||||
|
||||
if filepath.Ext(cfg.output) == ".json" {
|
||||
cfg.jsonMode = true
|
||||
} else if filepath.Ext(cfg.output) == ".yaml" || filepath.Ext(cfg.output) == ".yml" {
|
||||
cfg.jsonMode = false
|
||||
}
|
||||
|
||||
data, err := yaml.Marshal(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal CDI spec: %v", err)
|
||||
}
|
||||
|
||||
if cfg.jsonMode {
|
||||
data, err = yaml.YAMLToJSONStrict(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to convert CDI spec from YAML to JSON: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
err = writeToOutput(cfg.jsonMode, data, outputTo)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write output: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func writeToOutput(jsonMode bool, data []byte, output io.Writer) error {
|
||||
if !jsonMode {
|
||||
_, err := output.Write([]byte("---\n"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write YAML separator: %v", err)
|
||||
}
|
||||
|
||||
}
|
||||
_, err := output.Write(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write data: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) generateSpec() (*specs.Spec, error) {
|
||||
nvmllib := nvml.New()
|
||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, r
|
||||
}
|
||||
defer nvmllib.Shutdown()
|
||||
|
||||
devicelib := device.New(device.WithNvml(nvmllib))
|
||||
|
||||
spec := specs.Spec{
|
||||
Version: "0.4.0",
|
||||
Kind: "nvidia.com/gpu",
|
||||
ContainerEdits: specs.ContainerEdits{},
|
||||
}
|
||||
err := devicelib.VisitDevices(func(i int, d device.Device) error {
|
||||
isMig, err := d.IsMigEnabled()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check whether device is MIG device: %v", err)
|
||||
}
|
||||
if isMig {
|
||||
return nil
|
||||
}
|
||||
device, err := generateEditsForDevice(newGPUDevice(i, d))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec for device %v: %v", i, err)
|
||||
}
|
||||
|
||||
graphicsEdits, err := m.editsForGraphicsDevice(d)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec for DRM devices associated with device %v: %v", i, err)
|
||||
}
|
||||
|
||||
// We add the device nodes and hooks edits for the DRM devices; Mounts are added globally
|
||||
for _, dn := range graphicsEdits.DeviceNodes {
|
||||
device.ContainerEdits.DeviceNodes = append(device.ContainerEdits.DeviceNodes, dn)
|
||||
}
|
||||
for _, h := range graphicsEdits.Hooks {
|
||||
device.ContainerEdits.Hooks = append(device.ContainerEdits.Hooks, h)
|
||||
}
|
||||
|
||||
spec.Devices = append(spec.Devices, device)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate CDI spec for GPU devices: %v", err)
|
||||
}
|
||||
|
||||
err = devicelib.VisitMigDevices(func(i int, d device.Device, j int, m device.MigDevice) error {
|
||||
device, err := generateEditsForDevice(newMigDevice(i, j, m))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec for device %v: %v", i, err)
|
||||
}
|
||||
|
||||
spec.Devices = append(spec.Devices, device)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
|
||||
}
|
||||
|
||||
// We create an "all" device with all the discovered device nodes
|
||||
var allDeviceNodes []*specs.DeviceNode
|
||||
for _, d := range spec.Devices {
|
||||
for _, dn := range d.ContainerEdits.DeviceNodes {
|
||||
allDeviceNodes = append(allDeviceNodes, dn)
|
||||
}
|
||||
}
|
||||
all := specs.Device{
|
||||
Name: "all",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: allDeviceNodes,
|
||||
},
|
||||
}
|
||||
|
||||
spec.Devices = append(spec.Devices, all)
|
||||
spec.ContainerEdits.DeviceNodes = m.getExistingMetaDeviceNodes()
|
||||
|
||||
libraries, err := m.findLibs(nvmllib)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate driver libraries: %v", err)
|
||||
}
|
||||
|
||||
binaries, err := m.findBinaries()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate driver binaries: %v", err)
|
||||
}
|
||||
|
||||
ipcs, err := m.findIPC()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate driver IPC sockets: %v", err)
|
||||
}
|
||||
|
||||
graphicsEdits, err := m.editsForGraphicsDevice(nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generated edits for graphics libraries")
|
||||
}
|
||||
|
||||
libOptions := []string{
|
||||
"ro",
|
||||
"nosuid",
|
||||
"nodev",
|
||||
"bind",
|
||||
}
|
||||
ipcOptions := append(libOptions, "noexec")
|
||||
|
||||
spec.ContainerEdits.Mounts = append(
|
||||
generateMountsForPaths(libOptions, libraries, binaries),
|
||||
generateMountsForPaths(ipcOptions, ipcs)...,
|
||||
)
|
||||
|
||||
spec.ContainerEdits.Mounts = append(spec.ContainerEdits.Mounts, graphicsEdits.Mounts...)
|
||||
|
||||
ldcacheUpdateHook := m.generateUpdateLdCacheHook(libraries)
|
||||
|
||||
deviceFolderPermissionHooks, err := m.generateDeviceFolderPermissionHooks(ldcacheUpdateHook.Path, allDeviceNodes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err)
|
||||
}
|
||||
|
||||
spec.ContainerEdits.Hooks = append([]*specs.Hook{ldcacheUpdateHook}, deviceFolderPermissionHooks...)
|
||||
|
||||
return &spec, nil
|
||||
}
|
||||
|
||||
func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) {
|
||||
deviceNodePaths, err := d.GetDeviceNodes()
|
||||
if err != nil {
|
||||
return specs.Device{}, fmt.Errorf("failed to get paths for device: %v", err)
|
||||
}
|
||||
|
||||
deviceNodes := getDeviceNodesFromPaths(deviceNodePaths)
|
||||
|
||||
device := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: deviceNodes,
|
||||
},
|
||||
}
|
||||
|
||||
return device, nil
|
||||
}
|
||||
|
||||
func (m command) editsForGraphicsDevice(device device.Device) (*specs.ContainerEdits, error) {
|
||||
selectedDevice := image.NewVisibleDevices("none")
|
||||
if device != nil {
|
||||
uuid, ret := device.GetUUID()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting device UUID: %v", ret)
|
||||
}
|
||||
selectedDevice = image.NewVisibleDevices(uuid)
|
||||
}
|
||||
cfg := discover.Config{
|
||||
Root: "",
|
||||
NVIDIAContainerToolkitCLIExecutablePath: "nvidia-ctk",
|
||||
}
|
||||
// Create a discoverer for the single device:
|
||||
d, err := discover.NewGraphicsDiscoverer(m.logger, selectedDevice, &cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing discoverer: %v", err)
|
||||
}
|
||||
|
||||
devices, err := d.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting DRM devices: %v", err)
|
||||
}
|
||||
|
||||
var deviceNodes []*specs.DeviceNode
|
||||
for _, d := range devices {
|
||||
dn := specs.DeviceNode{
|
||||
Path: d.Path,
|
||||
HostPath: d.HostPath,
|
||||
}
|
||||
deviceNodes = append(deviceNodes, &dn)
|
||||
}
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting hooks: %v", err)
|
||||
}
|
||||
|
||||
var cdiHooks []*specs.Hook
|
||||
for _, h := range hooks {
|
||||
cdiHook := specs.Hook{
|
||||
HookName: h.Lifecycle,
|
||||
Path: h.Path,
|
||||
Args: h.Args,
|
||||
}
|
||||
cdiHooks = append(cdiHooks, &cdiHook)
|
||||
}
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting mounts: %v", err)
|
||||
}
|
||||
|
||||
var cdiMounts []*specs.Mount
|
||||
for _, m := range mounts {
|
||||
cdiMount := specs.Mount{
|
||||
ContainerPath: m.Path,
|
||||
HostPath: m.HostPath,
|
||||
Options: []string{
|
||||
"ro",
|
||||
"nosuid",
|
||||
"nodev",
|
||||
"bind",
|
||||
},
|
||||
Type: "bind",
|
||||
}
|
||||
cdiMounts = append(cdiMounts, &cdiMount)
|
||||
}
|
||||
|
||||
edits := specs.ContainerEdits{
|
||||
DeviceNodes: deviceNodes,
|
||||
Hooks: cdiHooks,
|
||||
Mounts: cdiMounts,
|
||||
}
|
||||
|
||||
return &edits, nil
|
||||
}
|
||||
|
||||
func (m command) getExistingMetaDeviceNodes() []*specs.DeviceNode {
|
||||
metaDeviceNodePaths := []string{
|
||||
"/dev/nvidia-modeset",
|
||||
"/dev/nvidia-uvm-tools",
|
||||
"/dev/nvidia-uvm",
|
||||
"/dev/nvidiactl",
|
||||
}
|
||||
|
||||
var existingDeviceNodePaths []string
|
||||
for _, p := range metaDeviceNodePaths {
|
||||
if _, err := os.Stat(p); err != nil {
|
||||
m.logger.Infof("Ignoring missing meta device %v", p)
|
||||
continue
|
||||
}
|
||||
existingDeviceNodePaths = append(existingDeviceNodePaths, p)
|
||||
}
|
||||
|
||||
return getDeviceNodesFromPaths(existingDeviceNodePaths)
|
||||
}
|
||||
|
||||
func getDeviceNodesFromPaths(deviceNodePaths []string) []*specs.DeviceNode {
|
||||
var deviceNodes []*specs.DeviceNode
|
||||
for _, p := range deviceNodePaths {
|
||||
deviceNode := specs.DeviceNode{
|
||||
Path: p,
|
||||
}
|
||||
deviceNodes = append(deviceNodes, &deviceNode)
|
||||
}
|
||||
|
||||
return deviceNodes
|
||||
}
|
||||
|
||||
func (m command) findLibs(nvmllib nvml.Interface) ([]string, error) {
|
||||
version, r := nvmllib.SystemGetDriverVersion()
|
||||
if r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
||||
}
|
||||
m.logger.Infof("Using driver version %v", version)
|
||||
|
||||
cache, err := ldcache.New(m.logger, "")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load ldcache: %v", err)
|
||||
}
|
||||
|
||||
libs32, libs64 := cache.List()
|
||||
|
||||
var libs []string
|
||||
for _, l := range libs64 {
|
||||
if strings.HasSuffix(l, version) {
|
||||
m.logger.Infof("found 64-bit driver lib: %v", l)
|
||||
libs = append(libs, l)
|
||||
}
|
||||
}
|
||||
|
||||
for _, l := range libs32 {
|
||||
if strings.HasSuffix(l, version) {
|
||||
m.logger.Infof("found 32-bit driver lib: %v", l)
|
||||
libs = append(libs, l)
|
||||
}
|
||||
}
|
||||
|
||||
return libs, nil
|
||||
}
|
||||
|
||||
func (m command) findBinaries() ([]string, error) {
|
||||
candidates := []string{
|
||||
"nvidia-smi", /* System management interface */
|
||||
"nvidia-debugdump", /* GPU coredump utility */
|
||||
"nvidia-persistenced", /* Persistence mode utility */
|
||||
"nvidia-cuda-mps-control", /* Multi process service CLI */
|
||||
"nvidia-cuda-mps-server", /* Multi process service server */
|
||||
}
|
||||
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
|
||||
var binaries []string
|
||||
for _, c := range candidates {
|
||||
targets, err := locator.Locate(c)
|
||||
if err != nil {
|
||||
m.logger.Warningf("skipping %v: %v", c, err)
|
||||
continue
|
||||
}
|
||||
|
||||
binaries = append(binaries, targets[0])
|
||||
}
|
||||
return binaries, nil
|
||||
}
|
||||
|
||||
func (m command) findIPC() ([]string, error) {
|
||||
candidates := []string{
|
||||
"/var/run/nvidia-persistenced/socket",
|
||||
"/var/run/nvidia-fabricmanager/socket",
|
||||
// TODO: This can be controlled by the NV_MPS_PIPE_DIR envvar
|
||||
"/tmp/nvidia-mps",
|
||||
}
|
||||
|
||||
locator := lookup.NewFileLocator(m.logger, "")
|
||||
|
||||
var ipcs []string
|
||||
for _, c := range candidates {
|
||||
targets, err := locator.Locate(c)
|
||||
if err != nil {
|
||||
m.logger.Warningf("skipping %v: %v", c, err)
|
||||
continue
|
||||
}
|
||||
|
||||
ipcs = append(ipcs, targets[0])
|
||||
}
|
||||
return ipcs, nil
|
||||
}
|
||||
|
||||
func generateMountsForPaths(options []string, pathSets ...[]string) []*specs.Mount {
|
||||
var mounts []*specs.Mount
|
||||
for _, paths := range pathSets {
|
||||
for _, p := range paths {
|
||||
mount := specs.Mount{
|
||||
HostPath: p,
|
||||
// We may want to adjust the container path
|
||||
ContainerPath: p,
|
||||
Type: "bind",
|
||||
Options: options,
|
||||
}
|
||||
mounts = append(mounts, &mount)
|
||||
}
|
||||
}
|
||||
return mounts
|
||||
}
|
||||
|
||||
func (m command) generateUpdateLdCacheHook(libraries []string) *specs.Hook {
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
|
||||
hook := discover.CreateLDCacheUpdateHook(
|
||||
m.logger,
|
||||
locator,
|
||||
nvidiaCTKExecutable,
|
||||
nvidiaCTKDefaultFilePath,
|
||||
libraries,
|
||||
)
|
||||
return &specs.Hook{
|
||||
HookName: hook.Lifecycle,
|
||||
Path: hook.Path,
|
||||
Args: hook.Args,
|
||||
}
|
||||
}
|
||||
|
||||
func (m command) generateDeviceFolderPermissionHooks(nvidiaCTKPath string, deviceNodes []*specs.DeviceNode) ([]*specs.Hook, error) {
|
||||
var deviceFolders []string
|
||||
seen := make(map[string]bool)
|
||||
|
||||
for _, dn := range deviceNodes {
|
||||
if !strings.HasPrefix(dn.Path, "/dev") {
|
||||
m.logger.Warningf("Skipping unexpected device folder path for device %v", dn.Path)
|
||||
continue
|
||||
}
|
||||
for df := filepath.Dir(dn.Path); df != "/dev"; df = filepath.Dir(df) {
|
||||
if seen[df] {
|
||||
continue
|
||||
}
|
||||
deviceFolders = append(deviceFolders, df)
|
||||
seen[df] = true
|
||||
}
|
||||
}
|
||||
|
||||
foldersByMode := make(map[string][]string)
|
||||
for _, p := range deviceFolders {
|
||||
info, err := os.Stat(p)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get info for path %v: %v", p, err)
|
||||
}
|
||||
mode := fmt.Sprintf("%o", info.Mode().Perm())
|
||||
foldersByMode[mode] = append(foldersByMode[mode], p)
|
||||
}
|
||||
|
||||
var hooks []*specs.Hook
|
||||
for mode, folders := range foldersByMode {
|
||||
args := []string{filepath.Base(nvidiaCTKPath), "hook", "chmod", "--mode", mode}
|
||||
for _, folder := range folders {
|
||||
args = append(args, "--path", folder)
|
||||
}
|
||||
hook := specs.Hook{
|
||||
HookName: cdi.CreateContainerHook,
|
||||
Path: nvidiaCTKPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
hooks = append(hooks, &hook)
|
||||
}
|
||||
|
||||
return hooks, nil
|
||||
}
|
||||
123
cmd/nvidia-ctk/cdi/generate/nvml_devices.go
Normal file
123
cmd/nvidia-ctk/cdi/generate/nvml_devices.go
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY Type, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// nvmlDevice wraps an nvml.Device with more functions.
|
||||
type nvmlDevice struct {
|
||||
nvml.Device
|
||||
}
|
||||
|
||||
// nvmlMigDevice allows for specific functions of nvmlDevice to be overridden.
|
||||
type nvmlMigDevice nvmlDevice
|
||||
|
||||
// deviceInfo defines the information the required to construct a Device
|
||||
type deviceInfo interface {
|
||||
GetUUID() (string, error)
|
||||
GetDeviceNodes() ([]string, error)
|
||||
}
|
||||
|
||||
var _ deviceInfo = (*nvmlDevice)(nil)
|
||||
var _ deviceInfo = (*nvmlMigDevice)(nil)
|
||||
|
||||
func newGPUDevice(i int, gpu device.Device) (string, nvmlDevice) {
|
||||
return fmt.Sprintf("gpu%v", i), nvmlDevice{gpu}
|
||||
}
|
||||
|
||||
func newMigDevice(i int, j int, mig device.MigDevice) (string, nvmlMigDevice) {
|
||||
return fmt.Sprintf("mig%v:%v", i, j), nvmlMigDevice{mig}
|
||||
}
|
||||
|
||||
// GetUUID returns the UUID of the device
|
||||
func (d nvmlDevice) GetUUID() (string, error) {
|
||||
uuid, ret := d.Device.GetUUID()
|
||||
if ret != nvml.SUCCESS {
|
||||
return "", ret
|
||||
}
|
||||
return uuid, nil
|
||||
}
|
||||
|
||||
// GetUUID returns the UUID of the device
|
||||
func (d nvmlMigDevice) GetUUID() (string, error) {
|
||||
return nvmlDevice(d).GetUUID()
|
||||
}
|
||||
|
||||
// GetDeviceNodes returns the device node paths for a GPU device
|
||||
func (d nvmlDevice) GetDeviceNodes() ([]string, error) {
|
||||
minor, ret := d.GetMinorNumber()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
||||
}
|
||||
path := fmt.Sprintf("/dev/nvidia%d", minor)
|
||||
|
||||
return []string{path}, nil
|
||||
}
|
||||
|
||||
// GetDeviceNodes returns the device node paths for a MIG device
|
||||
func (d nvmlMigDevice) GetDeviceNodes() ([]string, error) {
|
||||
parent, ret := d.GetDeviceHandleFromMigDeviceHandle()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting parent device: %v", ret)
|
||||
}
|
||||
minor, ret := parent.GetMinorNumber()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
||||
}
|
||||
parentPath := fmt.Sprintf("/dev/nvidia%d", minor)
|
||||
|
||||
migCaps, err := nvcaps.NewMigCaps()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
|
||||
}
|
||||
|
||||
gi, ret := d.GetGpuInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
ci, ret := d.GetComputeInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
giCap := nvcaps.NewGPUInstanceCap(minor, gi)
|
||||
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
|
||||
}
|
||||
|
||||
ciCap := nvcaps.NewComputeInstanceCap(minor, gi, ci)
|
||||
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
|
||||
}
|
||||
|
||||
devicePaths := []string{
|
||||
parentPath,
|
||||
giCapDevicePath,
|
||||
ciCapDevicePath,
|
||||
}
|
||||
|
||||
return devicePaths, nil
|
||||
}
|
||||
140
cmd/nvidia-ctk/hook/chmod/chmod.go
Normal file
140
cmd/nvidia-ctk/hook/chmod/chmod.go
Normal file
@@ -0,0 +1,140 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package chmod
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
paths cli.StringSlice
|
||||
mode string
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs a chmod command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build the chmod command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'chmod' command
|
||||
c := cli.Command{
|
||||
Name: "chmod",
|
||||
Usage: "Set the permissions of folders in the container by running chmod. The container root is prefixed to the specified paths.",
|
||||
Before: func(c *cli.Context) error {
|
||||
return validateFlags(c, &cfg)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "path",
|
||||
Usage: "Specifiy a path to apply the specified mode to",
|
||||
Destination: &cfg.paths,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "mode",
|
||||
Usage: "Specify the file mode",
|
||||
Destination: &cfg.mode,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func validateFlags(c *cli.Context, cfg *config) error {
|
||||
if strings.TrimSpace(cfg.mode) == "" {
|
||||
return fmt.Errorf("a non-empty mode must be specified")
|
||||
}
|
||||
|
||||
for _, p := range cfg.paths.Value() {
|
||||
if strings.TrimSpace(p) == "" {
|
||||
return fmt.Errorf("paths must not be empty")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
if containerRoot == "" {
|
||||
return fmt.Errorf("empty container root detected")
|
||||
}
|
||||
|
||||
paths := m.getPaths(containerRoot, cfg.paths.Value())
|
||||
if len(paths) == 0 {
|
||||
m.logger.Debugf("No paths specified; exiting")
|
||||
return nil
|
||||
}
|
||||
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
targets, err := locator.Locate("chmod")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to locate chmod: %v", err)
|
||||
}
|
||||
chmodPath := targets[0]
|
||||
|
||||
args := append([]string{filepath.Base(chmodPath), cfg.mode}, paths...)
|
||||
|
||||
return syscall.Exec(chmodPath, args, nil)
|
||||
}
|
||||
|
||||
// getPaths updates the specified paths relative to the root.
|
||||
func (m command) getPaths(root string, paths []string) []string {
|
||||
var pathsInRoot []string
|
||||
for _, f := range paths {
|
||||
pathsInRoot = append(pathsInRoot, filepath.Join(root, f))
|
||||
}
|
||||
|
||||
return pathsInRoot
|
||||
}
|
||||
@@ -74,7 +74,7 @@ func (m command) build() *cli.Command {
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "link",
|
||||
Usage: "Specify a specific link to create. The link is specified as source:target",
|
||||
Usage: "Specify a specific link to create. The link is specified as target::link",
|
||||
Destination: &cfg.links,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
@@ -145,7 +145,7 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
||||
|
||||
links := cfg.links.Value()
|
||||
for _, l := range links {
|
||||
parts := strings.Split(l, ":")
|
||||
parts := strings.Split(l, "::")
|
||||
if len(parts) != 2 {
|
||||
m.logger.Warnf("Invalid link specification %v", l)
|
||||
continue
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
package hook
|
||||
|
||||
import (
|
||||
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
|
||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
|
||||
"github.com/sirupsen/logrus"
|
||||
@@ -46,6 +47,7 @@ func (m hookCommand) build() *cli.Command {
|
||||
hook.Subcommands = []*cli.Command{
|
||||
ldcache.NewCommand(m.logger),
|
||||
symlinks.NewCommand(m.logger),
|
||||
chmod.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
|
||||
47
cmd/nvidia-ctk/info/info.go
Normal file
47
cmd/nvidia-ctk/info/info.go
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
import (
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs an info command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
// Create the 'hook' command
|
||||
hook := cli.Command{
|
||||
Name: "info",
|
||||
Usage: "Provide information about the system",
|
||||
}
|
||||
|
||||
hook.Subcommands = []*cli.Command{}
|
||||
|
||||
return &hook
|
||||
}
|
||||
@@ -19,9 +19,12 @@ package main
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
|
||||
infoCLI "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
cli "github.com/urfave/cli/v2"
|
||||
)
|
||||
@@ -72,6 +75,8 @@ func main() {
|
||||
c.Commands = []*cli.Command{
|
||||
hook.NewCommand(logger),
|
||||
runtime.NewCommand(logger),
|
||||
infoCLI.NewCommand(logger),
|
||||
cdi.NewCommand(logger),
|
||||
}
|
||||
|
||||
// Run the CLI
|
||||
|
||||
@@ -22,7 +22,9 @@ import (
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/nvidia"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/crio"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/docker"
|
||||
"github.com/pelletier/go-toml"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
@@ -31,6 +33,7 @@ const (
|
||||
defaultRuntime = "docker"
|
||||
|
||||
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
|
||||
defaultCrioConfigFilePath = "/etc/crio/crio.conf"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
@@ -75,7 +78,7 @@ func (m command) build() *cli.Command {
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime",
|
||||
Usage: "the target runtime engine. One of [docker]",
|
||||
Usage: "the target runtime engine. One of [crio, docker]",
|
||||
Value: defaultRuntime,
|
||||
Destination: &config.runtime,
|
||||
},
|
||||
@@ -108,6 +111,8 @@ func (m command) build() *cli.Command {
|
||||
|
||||
func (m command) configureWrapper(c *cli.Context, config *config) error {
|
||||
switch config.runtime {
|
||||
case "crio":
|
||||
return m.configureCrio(c, config)
|
||||
case "docker":
|
||||
return m.configureDocker(c, config)
|
||||
}
|
||||
@@ -127,9 +132,12 @@ func (m command) configureDocker(c *cli.Context, config *config) error {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
defaultRuntime := config.nvidiaOptions.DefaultRuntime()
|
||||
runtimeConfig := config.nvidiaOptions.Runtime().DockerRuntimesConfig()
|
||||
err = docker.UpdateConfig(cfg, defaultRuntime, runtimeConfig)
|
||||
err = docker.UpdateConfig(
|
||||
cfg,
|
||||
config.nvidiaOptions.RuntimeName,
|
||||
config.nvidiaOptions.RuntimePath,
|
||||
config.nvidiaOptions.SetAsDefault,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
@@ -152,3 +160,44 @@ func (m command) configureDocker(c *cli.Context, config *config) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// configureCrio updates the crio config to enable the NVIDIA Container Runtime
|
||||
func (m command) configureCrio(c *cli.Context, config *config) error {
|
||||
configFilePath := config.configFilePath
|
||||
if configFilePath == "" {
|
||||
configFilePath = defaultCrioConfigFilePath
|
||||
}
|
||||
|
||||
cfg, err := crio.LoadConfig(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = crio.UpdateConfig(
|
||||
cfg,
|
||||
config.nvidiaOptions.RuntimeName,
|
||||
config.nvidiaOptions.RuntimePath,
|
||||
config.nvidiaOptions.SetAsDefault,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
if config.dryRun {
|
||||
output, err := toml.Marshal(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to TOML: %v", err)
|
||||
}
|
||||
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
|
||||
return nil
|
||||
}
|
||||
err = crio.FlushConfig(configFilePath, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to flush config: %v", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Wrote updated config to %v", configFilePath)
|
||||
m.logger.Infof("It is recommended that the cri-o daemon be restarted.")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
disable-require = false
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
#path = "/usr/bin/nvidia-container-cli"
|
||||
environment = []
|
||||
#debug = "/var/log/nvidia-container-toolkit.log"
|
||||
#ldcache = "/etc/ld.so.cache"
|
||||
load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
@@ -1,76 +0,0 @@
|
||||
ARG BASEIMAGE
|
||||
FROM ${BASEIMAGE}
|
||||
|
||||
RUN yum install -y \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
wget \
|
||||
git \
|
||||
rpm-build \
|
||||
make && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
ARG GOLANG_VERSION=0.0.0
|
||||
RUN set -eux; \
|
||||
\
|
||||
arch="$(uname -m)"; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture"; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
| tar -C /usr/local -xz
|
||||
|
||||
ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
RUN mkdir -p $DIST_DIR /dist
|
||||
|
||||
# nvidia-container-toolkit
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
|
||||
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
|
||||
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
|
||||
# This might not be useful on Amazon Linux, but it's simpler to keep the RHEL
|
||||
# and Amazon Linux packages identical.
|
||||
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
|
||||
|
||||
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
|
||||
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
|
||||
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
CMD arch=$(uname -m) && \
|
||||
rpmbuild --clean --target=$arch -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "release_date $(date +'%a %b %d %Y')" \
|
||||
-D "git_commit ${GIT_COMMIT}" \
|
||||
-D "version $VERSION" \
|
||||
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||
-D "release $RELEASE" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
@@ -76,6 +76,6 @@ RUN dch --create --package="${PKG_NAME}" \
|
||||
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
||||
|
||||
CMD export DISTRIB="$(lsb_release -cs)" && \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
|
||||
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/nvidia-container-toolkit_*.deb /dist
|
||||
mv /tmp/*.deb /dist
|
||||
|
||||
@@ -28,9 +28,9 @@ ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
ENV PKG_NAME ${PKG_NAME}
|
||||
ENV PKG_VERS ${PKG_VERS}
|
||||
ENV PKG_REV ${PKG_REV}
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
@@ -65,8 +65,8 @@ CMD arch=$(uname -m) && \
|
||||
-D "_topdir $PWD" \
|
||||
-D "release_date $(date +'%a %b %d %Y')" \
|
||||
-D "git_commit ${GIT_COMMIT}" \
|
||||
-D "version $VERSION" \
|
||||
-D "version ${PKG_VERS}" \
|
||||
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||
-D "release $RELEASE" \
|
||||
-D "release ${PKG_REV}" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
|
||||
@@ -1,3 +1,19 @@
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This is the dockerfile for building packages on yum-based RPM systems.
|
||||
|
||||
ARG BASEIMAGE
|
||||
FROM ${BASEIMAGE}
|
||||
|
||||
@@ -30,9 +46,9 @@ ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
ENV PKG_NAME ${PKG_NAME}
|
||||
ENV PKG_VERS ${PKG_VERS}
|
||||
ENV PKG_REV ${PKG_REV}
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
@@ -67,8 +83,8 @@ CMD arch=$(uname -m) && \
|
||||
-D "_topdir $PWD" \
|
||||
-D "release_date $(date +'%a %b %d %Y')" \
|
||||
-D "git_commit ${GIT_COMMIT}" \
|
||||
-D "version $VERSION" \
|
||||
-D "version ${PKG_VERS}" \
|
||||
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||
-D "release $RELEASE" \
|
||||
-D "release ${PKG_REV}" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
@@ -69,6 +69,6 @@ RUN dch --create --package="${PKG_NAME}" \
|
||||
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
||||
|
||||
CMD export DISTRIB="$(lsb_release -cs)" && \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
|
||||
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/*.deb /dist
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
|
||||
# Supported OSs by architecture
|
||||
AMD64_TARGETS := ubuntu20.04 ubuntu18.04 ubuntu16.04 debian10 debian9
|
||||
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
X86_64_TARGETS := fedora35 centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
PPC64LE_TARGETS := ubuntu18.04 ubuntu16.04 centos7 centos8 rhel7 rhel8
|
||||
ARM64_TARGETS := ubuntu20.04 ubuntu18.04
|
||||
AARCH64_TARGETS := centos8 rhel8 amazonlinux2
|
||||
AARCH64_TARGETS := fedora35 centos8 rhel8 amazonlinux2
|
||||
|
||||
# Define top-level build targets
|
||||
docker%: SHELL:=/bin/bash
|
||||
@@ -104,12 +104,26 @@ LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
|
||||
--centos%: OS := centos
|
||||
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--centos%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--centos%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--centos%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private fedora target
|
||||
--fedora%: OS := fedora
|
||||
--fedora%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--fedora%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--fedora%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--fedora%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
# The fedora(35) base image has very slow performance when building aarch64 packages.
|
||||
# Since our primary concern here is glibc versions, we use the older glibc version available in centos8.
|
||||
--fedora35%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private amazonlinux target
|
||||
--amazonlinux%: OS := amazonlinux
|
||||
--amazonlinux%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--amazonlinux%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--amazonlinux%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--amazonlinux%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
|
||||
# private opensuse-leap target
|
||||
--opensuse-leap%: OS = opensuse-leap
|
||||
@@ -123,8 +137,11 @@ LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
|
||||
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
|
||||
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
|
||||
--rhel%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--rhel%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
--rhel8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
|
||||
# We allow the CONFIG_TOML_SUFFIX to be overridden.
|
||||
CONFIG_TOML_SUFFIX ?= $(OS)
|
||||
|
||||
@@ -140,9 +157,9 @@ docker-build-%:
|
||||
--build-arg PKG_NAME="$(LIB_NAME)" \
|
||||
--build-arg PKG_VERS="$(LIB_VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
|
||||
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
|
||||
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
|
||||
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
||||
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
||||
--tag $(BUILDIMAGE) \
|
||||
--file $(DOCKERFILE) .
|
||||
$(DOCKER) run \
|
||||
|
||||
37
go.mod
37
go.mod
@@ -1,18 +1,41 @@
|
||||
module github.com/NVIDIA/nvidia-container-toolkit
|
||||
|
||||
go 1.14
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.0.0
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.4.1-0.20220614144320-dc973e22f674
|
||||
github.com/containers/podman/v4 v4.0.3
|
||||
github.com/opencontainers/runc v1.1.3
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.5.2
|
||||
github.com/opencontainers/runc v1.1.4
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab
|
||||
github.com/pelletier/go-toml v1.9.4
|
||||
github.com/sirupsen/logrus v1.8.1
|
||||
github.com/sirupsen/logrus v1.9.0
|
||||
github.com/stretchr/testify v1.7.0
|
||||
github.com/urfave/cli/v2 v2.3.0
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220922133427-1049a7fa76a9
|
||||
golang.org/x/mod v0.5.0
|
||||
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
|
||||
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6
|
||||
sigs.k8s.io/yaml v1.3.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/blang/semver v3.5.1+incompatible // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/fsnotify/fsnotify v1.5.4 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20220110225228-7e2d60f1e41f // indirect
|
||||
github.com/opencontainers/selinux v1.10.1 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
|
||||
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
|
||||
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
|
||||
)
|
||||
|
||||
125
internal/config/crio/crio.go
Normal file
125
internal/config/crio/crio.go
Normal file
@@ -0,0 +1,125 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package crio
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// LoadConfig loads the cri-o config from disk
|
||||
func LoadConfig(config string) (*toml.Tree, error) {
|
||||
log.Infof("Loading config: %v", config)
|
||||
|
||||
info, err := os.Stat(config)
|
||||
if os.IsExist(err) && info.IsDir() {
|
||||
return nil, fmt.Errorf("config file is a directory")
|
||||
}
|
||||
|
||||
configFile := config
|
||||
if os.IsNotExist(err) {
|
||||
configFile = "/dev/null"
|
||||
log.Infof("Config file does not exist, creating new one")
|
||||
}
|
||||
|
||||
cfg, err := toml.LoadFile(configFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Infof("Successfully loaded config")
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// UpdateConfig updates the cri-o config to include the NVIDIA Container Runtime
|
||||
func UpdateConfig(config *toml.Tree, runtimeClass string, runtimePath string, setAsDefault bool) error {
|
||||
switch runc := config.Get("crio.runtime.runtimes.runc").(type) {
|
||||
case *toml.Tree:
|
||||
runc, _ = toml.Load(runc.String())
|
||||
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass}, runc)
|
||||
}
|
||||
|
||||
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_path"}, runtimePath)
|
||||
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_type"}, "oci")
|
||||
|
||||
if setAsDefault {
|
||||
config.SetPath([]string{"crio", "runtime", "default_runtime"}, runtimeClass)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RevertConfig reverts the cri-o config to remove the NVIDIA Container Runtime
|
||||
func RevertConfig(config *toml.Tree, runtimeClass string) error {
|
||||
if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok {
|
||||
if runtimeClass == runtime {
|
||||
config.DeletePath([]string{"crio", "runtime", "default_runtime"})
|
||||
}
|
||||
}
|
||||
|
||||
runtimeClassPath := []string{"crio", "runtime", "runtimes", runtimeClass}
|
||||
config.DeletePath(runtimeClassPath)
|
||||
for i := 0; i < len(runtimeClassPath); i++ {
|
||||
remainingPath := runtimeClassPath[:len(runtimeClassPath)-i]
|
||||
if entry, ok := config.GetPath(remainingPath).(*toml.Tree); ok {
|
||||
if len(entry.Keys()) != 0 {
|
||||
break
|
||||
}
|
||||
config.DeletePath(remainingPath)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FlushConfig flushes the updated/reverted config out to disk
|
||||
func FlushConfig(config string, cfg *toml.Tree) error {
|
||||
log.Infof("Flushing config")
|
||||
|
||||
output, err := cfg.ToTomlString()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to TOML: %v", err)
|
||||
}
|
||||
|
||||
switch len(output) {
|
||||
case 0:
|
||||
err := os.Remove(config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to remove empty file: %v", err)
|
||||
}
|
||||
log.Infof("Config empty, removing file")
|
||||
default:
|
||||
f, err := os.Create(config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open '%v' for writing: %v", config, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.WriteString(output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to write output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Successfully flushed config")
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -57,11 +57,7 @@ func LoadConfig(configFilePath string) (map[string]interface{}, error) {
|
||||
}
|
||||
|
||||
// UpdateConfig updates the docker config to include the nvidia runtimes
|
||||
func UpdateConfig(config map[string]interface{}, defaultRuntime string, newRuntimes map[string]interface{}) error {
|
||||
if defaultRuntime != "" {
|
||||
config["default-runtime"] = defaultRuntime
|
||||
}
|
||||
|
||||
func UpdateConfig(config map[string]interface{}, runtimeName string, runtimePath string, setAsDefault bool) error {
|
||||
// Read the existing runtimes
|
||||
runtimes := make(map[string]interface{})
|
||||
if _, exists := config["runtimes"]; exists {
|
||||
@@ -69,14 +65,20 @@ func UpdateConfig(config map[string]interface{}, defaultRuntime string, newRunti
|
||||
}
|
||||
|
||||
// Add / update the runtime definitions
|
||||
for name, rt := range newRuntimes {
|
||||
runtimes[name] = rt
|
||||
runtimes[runtimeName] = map[string]interface{}{
|
||||
"path": runtimePath,
|
||||
"args": []string{},
|
||||
}
|
||||
|
||||
// Update the runtimes definition
|
||||
if len(runtimes) > 0 {
|
||||
config["runtimes"] = runtimes
|
||||
}
|
||||
|
||||
if setAsDefault {
|
||||
config["default-runtime"] = runtimeName
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -27,30 +27,33 @@ import (
|
||||
func TestUpdateConfigDefaultRuntime(t *testing.T) {
|
||||
testCases := []struct {
|
||||
config map[string]interface{}
|
||||
defaultRuntime string
|
||||
runtimeName string
|
||||
setAsDefault bool
|
||||
expectedDefaultRuntimeName interface{}
|
||||
}{
|
||||
{
|
||||
defaultRuntime: "",
|
||||
setAsDefault: false,
|
||||
expectedDefaultRuntimeName: nil,
|
||||
},
|
||||
{
|
||||
defaultRuntime: "NAME",
|
||||
runtimeName: "NAME",
|
||||
setAsDefault: true,
|
||||
expectedDefaultRuntimeName: "NAME",
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"default-runtime": "ALREADY_SET",
|
||||
},
|
||||
defaultRuntime: "",
|
||||
runtimeName: "NAME",
|
||||
setAsDefault: false,
|
||||
expectedDefaultRuntimeName: "ALREADY_SET",
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"default-runtime": "ALREADY_SET",
|
||||
},
|
||||
defaultRuntime: "NAME",
|
||||
runtimeName: "NAME",
|
||||
setAsDefault: true,
|
||||
expectedDefaultRuntimeName: "NAME",
|
||||
},
|
||||
}
|
||||
@@ -60,7 +63,7 @@ func TestUpdateConfigDefaultRuntime(t *testing.T) {
|
||||
if tc.config == nil {
|
||||
tc.config = make(map[string]interface{})
|
||||
}
|
||||
err := UpdateConfig(tc.config, tc.defaultRuntime, nil)
|
||||
err := UpdateConfig(tc.config, tc.runtimeName, "", tc.setAsDefault)
|
||||
require.NoError(t, err)
|
||||
|
||||
defaultRuntimeName := tc.config["default-runtime"]
|
||||
@@ -72,20 +75,14 @@ func TestUpdateConfigDefaultRuntime(t *testing.T) {
|
||||
func TestUpdateConfigRuntimes(t *testing.T) {
|
||||
testCases := []struct {
|
||||
config map[string]interface{}
|
||||
runtimes map[string]interface{}
|
||||
runtimes map[string]string
|
||||
expectedConfig map[string]interface{}
|
||||
}{
|
||||
{
|
||||
config: map[string]interface{}{},
|
||||
runtimes: map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime2": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime2",
|
||||
"args": []string{},
|
||||
},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
"runtime2": "/test/runtime/dir/runtime2",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
@@ -109,15 +106,9 @@ func TestUpdateConfigRuntimes(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
runtimes: map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime2": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime2",
|
||||
"args": []string{},
|
||||
},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
"runtime2": "/test/runtime/dir/runtime2",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
@@ -141,11 +132,8 @@ func TestUpdateConfigRuntimes(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
runtimes: map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
@@ -169,11 +157,8 @@ func TestUpdateConfigRuntimes(t *testing.T) {
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
},
|
||||
runtimes: map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
@@ -212,8 +197,10 @@ func TestUpdateConfigRuntimes(t *testing.T) {
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
err := UpdateConfig(tc.config, "", tc.runtimes)
|
||||
require.NoError(t, err)
|
||||
for runtimeName, runtimePath := range tc.runtimes {
|
||||
err := UpdateConfig(tc.config, runtimeName, runtimePath, false)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
configContent, err := json.MarshalIndent(tc.config, "", " ")
|
||||
require.NoError(t, err)
|
||||
|
||||
54
internal/config/image/capabilities.go
Normal file
54
internal/config/image/capabilities.go
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
// DriverCapability represents the possible values of NVIDIA_DRIVER_CAPABILITIES
|
||||
type DriverCapability string
|
||||
|
||||
// Constants for the supported driver capabilities
|
||||
const (
|
||||
DriverCapabilityAll DriverCapability = "all"
|
||||
DriverCapabilityCompat32 DriverCapability = "compat32"
|
||||
DriverCapabilityCompute DriverCapability = "compute"
|
||||
DriverCapabilityDisplay DriverCapability = "display"
|
||||
DriverCapabilityGraphics DriverCapability = "graphics"
|
||||
DriverCapabilityNgx DriverCapability = "ngx"
|
||||
DriverCapabilityUtility DriverCapability = "utility"
|
||||
DriverCapabilityVideo DriverCapability = "video"
|
||||
)
|
||||
|
||||
// DriverCapabilities represents the NVIDIA_DRIVER_CAPABILITIES set for the specified image.
|
||||
type DriverCapabilities map[DriverCapability]bool
|
||||
|
||||
// Has check whether the specified capability is selected.
|
||||
func (c DriverCapabilities) Has(capability DriverCapability) bool {
|
||||
if c[DriverCapabilityAll] {
|
||||
return true
|
||||
}
|
||||
return c[capability]
|
||||
}
|
||||
|
||||
// Any checks whether any of the specified capabilites are set
|
||||
func (c DriverCapabilities) Any(capabilities ...DriverCapability) bool {
|
||||
for _, cap := range capabilities {
|
||||
if c.Has(cap) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
@@ -26,11 +26,12 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
||||
envNVRequireJetpack = envNVRequirePrefix + "JETPACK"
|
||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
||||
envNVRequireJetpack = envNVRequirePrefix + "JETPACK"
|
||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
||||
)
|
||||
|
||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||
@@ -113,33 +114,48 @@ func (i CUDA) HasDisableRequire() bool {
|
||||
}
|
||||
|
||||
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) []string {
|
||||
// Grab a reference to devices from the first envvar
|
||||
// in the list that actually exists in the environment.
|
||||
var devices *string
|
||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
// We concantenate all the devices from the specified envvars.
|
||||
var isSet bool
|
||||
var devices []string
|
||||
requested := make(map[string]bool)
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := i[envVar]; ok {
|
||||
devices = &devs
|
||||
break
|
||||
isSet = true
|
||||
for _, d := range strings.Split(devs, ",") {
|
||||
trimmed := strings.TrimSpace(d)
|
||||
if len(trimmed) == 0 {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, trimmed)
|
||||
requested[trimmed] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Environment variable unset with legacy image: default to "all".
|
||||
if devices == nil && i.IsLegacy() {
|
||||
return []string{"all"}
|
||||
if !isSet && len(devices) == 0 && i.IsLegacy() {
|
||||
return NewVisibleDevices("all")
|
||||
}
|
||||
|
||||
// Environment variable unset or empty or "void": return nil
|
||||
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
||||
return nil
|
||||
if len(devices) == 0 || requested["void"] {
|
||||
return NewVisibleDevices("void")
|
||||
}
|
||||
|
||||
// Environment variable set to "none": reset to "".
|
||||
if *devices == "none" {
|
||||
return []string{""}
|
||||
return NewVisibleDevices(devices...)
|
||||
}
|
||||
|
||||
// GetDriverCapabilities returns the requested driver capabilities.
|
||||
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||
env := i[envNVDriverCapabilities]
|
||||
|
||||
capabilites := make(DriverCapabilities)
|
||||
for _, c := range strings.Split(env, ",") {
|
||||
capabilites[DriverCapability(c)] = true
|
||||
}
|
||||
|
||||
return strings.Split(*devices, ",")
|
||||
return capabilites
|
||||
}
|
||||
|
||||
func (i CUDA) legacyVersion() (string, error) {
|
||||
|
||||
127
internal/config/image/devices.go
Normal file
127
internal/config/image/devices.go
Normal file
@@ -0,0 +1,127 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// VisibleDevices represents the devices selected in a container image
|
||||
// through the NVIDIA_VISIBLE_DEVICES or other environment variables
|
||||
type VisibleDevices interface {
|
||||
List() []string
|
||||
Has(string) bool
|
||||
}
|
||||
|
||||
var _ VisibleDevices = (*all)(nil)
|
||||
var _ VisibleDevices = (*none)(nil)
|
||||
var _ VisibleDevices = (*void)(nil)
|
||||
var _ VisibleDevices = (*devices)(nil)
|
||||
|
||||
// NewVisibleDevices creates a VisibleDevices based on the value of the specified envvar.
|
||||
func NewVisibleDevices(envvars ...string) VisibleDevices {
|
||||
for _, envvar := range envvars {
|
||||
if envvar == "all" {
|
||||
return all{}
|
||||
}
|
||||
if envvar == "none" {
|
||||
return none{}
|
||||
}
|
||||
if envvar == "" || envvar == "void" {
|
||||
return void{}
|
||||
}
|
||||
}
|
||||
|
||||
return newDevices(envvars...)
|
||||
}
|
||||
|
||||
type all struct{}
|
||||
|
||||
// List returns ["all"] for all devices
|
||||
func (a all) List() []string {
|
||||
return []string{"all"}
|
||||
}
|
||||
|
||||
// Has for all devices is true for any id except the empty ID
|
||||
func (a all) Has(id string) bool {
|
||||
return id != ""
|
||||
}
|
||||
|
||||
type none struct{}
|
||||
|
||||
// List returns [""] for the none devices
|
||||
func (n none) List() []string {
|
||||
return []string{""}
|
||||
}
|
||||
|
||||
// Has for none devices is false for any id
|
||||
func (n none) Has(id string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
type void struct {
|
||||
none
|
||||
}
|
||||
|
||||
// List returns nil for the void devices
|
||||
func (v void) List() []string {
|
||||
return nil
|
||||
}
|
||||
|
||||
type devices struct {
|
||||
len int
|
||||
lookup map[string]int
|
||||
}
|
||||
|
||||
func newDevices(idOrCommaSeparated ...string) devices {
|
||||
lookup := make(map[string]int)
|
||||
|
||||
i := 0
|
||||
for _, commaSeparated := range idOrCommaSeparated {
|
||||
for _, id := range strings.Split(commaSeparated, ",") {
|
||||
lookup[id] = i
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
d := devices{
|
||||
len: i,
|
||||
lookup: lookup,
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// List returns the list of requested devices
|
||||
func (d devices) List() []string {
|
||||
list := make([]string, d.len)
|
||||
|
||||
for id, i := range d.lookup {
|
||||
list[i] = id
|
||||
}
|
||||
|
||||
return list
|
||||
}
|
||||
|
||||
// Has checks whether the specified ID is in the set of requested devices
|
||||
func (d devices) Has(id string) bool {
|
||||
if id == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
_, exist := d.lookup[id]
|
||||
return exist
|
||||
}
|
||||
62
internal/discover/filter.go
Normal file
62
internal/discover/filter.go
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import "github.com/sirupsen/logrus"
|
||||
|
||||
// Filter defines an interface for filtering discovered entities
|
||||
type Filter interface {
|
||||
DeviceIsSelected(device Device) bool
|
||||
}
|
||||
|
||||
// filtered represents a filtered discoverer
|
||||
type filtered struct {
|
||||
Discover
|
||||
logger *logrus.Logger
|
||||
filter Filter
|
||||
}
|
||||
|
||||
// newFilteredDisoverer creates a discoverer that applies the specified filter to the returned entities of the discoverer
|
||||
func newFilteredDisoverer(logger *logrus.Logger, applyTo Discover, filter Filter) Discover {
|
||||
return filtered{
|
||||
Discover: applyTo,
|
||||
logger: logger,
|
||||
filter: filter,
|
||||
}
|
||||
}
|
||||
|
||||
// Devices returns a filtered list of devices based on the specified filter.
|
||||
func (d filtered) Devices() ([]Device, error) {
|
||||
devices, err := d.Discover.Devices()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if d.filter == nil {
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
var selected []Device
|
||||
for _, device := range devices {
|
||||
if d.filter.DeviceIsSelected(device) {
|
||||
selected = append(selected, device)
|
||||
}
|
||||
d.logger.Debugf("skipping device %v", device)
|
||||
}
|
||||
|
||||
return selected, nil
|
||||
}
|
||||
254
internal/discover/graphics.go
Normal file
254
internal/discover/graphics.go
Normal file
@@ -0,0 +1,254 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
|
||||
func NewGraphicsDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, cfg *Config) (Discover, error) {
|
||||
root := cfg.Root
|
||||
|
||||
locator, err := lookup.NewLibraryLocator(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct library locator: %v", err)
|
||||
}
|
||||
libraries := NewMounts(
|
||||
logger,
|
||||
locator,
|
||||
root,
|
||||
[]string{
|
||||
"libnvidia-egl-gbm.so",
|
||||
},
|
||||
)
|
||||
|
||||
jsonMounts := NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(logger, root),
|
||||
root,
|
||||
[]string{
|
||||
// TODO: We should handle this more cleanly
|
||||
"/etc/glvnd/egl_vendor.d/10_nvidia.json",
|
||||
"/etc/vulkan/icd.d/nvidia_icd.json",
|
||||
"/etc/vulkan/implicit_layer.d/nvidia_layers.json",
|
||||
"/usr/share/glvnd/egl_vendor.d/10_nvidia.json",
|
||||
"/usr/share/vulkan/icd.d/nvidia_icd.json",
|
||||
"/usr/share/vulkan/implicit_layer.d/nvidia_layers.json",
|
||||
"/usr/share/egl/egl_external_platform.d/15_nvidia_gbm.json",
|
||||
},
|
||||
)
|
||||
|
||||
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
|
||||
}
|
||||
|
||||
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, cfg)
|
||||
|
||||
discover := Merge(
|
||||
Merge(drmDeviceNodes, drmByPathSymlinks),
|
||||
libraries,
|
||||
jsonMounts,
|
||||
)
|
||||
|
||||
return discover, nil
|
||||
}
|
||||
|
||||
type drmDevicesByPath struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
lookup lookup.Locator
|
||||
nvidiaCTKExecutablePath string
|
||||
root string
|
||||
devicesFrom Discover
|
||||
}
|
||||
|
||||
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
|
||||
func newCreateDRMByPathSymlinks(logger *logrus.Logger, devices Discover, cfg *Config) Discover {
|
||||
d := drmDevicesByPath{
|
||||
logger: logger,
|
||||
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
|
||||
nvidiaCTKExecutablePath: cfg.NVIDIAContainerToolkitCLIExecutablePath,
|
||||
root: cfg.Root,
|
||||
devicesFrom: devices,
|
||||
}
|
||||
|
||||
return &d
|
||||
}
|
||||
|
||||
// Hooks returns a hook to create the symlinks from the required CSV files
|
||||
func (d drmDevicesByPath) Hooks() ([]Hook, error) {
|
||||
devices, err := d.devicesFrom.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to discover devices for by-path symlinks: %v", err)
|
||||
}
|
||||
if len(devices) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
hookPath := nvidiaCTKDefaultFilePath
|
||||
targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err)
|
||||
} else if len(targets) == 0 {
|
||||
d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath)
|
||||
} else {
|
||||
d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets)
|
||||
hookPath = targets[0]
|
||||
}
|
||||
d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
|
||||
|
||||
args := []string{hookPath, "hook", "create-symlinks"}
|
||||
links, err := d.getSpecificLinkArgs(devices)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to determine specific links: %v", err)
|
||||
}
|
||||
for _, l := range links {
|
||||
args = append(args, "--link", l)
|
||||
}
|
||||
|
||||
h := Hook{
|
||||
Lifecycle: cdi.CreateContainerHook,
|
||||
Path: hookPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// getSpecificLinkArgs returns the required specic links that need to be created
|
||||
func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error) {
|
||||
selectedDevices := make(map[string]bool)
|
||||
for _, d := range devices {
|
||||
selectedDevices[filepath.Base(d.HostPath)] = true
|
||||
}
|
||||
|
||||
linkLocator := lookup.NewFileLocator(d.logger, d.root)
|
||||
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate devices by path: %v", err)
|
||||
}
|
||||
|
||||
var links []string
|
||||
for _, c := range candidates {
|
||||
device, err := os.Readlink(c)
|
||||
if err != nil {
|
||||
d.logger.Warningf("Failed to evaluate symlink %v; ignoring", c)
|
||||
continue
|
||||
}
|
||||
|
||||
if selectedDevices[filepath.Base(device)] {
|
||||
d.logger.Debugf("adding device symlink %v -> %v", c, device)
|
||||
links = append(links, fmt.Sprintf("%v::%v", device, c))
|
||||
}
|
||||
}
|
||||
|
||||
return links, nil
|
||||
}
|
||||
|
||||
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
|
||||
func newDRMDeviceDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, root string) (Discover, error) {
|
||||
allDevices := NewDeviceDiscoverer(
|
||||
logger,
|
||||
lookup.NewCharDeviceLocator(logger, root),
|
||||
root,
|
||||
[]string{
|
||||
"/dev/dri/card*",
|
||||
"/dev/dri/renderD*",
|
||||
},
|
||||
)
|
||||
|
||||
filter, err := newDRMDeviceFilter(logger, devices, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
|
||||
}
|
||||
|
||||
// We return a discoverer that applies the DRM device filter created above to all discovered DRM device nodes.
|
||||
d := newFilteredDisoverer(
|
||||
logger,
|
||||
allDevices,
|
||||
filter,
|
||||
)
|
||||
|
||||
return d, err
|
||||
}
|
||||
|
||||
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
|
||||
func newDRMDeviceFilter(logger *logrus.Logger, devices image.VisibleDevices, root string) (Filter, error) {
|
||||
gpuInformationPaths, err := proc.GetInformationFilePaths(root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read GPU information: %v", err)
|
||||
}
|
||||
|
||||
var selectedBusIds []string
|
||||
for _, f := range gpuInformationPaths {
|
||||
info, err := proc.ParseGPUInformationFile(f)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %v: %v", f, err)
|
||||
}
|
||||
uuid := info[proc.GPUInfoGPUUUID]
|
||||
busID := info[proc.GPUInfoBusLocation]
|
||||
minor := info[proc.GPUInfoDeviceMinor]
|
||||
|
||||
if devices.Has(minor) || devices.Has(uuid) || devices.Has(busID) {
|
||||
selectedBusIds = append(selectedBusIds, busID)
|
||||
}
|
||||
}
|
||||
|
||||
filter := make(selectDeviceByPath)
|
||||
for _, busID := range selectedBusIds {
|
||||
drmDeviceNodes, err := drm.GetDeviceNodesByBusID(busID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to determine DRM devices for %v: %v", busID, err)
|
||||
}
|
||||
for _, drmDeviceNode := range drmDeviceNodes {
|
||||
filter[filepath.Join(drmDeviceNode)] = true
|
||||
}
|
||||
}
|
||||
|
||||
return filter, nil
|
||||
}
|
||||
|
||||
// selectDeviceByPath is a filter that allows devices to be selected by the path
|
||||
type selectDeviceByPath map[string]bool
|
||||
|
||||
var _ Filter = (*selectDeviceByPath)(nil)
|
||||
|
||||
// DeviceIsSelected determines whether the device's path has been selected
|
||||
func (s selectDeviceByPath) DeviceIsSelected(device Device) bool {
|
||||
return s[device.Path]
|
||||
}
|
||||
|
||||
// MountIsSelected is always true
|
||||
func (s selectDeviceByPath) MountIsSelected(Mount) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// HookIsSelected is always true
|
||||
func (s selectDeviceByPath) HookIsSelected(Hook) bool {
|
||||
return true
|
||||
}
|
||||
@@ -19,7 +19,6 @@ package discover
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
@@ -57,58 +56,50 @@ func (d ldconfig) Hooks() ([]Hook, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
|
||||
}
|
||||
h := CreateLDCacheUpdateHook(
|
||||
d.logger,
|
||||
d.lookup,
|
||||
d.nvidiaCTKExecutablePath,
|
||||
nvidiaCTKDefaultFilePath,
|
||||
getLibraryPaths(mounts),
|
||||
)
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
libDirs := getLibDirs(mounts)
|
||||
|
||||
hookPath := nvidiaCTKDefaultFilePath
|
||||
targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath)
|
||||
// CreateLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache
|
||||
func CreateLDCacheUpdateHook(logger *logrus.Logger, lookup lookup.Locator, execuable string, defaultPath string, libraries []string) Hook {
|
||||
hookPath := defaultPath
|
||||
targets, err := lookup.Locate(execuable)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err)
|
||||
logger.Warnf("Failed to locate %v: %v", execuable, err)
|
||||
} else if len(targets) == 0 {
|
||||
d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath)
|
||||
logger.Warnf("%v not found", execuable)
|
||||
} else {
|
||||
d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets)
|
||||
logger.Debugf("Found %v candidates: %v", execuable, targets)
|
||||
hookPath = targets[0]
|
||||
}
|
||||
d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
|
||||
logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
|
||||
|
||||
args := []string{hookPath, "hook", "update-ldcache"}
|
||||
for _, f := range libDirs {
|
||||
args := []string{filepath.Base(hookPath), "hook", "update-ldcache"}
|
||||
for _, f := range uniqueFolders(libraries) {
|
||||
args = append(args, "--folder", f)
|
||||
}
|
||||
h := Hook{
|
||||
return Hook{
|
||||
Lifecycle: cdi.CreateContainerHook,
|
||||
Path: hookPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// getLibDirs extracts the library dirs from the specified mounts
|
||||
func getLibDirs(mounts []Mount) []string {
|
||||
// getLibraryPaths extracts the library dirs from the specified mounts
|
||||
func getLibraryPaths(mounts []Mount) []string {
|
||||
var paths []string
|
||||
checked := make(map[string]bool)
|
||||
|
||||
for _, m := range mounts {
|
||||
dir := filepath.Dir(m.Path)
|
||||
if dir == "" {
|
||||
if !isLibName(m.Path) {
|
||||
continue
|
||||
}
|
||||
|
||||
_, exists := checked[dir]
|
||||
if exists {
|
||||
continue
|
||||
}
|
||||
checked[dir] = isLibName(m.Path)
|
||||
|
||||
if checked[dir] {
|
||||
paths = append(paths, dir)
|
||||
}
|
||||
paths = append(paths, m.Path)
|
||||
}
|
||||
|
||||
sort.Strings(paths)
|
||||
|
||||
return paths
|
||||
}
|
||||
|
||||
@@ -129,3 +120,22 @@ func isLibName(filename string) bool {
|
||||
|
||||
return parts[len(parts)-1] == "" || strings.HasPrefix(parts[len(parts)-1], ".")
|
||||
}
|
||||
|
||||
// uniqueFolders returns the unique set of folders for the specified files
|
||||
func uniqueFolders(libraries []string) []string {
|
||||
var paths []string
|
||||
checked := make(map[string]bool)
|
||||
|
||||
for _, l := range libraries {
|
||||
dir := filepath.Dir(l)
|
||||
if dir == "" {
|
||||
continue
|
||||
}
|
||||
if checked[dir] {
|
||||
continue
|
||||
}
|
||||
checked[dir] = true
|
||||
paths = append(paths, dir)
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
@@ -17,11 +17,110 @@
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestLDCacheUpdateHook(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
cfg := Config{
|
||||
Root: "/",
|
||||
NVIDIAContainerToolkitCLIExecutablePath: "/foo/bar/nvidia-ctk",
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
mounts []Mount
|
||||
mountError error
|
||||
expectedError error
|
||||
expectedArgs []string
|
||||
}{
|
||||
{
|
||||
description: "empty mounts",
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache"},
|
||||
},
|
||||
{
|
||||
description: "mount error",
|
||||
mountError: fmt.Errorf("mountError"),
|
||||
expectedError: fmt.Errorf("mountError"),
|
||||
},
|
||||
{
|
||||
description: "library folders are added to args",
|
||||
mounts: []Mount{
|
||||
{
|
||||
Path: "/usr/local/lib/libfoo.so",
|
||||
},
|
||||
{
|
||||
Path: "/usr/bin/notlib",
|
||||
},
|
||||
{
|
||||
Path: "/usr/local/libother/libfoo.so",
|
||||
},
|
||||
{
|
||||
Path: "/usr/local/lib/libbar.so",
|
||||
},
|
||||
},
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
|
||||
},
|
||||
{
|
||||
description: "host paths are ignored",
|
||||
mounts: []Mount{
|
||||
{
|
||||
HostPath: "/usr/local/other/libfoo.so",
|
||||
Path: "/usr/local/lib/libfoo.so",
|
||||
},
|
||||
},
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
mountMock := &DiscoverMock{
|
||||
MountsFunc: func() ([]Mount, error) {
|
||||
return tc.mounts, tc.mountError
|
||||
},
|
||||
}
|
||||
expectedHook := Hook{
|
||||
Path: "/usr/bin/nvidia-ctk",
|
||||
Args: tc.expectedArgs,
|
||||
Lifecycle: "createContainer",
|
||||
}
|
||||
|
||||
d, err := NewLDCacheUpdateHook(logger, mountMock, &cfg)
|
||||
require.NoError(t, err)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.Len(t, mountMock.MountsCalls(), 1)
|
||||
require.Len(t, mountMock.DevicesCalls(), 0)
|
||||
require.Len(t, mountMock.HooksCalls(), 0)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, hooks, 1)
|
||||
|
||||
require.EqualValues(t, hooks[0], expectedHook)
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, mounts)
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestIsLibName(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
|
||||
@@ -117,7 +117,7 @@ func (d symlinks) getSpecificLinkArgs() ([]string, error) {
|
||||
}
|
||||
|
||||
linkPath := filepath.Join(filepath.Dir(m.Path), link)
|
||||
links = append(links, "--link", fmt.Sprintf("%v:%v", target, linkPath))
|
||||
links = append(links, "--link", fmt.Sprintf("%v::%v", target, linkPath))
|
||||
linkProcessed[link] = true
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
|
||||
package info
|
||||
|
||||
import "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||
|
||||
// Logger is a basic interface for logging to allow these functions to be called
|
||||
// from code where logrus is not used.
|
||||
type Logger interface {
|
||||
@@ -32,10 +34,12 @@ func ResolveAutoMode(logger Logger, mode string) (rmode string) {
|
||||
logger.Infof("Auto-detected mode as '%v'", rmode)
|
||||
}()
|
||||
|
||||
isTegra, reason := IsTegraSystem()
|
||||
nvinfo := info.New()
|
||||
|
||||
isTegra, reason := nvinfo.IsTegraSystem()
|
||||
logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
|
||||
|
||||
hasNVML, reason := HasNVML()
|
||||
hasNVML, reason := nvinfo.HasNvml()
|
||||
logger.Debugf("Has NVML? %v: %v", hasNVML, reason)
|
||||
|
||||
if isTegra && !hasNVML {
|
||||
|
||||
39
internal/info/drm/drm_devices.go
Normal file
39
internal/info/drm/drm_devices.go
Normal file
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package drm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// GetDeviceNodesByBusID returns the DRM devices associated with the specified PCI bus ID
|
||||
func GetDeviceNodesByBusID(busID string) ([]string, error) {
|
||||
drmRoot := filepath.Join("/sys/bus/pci/devices", busID, "drm")
|
||||
matches, err := filepath.Glob(fmt.Sprintf("%s/*", drmRoot))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var drmDeviceNodes []string
|
||||
for _, m := range matches {
|
||||
drmDeviceNode := filepath.Join("/dev/dri", filepath.Base(m))
|
||||
drmDeviceNodes = append(drmDeviceNodes, drmDeviceNode)
|
||||
}
|
||||
|
||||
return drmDeviceNodes, nil
|
||||
}
|
||||
89
internal/info/proc/information_files.go
Normal file
89
internal/info/proc/information_files.go
Normal file
@@ -0,0 +1,89 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package proc
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GPUInfoField represents the field name for information specified in a GPU's information file
|
||||
type GPUInfoField string
|
||||
|
||||
// The following constants define the fields of interest from the GPU information file
|
||||
const (
|
||||
GPUInfoModel = GPUInfoField("Model")
|
||||
GPUInfoGPUUUID = GPUInfoField("GPU UUID")
|
||||
GPUInfoBusLocation = GPUInfoField("Bus Location")
|
||||
GPUInfoDeviceMinor = GPUInfoField("Device Minor")
|
||||
)
|
||||
|
||||
// GPUInfo stores the information for a GPU as determined from its associated information file
|
||||
type GPUInfo map[GPUInfoField]string
|
||||
|
||||
// GetInformationFilePaths returns the list of information files associated with NVIDIA GPUs.
|
||||
func GetInformationFilePaths(root string) ([]string, error) {
|
||||
return filepath.Glob(filepath.Join(root, "/proc/driver/nvidia/gpus/*/information"))
|
||||
}
|
||||
|
||||
// ParseGPUInformationFile parses the specified GPU information file and constructs a GPUInfo structure
|
||||
func ParseGPUInformationFile(path string) (GPUInfo, error) {
|
||||
infoFile, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open %v: %v", path, err)
|
||||
}
|
||||
defer infoFile.Close()
|
||||
|
||||
return gpuInfoFrom(infoFile), nil
|
||||
}
|
||||
|
||||
// gpuInfoFrom parses a GPUInfo struct from the specified reader
|
||||
// An information file has the following strucutre:
|
||||
// $ cat /proc/driver/nvidia/gpus/0000\:06\:00.0/information
|
||||
// Model: Tesla V100-SXM2-16GB
|
||||
// IRQ: 408
|
||||
// GPU UUID: GPU-edfee158-11c1-52b8-0517-92f30e7fac88
|
||||
// Video BIOS: 88.00.41.00.01
|
||||
// Bus Type: PCIe
|
||||
// DMA Size: 47 bits
|
||||
// DMA Mask: 0x7fffffffffff
|
||||
// Bus Location: 0000:06:00.0
|
||||
// Device Minor: 0
|
||||
// GPU Excluded: No
|
||||
func gpuInfoFrom(reader io.Reader) GPUInfo {
|
||||
info := make(GPUInfo)
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
field := GPUInfoField(parts[0])
|
||||
value := strings.TrimSpace(parts[1])
|
||||
|
||||
info[field] = value
|
||||
}
|
||||
|
||||
return info
|
||||
}
|
||||
292
internal/ldcache/ldcache.go
Normal file
292
internal/ldcache/ldcache.go
Normal file
@@ -0,0 +1,292 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
// Adapted from https://github.com/rai-project/ldcache
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const ldcachePath = "/etc/ld.so.cache"
|
||||
|
||||
const (
|
||||
magicString1 = "ld.so-1.7.0"
|
||||
magicString2 = "glibc-ld.so.cache"
|
||||
magicVersion = "1.1"
|
||||
)
|
||||
|
||||
const (
|
||||
flagTypeMask = 0x00ff
|
||||
flagTypeELF = 0x0001
|
||||
|
||||
flagArchMask = 0xff00
|
||||
flagArchI386 = 0x0000
|
||||
flagArchX8664 = 0x0300
|
||||
flagArchX32 = 0x0800
|
||||
flagArchPpc64le = 0x0500
|
||||
)
|
||||
|
||||
var errInvalidCache = errors.New("invalid ld.so.cache file")
|
||||
|
||||
type header1 struct {
|
||||
Magic [len(magicString1) + 1]byte // include null delimiter
|
||||
NLibs uint32
|
||||
}
|
||||
|
||||
type entry1 struct {
|
||||
Flags int32
|
||||
Key, Value uint32
|
||||
}
|
||||
|
||||
type header2 struct {
|
||||
Magic [len(magicString2)]byte
|
||||
Version [len(magicVersion)]byte
|
||||
NLibs uint32
|
||||
TableSize uint32
|
||||
_ [3]uint32 // unused
|
||||
_ uint64 // force 8 byte alignment
|
||||
}
|
||||
|
||||
type entry2 struct {
|
||||
Flags int32
|
||||
Key, Value uint32
|
||||
OSVersion uint32
|
||||
HWCap uint64
|
||||
}
|
||||
|
||||
// LDCache represents the interface for performing lookups into the LDCache
|
||||
type LDCache interface {
|
||||
List() ([]string, []string)
|
||||
Lookup(...string) ([]string, []string)
|
||||
}
|
||||
|
||||
type ldcache struct {
|
||||
*bytes.Reader
|
||||
|
||||
data, libs []byte
|
||||
header header2
|
||||
entries []entry2
|
||||
|
||||
root string
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// New creates a new LDCache with the specified logger and root.
|
||||
func New(logger *log.Logger, root string) (LDCache, error) {
|
||||
path := filepath.Join(root, ldcachePath)
|
||||
|
||||
logger.Debugf("Opening ld.conf at %v", path)
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d, err := syscall.Mmap(int(f.Fd()), 0, int(fi.Size()),
|
||||
syscall.PROT_READ, syscall.MAP_PRIVATE)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cache := &ldcache{
|
||||
data: d,
|
||||
Reader: bytes.NewReader(d),
|
||||
root: root,
|
||||
logger: logger,
|
||||
}
|
||||
return cache, cache.parse()
|
||||
}
|
||||
|
||||
func (c *ldcache) Close() error {
|
||||
return syscall.Munmap(c.data)
|
||||
}
|
||||
|
||||
func (c *ldcache) Magic() string {
|
||||
return string(c.header.Magic[:])
|
||||
}
|
||||
|
||||
func (c *ldcache) Version() string {
|
||||
return string(c.header.Version[:])
|
||||
}
|
||||
|
||||
func strn(b []byte, n int) string {
|
||||
return string(b[:n])
|
||||
}
|
||||
|
||||
func (c *ldcache) parse() error {
|
||||
var header header1
|
||||
|
||||
// Check for the old format (< glibc-2.2)
|
||||
if c.Len() <= int(unsafe.Sizeof(header)) {
|
||||
return errInvalidCache
|
||||
}
|
||||
if strn(c.data, len(magicString1)) == magicString1 {
|
||||
if err := binary.Read(c, binary.LittleEndian, &header); err != nil {
|
||||
return err
|
||||
}
|
||||
n := int64(header.NLibs) * int64(unsafe.Sizeof(entry1{}))
|
||||
offset, err := c.Seek(n, 1) // skip old entries
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n = (-offset) & int64(unsafe.Alignof(c.header)-1)
|
||||
_, err = c.Seek(n, 1) // skip padding
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
c.libs = c.data[c.Size()-int64(c.Len()):] // kv offsets start here
|
||||
if err := binary.Read(c, binary.LittleEndian, &c.header); err != nil {
|
||||
return err
|
||||
}
|
||||
if c.Magic() != magicString2 || c.Version() != magicVersion {
|
||||
return errInvalidCache
|
||||
}
|
||||
c.entries = make([]entry2, c.header.NLibs)
|
||||
if err := binary.Read(c, binary.LittleEndian, &c.entries); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// List creates a list of libraires in the ldcache.
|
||||
// The 32-bit and 64-bit libraries are returned separately.
|
||||
func (c *ldcache) List() ([]string, []string) {
|
||||
paths := make(map[int][]string)
|
||||
|
||||
processed := make(map[string]bool)
|
||||
|
||||
for _, e := range c.entries {
|
||||
bits := 0
|
||||
if ((e.Flags & flagTypeMask) & flagTypeELF) == 0 {
|
||||
continue
|
||||
}
|
||||
switch e.Flags & flagArchMask {
|
||||
case flagArchX8664:
|
||||
fallthrough
|
||||
case flagArchPpc64le:
|
||||
bits = 64
|
||||
case flagArchX32:
|
||||
fallthrough
|
||||
case flagArchI386:
|
||||
bits = 32
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if e.Key > uint32(len(c.libs)) || e.Value > uint32(len(c.libs)) {
|
||||
continue
|
||||
}
|
||||
value := c.libs[e.Value:]
|
||||
|
||||
n := bytes.IndexByte(value, 0)
|
||||
if n < 0 {
|
||||
break
|
||||
}
|
||||
|
||||
name := filepath.Join(c.root, strn(value, n))
|
||||
c.logger.Debugf("checking %v", string(name))
|
||||
|
||||
path, err := filepath.EvalSymlinks(name)
|
||||
if err != nil {
|
||||
c.logger.Debugf("could not resolve symlink for %v", name)
|
||||
break
|
||||
}
|
||||
if processed[path] {
|
||||
continue
|
||||
}
|
||||
paths[bits] = append(paths[bits], path)
|
||||
processed[path] = true
|
||||
}
|
||||
|
||||
return paths[32], paths[64]
|
||||
}
|
||||
|
||||
// Lookup searches the ldcache for the specified prefixes.
|
||||
// The 32-bit and 64-bit libraries matching the prefixes are returned.
|
||||
func (c *ldcache) Lookup(libs ...string) (paths32, paths64 []string) {
|
||||
c.logger.Debugf("Looking up %v in cache", libs)
|
||||
type void struct{}
|
||||
var paths *[]string
|
||||
|
||||
set := make(map[string]void)
|
||||
prefix := make([][]byte, len(libs))
|
||||
|
||||
for i := range libs {
|
||||
prefix[i] = []byte(libs[i])
|
||||
}
|
||||
for _, e := range c.entries {
|
||||
if ((e.Flags & flagTypeMask) & flagTypeELF) == 0 {
|
||||
continue
|
||||
}
|
||||
switch e.Flags & flagArchMask {
|
||||
case flagArchX8664:
|
||||
fallthrough
|
||||
case flagArchPpc64le:
|
||||
paths = &paths64
|
||||
case flagArchX32:
|
||||
fallthrough
|
||||
case flagArchI386:
|
||||
paths = &paths32
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if e.Key > uint32(len(c.libs)) || e.Value > uint32(len(c.libs)) {
|
||||
continue
|
||||
}
|
||||
lib := c.libs[e.Key:]
|
||||
value := c.libs[e.Value:]
|
||||
|
||||
for _, p := range prefix {
|
||||
if bytes.HasPrefix(lib, p) {
|
||||
n := bytes.IndexByte(value, 0)
|
||||
if n < 0 {
|
||||
break
|
||||
}
|
||||
|
||||
name := filepath.Join(c.root, strn(value, n))
|
||||
c.logger.Debugf("checking %v", string(name))
|
||||
|
||||
path, err := filepath.EvalSymlinks(name)
|
||||
if err != nil {
|
||||
c.logger.Debugf("could not resolve symlink for %v", name)
|
||||
break
|
||||
}
|
||||
if _, ok := set[path]; ok {
|
||||
break
|
||||
}
|
||||
set[path] = void{}
|
||||
*paths = append(*paths, path)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
68
internal/lookup/library.go
Normal file
68
internal/lookup/library.go
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type library struct {
|
||||
logger *log.Logger
|
||||
symlink Locator
|
||||
cache ldcache.LDCache
|
||||
}
|
||||
|
||||
var _ Locator = (*library)(nil)
|
||||
|
||||
// NewLibraryLocator creates a library locator using the specified logger.
|
||||
func NewLibraryLocator(logger *log.Logger, root string) (Locator, error) {
|
||||
cache, err := ldcache.New(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading ldcache: %v", err)
|
||||
}
|
||||
|
||||
l := library{
|
||||
symlink: NewSymlinkLocator(logger, root),
|
||||
cache: cache,
|
||||
}
|
||||
|
||||
return &l, nil
|
||||
}
|
||||
|
||||
// Locate finds the specified libraryname.
|
||||
// If the input is a library name, the ldcache is searched otherwise the
|
||||
// provided path is resolved as a symlink.
|
||||
func (l library) Locate(libname string) ([]string, error) {
|
||||
if strings.Contains(libname, "/") {
|
||||
return l.symlink.Locate(libname)
|
||||
}
|
||||
|
||||
paths32, paths64 := l.cache.Lookup(libname)
|
||||
if len(paths32) > 0 {
|
||||
l.logger.Warnf("Ignoring 32-bit libraries for %v: %v", libname, paths32)
|
||||
}
|
||||
|
||||
if len(paths64) == 0 {
|
||||
return nil, fmt.Errorf("64-bit library %v not found", libname)
|
||||
}
|
||||
|
||||
return paths64, nil
|
||||
}
|
||||
@@ -80,7 +80,7 @@ func getDevicesFromSpec(ociSpec oci.Spec) ([]string, error) {
|
||||
}
|
||||
|
||||
uniqueDevices := make(map[string]struct{})
|
||||
for _, name := range append(envDevices, annotationDevices...) {
|
||||
for _, name := range append(envDevices.List(), annotationDevices...) {
|
||||
if !cdi.IsQualifiedName(name) {
|
||||
name = cdi.QualifiedName("nvidia.com", "gpu", name)
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices) == 0 {
|
||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
|
||||
logger.Infof("No modification required; no devices requested")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ func NewGDSModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices) == 0 {
|
||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
|
||||
logger.Infof("No modification required; no devices requested")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
74
internal/modifier/graphics.go
Normal file
74
internal/modifier/graphics.go
Normal file
@@ -0,0 +1,74 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewGraphicsModifier constructs a modifier that injects graphics-related modifications into an OCI runtime specification.
|
||||
// The value of the NVIDIA_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made.
|
||||
func NewGraphicsModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||
rawSpec, err := ociSpec.Load()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||
}
|
||||
|
||||
image, err := image.NewCUDAImageFromSpec(rawSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if required, reason := requiresGraphicsModifier(image); !required {
|
||||
logger.Infof("No graphics modifier required: %v", reason)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
config := &discover.Config{
|
||||
Root: cfg.NVIDIAContainerCLIConfig.Root,
|
||||
NVIDIAContainerToolkitCLIExecutablePath: cfg.NVIDIACTKConfig.Path,
|
||||
}
|
||||
d, err := discover.NewGraphicsDiscoverer(
|
||||
logger,
|
||||
image.DevicesFromEnvvars(visibleDevicesEnvvar),
|
||||
config,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct discoverer: %v", err)
|
||||
}
|
||||
|
||||
return NewModifierFromDiscoverer(logger, d)
|
||||
}
|
||||
|
||||
// requiresGraphicsModifier determines whether a graphics modifier is required.
|
||||
func requiresGraphicsModifier(cudaImage image.CUDA) (bool, string) {
|
||||
if devices := cudaImage.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
|
||||
return false, "no devices requested"
|
||||
}
|
||||
|
||||
if !cudaImage.GetDriverCapabilities().Any(image.DriverCapabilityGraphics, image.DriverCapabilityDisplay) {
|
||||
return false, "no required capabilities requested"
|
||||
}
|
||||
|
||||
return true, ""
|
||||
}
|
||||
96
internal/modifier/graphics_test.go
Normal file
96
internal/modifier/graphics_test.go
Normal file
@@ -0,0 +1,96 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGraphicsModifier(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
cudaImage image.CUDA
|
||||
expectedRequired bool
|
||||
}{
|
||||
{
|
||||
description: "empty image does not create modifier",
|
||||
},
|
||||
{
|
||||
description: "devices with no capabilities does not create modifier",
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "devices with no non-graphics does not create modifier",
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "compute",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "devices with all capabilities creates modifier",
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "all",
|
||||
},
|
||||
expectedRequired: true,
|
||||
},
|
||||
{
|
||||
description: "devices with graphics capability creates modifier",
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "graphics",
|
||||
},
|
||||
expectedRequired: true,
|
||||
},
|
||||
{
|
||||
description: "devices with compute,graphics capability creates modifier",
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "compute,graphics",
|
||||
},
|
||||
expectedRequired: true,
|
||||
},
|
||||
{
|
||||
description: "devices with display capability creates modifier",
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "display",
|
||||
},
|
||||
expectedRequired: true,
|
||||
},
|
||||
{
|
||||
description: "devices with display,graphics capability creates modifier",
|
||||
cudaImage: image.CUDA{
|
||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||
"NVIDIA_DRIVER_CAPABILITIES": "display,graphics",
|
||||
},
|
||||
expectedRequired: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
required, _ := requiresGraphicsModifier(tc.cudaImage)
|
||||
require.EqualValues(t, tc.expectedRequired, required)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -43,7 +43,7 @@ func NewMOFEDModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spe
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices) == 0 {
|
||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
|
||||
logger.Infof("No modification required; no devices requested")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -17,11 +17,10 @@
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
@@ -44,21 +43,8 @@ type stableRuntimeModifier struct {
|
||||
// Modify applies the required modification to the incoming OCI spec, inserting the nvidia-container-runtime-hook
|
||||
// as a prestart hook.
|
||||
func (m stableRuntimeModifier) Modify(spec *specs.Spec) error {
|
||||
path, err := exec.LookPath(config.NVIDIAContainerRuntimeHookExecutable)
|
||||
if err != nil {
|
||||
path = filepath.Join(config.DefaultExecutableDir, config.NVIDIAContainerRuntimeHookExecutable)
|
||||
_, err = os.Stat(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
m.logger.Infof("Using prestart hook path: %s", path)
|
||||
|
||||
args := []string{path}
|
||||
if spec.Hooks == nil {
|
||||
spec.Hooks = &specs.Hooks{}
|
||||
} else if len(spec.Hooks.Prestart) != 0 {
|
||||
// If an NVIDIA Container Runtime Hook already exists, we don't make any modifications to the spec.
|
||||
if spec.Hooks != nil {
|
||||
for _, hook := range spec.Hooks.Prestart {
|
||||
if isNVIDIAContainerRuntimeHook(&hook) {
|
||||
m.logger.Infof("Existing nvidia prestart hook (%v) found in OCI spec", hook.Path)
|
||||
@@ -67,6 +53,21 @@ func (m stableRuntimeModifier) Modify(spec *specs.Spec) error {
|
||||
}
|
||||
}
|
||||
|
||||
// We create a locator and look for the NVIDIA Container Runtime Hook in the path.
|
||||
candidates, err := lookup.NewExecutableLocator(m.logger, "").Locate(config.NVIDIAContainerRuntimeHookExecutable)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to locate NVIDIA Container Runtime Hook: %v", err)
|
||||
}
|
||||
path := candidates[0]
|
||||
if len(candidates) > 1 {
|
||||
m.logger.Debugf("Using %v from multiple NVIDIA Container Runtime Hook candidates: %v", path, candidates)
|
||||
}
|
||||
|
||||
m.logger.Infof("Using prestart hook path: %v", path)
|
||||
args := []string{path}
|
||||
if spec.Hooks == nil {
|
||||
spec.Hooks = &specs.Hooks{}
|
||||
}
|
||||
spec.Hooks.Prestart = append(spec.Hooks.Prestart, specs.Hook{
|
||||
Path: path,
|
||||
Args: append(args, "prestart"),
|
||||
|
||||
45
internal/modifier/tegra.go
Normal file
45
internal/modifier/tegra.go
Normal file
@@ -0,0 +1,45 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||
)
|
||||
|
||||
// NewTegraPlatformFiles creates a modifier to inject the Tegra platform files into a container.
|
||||
func NewTegraPlatformFiles(logger *logrus.Logger) (oci.SpecModifier, error) {
|
||||
isTegra, _ := info.New().IsTegraSystem()
|
||||
if !isTegra {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
tegraSystemMounts := discover.NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(logger, ""),
|
||||
"",
|
||||
[]string{
|
||||
"/etc/nv_tegra_release",
|
||||
"/sys/devices/soc0/family",
|
||||
},
|
||||
)
|
||||
|
||||
return NewModifierFromDiscoverer(logger, tegraSystemMounts)
|
||||
}
|
||||
166
internal/nvcaps/nvcaps.go
Normal file
166
internal/nvcaps/nvcaps.go
Normal file
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package nvcaps
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaProcDriverPath = "/proc/driver/nvidia"
|
||||
nvidiaCapabilitiesPath = nvidiaProcDriverPath + "/capabilities"
|
||||
|
||||
nvcapsProcDriverPath = "/proc/driver/nvidia-caps"
|
||||
nvcapsMigMinorsPath = nvcapsProcDriverPath + "/mig-minors"
|
||||
nvcapsDevicePath = "/dev/nvidia-caps"
|
||||
)
|
||||
|
||||
// MigMinor represents the minor number of a MIG device
|
||||
type MigMinor int
|
||||
|
||||
// MigCap represents the path to a MIG cap file
|
||||
type MigCap string
|
||||
|
||||
// MigCaps stores a map of MIG cap file paths to MIG minors
|
||||
type MigCaps map[MigCap]MigMinor
|
||||
|
||||
// NewGPUInstanceCap creates a MigCap for the specified MIG GPU instance.
|
||||
// A GPU instance is uniquely defined by the GPU minor number and GI instance ID.
|
||||
func NewGPUInstanceCap(gpu, gi int) MigCap {
|
||||
return MigCap(fmt.Sprintf("gpu%d/gi%d/access", gpu, gi))
|
||||
}
|
||||
|
||||
// NewComputeInstanceCap creates a MigCap for the specified MIG Compute instance.
|
||||
// A GPU instance is uniquely defined by the GPU minor number, GI instance ID, and CI instance ID.
|
||||
func NewComputeInstanceCap(gpu, gi, ci int) MigCap {
|
||||
return MigCap(fmt.Sprintf("gpu%d/gi%d/ci%d/access", gpu, gi, ci))
|
||||
}
|
||||
|
||||
// GetCapDevicePath returns the path to the cap device for the specified cap.
|
||||
// An error is returned if the cap is invalid.
|
||||
func (m MigCaps) GetCapDevicePath(cap MigCap) (string, error) {
|
||||
minor, exists := m[cap]
|
||||
if !exists {
|
||||
return "", fmt.Errorf("invalid MIG capability path %v", cap)
|
||||
}
|
||||
return minor.DevicePath(), nil
|
||||
}
|
||||
|
||||
// NewMigCaps creates a MigCaps structure based on the contents of the MIG minors file.
|
||||
func NewMigCaps() (MigCaps, error) {
|
||||
// Open nvcapsMigMinorsPath for walking.
|
||||
// If the nvcapsMigMinorsPath does not exist, then we are not on a MIG
|
||||
// capable machine, so there is nothing to do.
|
||||
// The format of this file is discussed in:
|
||||
// https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#unique_1576522674
|
||||
minorsFile, err := os.Open(nvcapsMigMinorsPath)
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening MIG minors file: %v", err)
|
||||
}
|
||||
defer minorsFile.Close()
|
||||
|
||||
return processMinorsFile(minorsFile), nil
|
||||
}
|
||||
|
||||
func processMinorsFile(minorsFile io.Reader) MigCaps {
|
||||
// Walk each line of nvcapsMigMinorsPath and construct a mapping of nvidia
|
||||
// capabilities path to device minor for that capability
|
||||
migCaps := make(MigCaps)
|
||||
scanner := bufio.NewScanner(minorsFile)
|
||||
for scanner.Scan() {
|
||||
cap, minor, err := processMigMinorsLine(scanner.Text())
|
||||
if err != nil {
|
||||
log.Printf("Skipping line in MIG minors file: %v", err)
|
||||
continue
|
||||
}
|
||||
migCaps[cap] = minor
|
||||
}
|
||||
return migCaps
|
||||
}
|
||||
|
||||
func processMigMinorsLine(line string) (MigCap, MigMinor, error) {
|
||||
parts := strings.Split(line, " ")
|
||||
if len(parts) != 2 {
|
||||
return "", 0, fmt.Errorf("error processing line: %v", line)
|
||||
}
|
||||
|
||||
migCap := MigCap(parts[0])
|
||||
if !migCap.isValid() {
|
||||
return "", 0, fmt.Errorf("invalid MIG minors line: '%v'", line)
|
||||
}
|
||||
|
||||
minor, err := strconv.Atoi(parts[1])
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("error reading MIG minor from '%v': %v", line, err)
|
||||
}
|
||||
|
||||
return migCap, MigMinor(minor), nil
|
||||
}
|
||||
|
||||
func (m MigCap) isValid() bool {
|
||||
cap := string(m)
|
||||
switch cap {
|
||||
case "config", "monitor":
|
||||
return true
|
||||
default:
|
||||
var gpu int
|
||||
var gi int
|
||||
var ci int
|
||||
// Look for a CI access file
|
||||
n, _ := fmt.Sscanf(cap, "gpu%d/gi%d/ci%d/access", &gpu, &gi, &ci)
|
||||
if n == 3 {
|
||||
return true
|
||||
}
|
||||
// Look for a GI access file
|
||||
n, _ = fmt.Sscanf(cap, "gpu%d/gi%d/access %d", &gpu, &gi)
|
||||
if n == 2 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ProcPath returns the proc path associated with the MIG capability
|
||||
func (m MigCap) ProcPath() string {
|
||||
id := string(m)
|
||||
|
||||
var path string
|
||||
switch id {
|
||||
case "config", "monitor":
|
||||
path = "mig/" + id
|
||||
default:
|
||||
parts := strings.SplitN(id, "/", 2)
|
||||
path = strings.Join([]string{parts[0], "mig", parts[1]}, "/")
|
||||
}
|
||||
return filepath.Join(nvidiaCapabilitiesPath, path)
|
||||
}
|
||||
|
||||
// DevicePath returns the path for the nvidia-caps device with the specified
|
||||
// minor number
|
||||
func (m MigMinor) DevicePath() string {
|
||||
return fmt.Sprintf(nvcapsDevicePath+"/nvidia-cap%d", m)
|
||||
}
|
||||
100
internal/nvcaps/nvcaps_test.go
Normal file
100
internal/nvcaps/nvcaps_test.go
Normal file
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package nvcaps
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestProcessMinorsFile(t *testing.T) {
|
||||
testCases := []struct {
|
||||
lines []string
|
||||
expected MigCaps
|
||||
}{
|
||||
{[]string{}, MigCaps{}},
|
||||
{[]string{"invalidLine"}, MigCaps{}},
|
||||
{[]string{"config 1"}, MigCaps{"config": 1}},
|
||||
{[]string{"gpu0/gi0/ci0/access 4"}, MigCaps{"gpu0/gi0/ci0/access": 4}},
|
||||
{[]string{"config 1", "invalidLine"}, MigCaps{"config": 1}},
|
||||
{[]string{"config 1", "gpu0/gi0/ci0/access 4"}, MigCaps{"config": 1, "gpu0/gi0/ci0/access": 4}},
|
||||
}
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("testcase %d", i), func(t *testing.T) {
|
||||
contents := strings.NewReader(strings.Join(tc.lines, "\n"))
|
||||
d := processMinorsFile(contents)
|
||||
require.Equal(t, tc.expected, d)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessMigMinorsLine(t *testing.T) {
|
||||
testCases := []struct {
|
||||
line string
|
||||
cap MigCap
|
||||
minor MigMinor
|
||||
err bool
|
||||
}{
|
||||
{"config 1", "config", 1, false},
|
||||
{"monitor 2", "monitor", 2, false},
|
||||
{"gpu0/gi0/access 3", "gpu0/gi0/access", 3, false},
|
||||
{"gpu0/gi0/ci0/access 4", "gpu0/gi0/ci0/access", 4, false},
|
||||
{"notconfig 99", "", 0, true},
|
||||
{"config notanint", "", 0, true},
|
||||
{"", "", 0, true},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("testcase %d", i), func(t *testing.T) {
|
||||
cap, minor, err := processMigMinorsLine(tc.line)
|
||||
|
||||
require.Equal(t, tc.cap, cap)
|
||||
require.Equal(t, tc.minor, minor)
|
||||
if tc.err {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigCapProcPaths(t *testing.T) {
|
||||
testCases := []struct {
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"config", "/proc/driver/nvidia/capabilities/mig/config"},
|
||||
{"monitor", "/proc/driver/nvidia/capabilities/mig/monitor"},
|
||||
{"gpu0/gi0/access", "/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"},
|
||||
{"gpu0/gi0/ci0/access", "/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"},
|
||||
}
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("testcase %d", i), func(t *testing.T) {
|
||||
m := MigCap(tc.input)
|
||||
require.Equal(t, tc.expected, m.ProcPath())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigMinorDevicePath(t *testing.T) {
|
||||
m := MigMinor(0)
|
||||
require.Equal(t, "/dev/nvidia-caps/nvidia-cap0", m.DevicePath())
|
||||
}
|
||||
@@ -10,8 +10,16 @@ Build-Depends: debhelper (>= 9)
|
||||
|
||||
Package: nvidia-container-toolkit
|
||||
Architecture: any
|
||||
Depends: ${misc:Depends}, libnvidia-container-tools (>= @LIBNVIDIA_CONTAINER_TOOLS_VERSION@), libnvidia-container-tools (<< 2.0.0), libseccomp2
|
||||
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (>= @LIBNVIDIA_CONTAINER_TOOLS_VERSION@), libnvidia-container-tools (<< 2.0.0), libseccomp2
|
||||
Breaks: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
|
||||
Replaces: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
|
||||
Description: NVIDIA Container toolkit
|
||||
Provides tools and utilities to enable GPU support in containers.
|
||||
|
||||
Package: nvidia-container-toolkit-base
|
||||
Architecture: any
|
||||
Depends: ${misc:Depends}
|
||||
Breaks: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook, nvidia-container-toolkit (<= 1.10.0-1)
|
||||
Replaces: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
|
||||
Description: NVIDIA Container Toolkit Base
|
||||
Provides tools such as the NVIDIA Container Runtime and NVIDIA Container Toolkit CLI to enable GPU support in containers.
|
||||
|
||||
3
packaging/debian/nvidia-container-toolkit-base.install
Normal file
3
packaging/debian/nvidia-container-toolkit-base.install
Normal file
@@ -0,0 +1,3 @@
|
||||
config.toml /etc/nvidia-container-runtime
|
||||
nvidia-container-runtime /usr/bin
|
||||
nvidia-ctk /usr/bin
|
||||
@@ -1,4 +1 @@
|
||||
config.toml /etc/nvidia-container-runtime
|
||||
nvidia-container-runtime-hook /usr/bin
|
||||
nvidia-container-runtime /usr/bin
|
||||
nvidia-ctk /usr/bin
|
||||
|
||||
@@ -4,6 +4,7 @@ set -e
|
||||
|
||||
sed -i "s;@SECTION@;${SECTION:+$SECTION/};g" debian/control
|
||||
sed -i "s;@LIBNVIDIA_CONTAINER_TOOLS_VERSION@;${LIBNVIDIA_CONTAINER_TOOLS_VERSION:+$LIBNVIDIA_CONTAINER_TOOLS_VERSION};g" debian/control
|
||||
sed -i "s;@VERSION@;${VERSION:+$VERSION};g" debian/control
|
||||
|
||||
if [ -n "$DISTRIB" ]; then
|
||||
sed -i "s;UNRELEASED;$DISTRIB;" debian/changelog
|
||||
|
||||
@@ -18,10 +18,11 @@ Source4: oci-nvidia-hook
|
||||
Source5: oci-nvidia-hook.json
|
||||
Source6: LICENSE
|
||||
|
||||
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook
|
||||
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
|
||||
Provides: nvidia-container-runtime
|
||||
Provides: nvidia-container-runtime-hook
|
||||
Requires: libnvidia-container-tools >= %{libnvidia_container_tools_version}, libnvidia-container-tools < 2.0.0
|
||||
Requires: nvidia-container-toolkit-base == %{version}-%{release}
|
||||
|
||||
%if 0%{?suse_version}
|
||||
Requires: libseccomp2
|
||||
@@ -55,14 +56,11 @@ install -m 644 -t %{buildroot}/usr/share/containers/oci/hooks.d oci-nvidia-hook.
|
||||
ln -sf %{_bindir}/nvidia-container-runtime-hook %{_bindir}/nvidia-container-toolkit
|
||||
|
||||
%postun
|
||||
rm -f %{_bindir}/nvidia-container-runtime-toolkit
|
||||
if [ -L %{_bindir}/nvidia-container-toolkit ] then; rm -f %{_bindir}/nvidia-container-toolkit; fi
|
||||
|
||||
%files
|
||||
%license LICENSE
|
||||
%{_bindir}/nvidia-container-runtime-hook
|
||||
%{_bindir}/nvidia-container-runtime
|
||||
%{_bindir}/nvidia-ctk
|
||||
%config /etc/nvidia-container-runtime/config.toml
|
||||
/usr/libexec/oci/hooks.d/oci-nvidia-hook
|
||||
/usr/share/containers/oci/hooks.d/oci-nvidia-hook.json
|
||||
|
||||
@@ -71,3 +69,22 @@ rm -f %{_bindir}/nvidia-container-runtime-toolkit
|
||||
* %{release_date} NVIDIA CORPORATION <cudatools@nvidia.com> %{version}-%{release}
|
||||
- See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/%{git_commit}/CHANGELOG.md
|
||||
- Bump libnvidia-container dependency to libnvidia-container-tools >= %{libnvidia_container_tools_version}
|
||||
|
||||
# The BASE package consists of the NVIDIA Container Runtime and the NVIDIA Container Toolkit CLI.
|
||||
# This allows the package to be installed on systems where no NVIDIA Container CLI is available.
|
||||
%package base
|
||||
Summary: NVIDIA Container Toolkit Base
|
||||
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
|
||||
Provides: nvidia-container-runtime
|
||||
# Since this package allows certain components of the NVIDIA Container Toolkit to be installed separately
|
||||
# it conflicts with older versions of the nvidia-container-toolkit package that also provide these files.
|
||||
Conflicts: nvidia-container-toolkit <= 1.10.0-1
|
||||
|
||||
%description base
|
||||
Provides tools such as the NVIDIA Container Runtime and NVIDIA Container Toolkit CLI to enable GPU support in containers.
|
||||
|
||||
%files base
|
||||
%license LICENSE
|
||||
%config /etc/nvidia-container-runtime/config.toml
|
||||
%{_bindir}/nvidia-container-runtime
|
||||
%{_bindir}/nvidia-ctk
|
||||
|
||||
@@ -21,14 +21,14 @@
|
||||
|
||||
function assert_usage() {
|
||||
echo "Missing argument $1"
|
||||
echo "$(basename ${BASH_SOURCE[0]}) TARGET"
|
||||
echo "$(basename "${BASH_SOURCE[0]}") TARGET"
|
||||
exit 1
|
||||
}
|
||||
|
||||
set -e
|
||||
|
||||
SCRIPTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )"/../scripts && pwd )"
|
||||
PROJECT_ROOT="$( cd ${SCRIPTS_DIR}/.. && pwd )"
|
||||
PROJECT_ROOT="$( cd "${SCRIPTS_DIR}"/.. && pwd )"
|
||||
|
||||
if [[ $# -ne 1 ]]; then
|
||||
assert_usage "TARGET"
|
||||
@@ -36,46 +36,51 @@ fi
|
||||
|
||||
TARGET=$1
|
||||
|
||||
: ${DIST_DIR:=${PROJECT_ROOT}/dist}
|
||||
: "${DIST_DIR:=${PROJECT_ROOT}/dist}"
|
||||
export DIST_DIR
|
||||
|
||||
echo "Building ${TARGET} for all packages to ${DIST_DIR}"
|
||||
|
||||
: ${LIBNVIDIA_CONTAINER_ROOT:=${PROJECT_ROOT}/third_party/libnvidia-container}
|
||||
: ${NVIDIA_CONTAINER_TOOLKIT_ROOT:=${PROJECT_ROOT}}
|
||||
: ${NVIDIA_CONTAINER_RUNTIME_ROOT:=${PROJECT_ROOT}/third_party/nvidia-container-runtime}
|
||||
: ${NVIDIA_DOCKER_ROOT:=${PROJECT_ROOT}/third_party/nvidia-docker}
|
||||
: "${LIBNVIDIA_CONTAINER_ROOT:=${PROJECT_ROOT}/third_party/libnvidia-container}"
|
||||
: "${NVIDIA_CONTAINER_TOOLKIT_ROOT:=${PROJECT_ROOT}}"
|
||||
: "${NVIDIA_CONTAINER_RUNTIME_ROOT:=${PROJECT_ROOT}/third_party/nvidia-container-runtime}"
|
||||
: "${NVIDIA_DOCKER_ROOT:=${PROJECT_ROOT}/third_party/nvidia-docker}"
|
||||
|
||||
|
||||
${SCRIPTS_DIR}/get-component-versions.sh
|
||||
"${SCRIPTS_DIR}/get-component-versions.sh"
|
||||
|
||||
# Build libnvidia-container
|
||||
make -C ${LIBNVIDIA_CONTAINER_ROOT} -f mk/docker.mk ${TARGET}
|
||||
make -C "${LIBNVIDIA_CONTAINER_ROOT}" -f mk/docker.mk "${TARGET}"
|
||||
|
||||
if [[ -z ${NVIDIA_CONTAINER_TOOLKIT_VERSION} || -z ${LIBNVIDIA_CONTAINER_VERSION} ]]; then
|
||||
if [[ -z "${NVIDIA_CONTAINER_TOOLKIT_VERSION}" || -z "${LIBNVIDIA_CONTAINER_VERSION}" ]]; then
|
||||
eval $(${SCRIPTS_DIR}/get-component-versions.sh)
|
||||
fi
|
||||
|
||||
# Build nvidia-container-toolkit
|
||||
make -C ${NVIDIA_CONTAINER_TOOLKIT_ROOT} \
|
||||
make -C "${NVIDIA_CONTAINER_TOOLKIT_ROOT}" \
|
||||
LIBNVIDIA_CONTAINER_VERSION="${LIBNVIDIA_CONTAINER_VERSION}" \
|
||||
LIBNVIDIA_CONTAINER_TAG="${LIBNVIDIA_CONTAINER_TAG}" \
|
||||
${TARGET}
|
||||
"${TARGET}"
|
||||
|
||||
# We set the TOOLKIT_VERSION, TOOLKIT_TAG for the nvidia-container-runtime and nvidia-docker targets
|
||||
# The LIB_TAG is also overridden to match the TOOLKIT_TAG.
|
||||
# Build nvidia-container-runtime
|
||||
make -C ${NVIDIA_CONTAINER_RUNTIME_ROOT} \
|
||||
LIB_VERSION="${NVIDIA_CONTAINER_RUNTIME_VERSION}" \
|
||||
LIB_TAG="${NVIDIA_CONTAINER_TOOLKIT_TAG}" \
|
||||
TOOLKIT_VERSION="${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
|
||||
TOOLKIT_TAG="${NVIDIA_CONTAINER_TOOLKIT_TAG}" \
|
||||
${TARGET}
|
||||
if [[ -z ${NVIDIA_CONTAINER_TOOLKIT_TAG} ]]; then
|
||||
# We set the TOOLKIT_VERSION, TOOLKIT_TAG for the nvidia-container-runtime and nvidia-docker targets
|
||||
# The LIB_TAG is also overridden to match the TOOLKIT_TAG.
|
||||
# Build nvidia-container-runtime
|
||||
make -C ${NVIDIA_CONTAINER_RUNTIME_ROOT} \
|
||||
LIB_VERSION="${NVIDIA_CONTAINER_RUNTIME_VERSION}" \
|
||||
LIB_TAG="${NVIDIA_CONTAINER_TOOLKIT_TAG}" \
|
||||
TOOLKIT_VERSION="${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
|
||||
TOOLKIT_TAG="${NVIDIA_CONTAINER_TOOLKIT_TAG}" \
|
||||
${TARGET}
|
||||
|
||||
# Build nvidia-docker2
|
||||
make -C ${NVIDIA_DOCKER_ROOT} \
|
||||
LIB_VERSION="${NVIDIA_DOCKER_VERSION}" \
|
||||
LIB_TAG="${NVIDIA_CONTAINER_TOOLKIT_TAG}" \
|
||||
TOOLKIT_VERSION="${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
|
||||
TOOLKIT_TAG="${NVIDIA_CONTAINER_TOOLKIT_TAG}" \
|
||||
${TARGET}
|
||||
# Build nvidia-docker2
|
||||
make -C ${NVIDIA_DOCKER_ROOT} \
|
||||
LIB_VERSION="${NVIDIA_DOCKER_VERSION}" \
|
||||
LIB_TAG="${NVIDIA_CONTAINER_TOOLKIT_TAG}" \
|
||||
TOOLKIT_VERSION="${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
|
||||
TOOLKIT_TAG="${NVIDIA_CONTAINER_TOOLKIT_TAG}" \
|
||||
${TARGET}
|
||||
|
||||
else
|
||||
echo "Skipping nvidia-container-runtime and nvidia-docker builds for release candidate"
|
||||
fi
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -22,7 +22,6 @@
|
||||
set -e
|
||||
|
||||
SCRIPTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )"/../scripts && pwd )"
|
||||
PROJECT_ROOT="$( cd ${SCRIPTS_DIR}/.. && pwd )"
|
||||
|
||||
# This list represents the distribution-architecture pairs that are actually published
|
||||
# to the relevant repositories. This targets forwarded to the build-all-components script
|
||||
@@ -37,6 +36,8 @@ all=(
|
||||
centos8-x86_64
|
||||
debian10-amd64
|
||||
debian9-amd64
|
||||
fedora35-aarch64
|
||||
fedora35-x86_64
|
||||
opensuse-leap15.1-x86_64
|
||||
ubuntu16.04-amd64
|
||||
ubuntu16.04-ppc64le
|
||||
@@ -52,7 +53,7 @@ else
|
||||
fi
|
||||
|
||||
echo "Updating components"
|
||||
${SCRIPTS_DIR}/update-components.sh
|
||||
"${SCRIPTS_DIR}/update-components.sh"
|
||||
if [[ -n $(git status -s third_party) && ${ALLOW_LOCAL_COMPONENT_CHANGES} != "true" ]]; then
|
||||
echo "ERROR: Building with local component changes."
|
||||
echo "Commit pending changes or rerun with ALLOW_LOCAL_COMPONENT_CHANGES='true'"
|
||||
@@ -64,7 +65,7 @@ eval $(${SCRIPTS_DIR}/get-component-versions.sh)
|
||||
|
||||
if [[ -n ${NVIDIA_CONTAINER_TOOLKIT_TAG} ]]; then
|
||||
echo "Allowing mismatched versions for release candidate "
|
||||
: ${ALLOW_VERSION_MISMATCH:=true}
|
||||
: "${ALLOW_VERSION_MISMATCH:=true}"
|
||||
fi
|
||||
|
||||
if [[ "${NVIDIA_CONTAINER_TOOLKIT_PACKAGE_VERSION}" != "${LIBNVIDIA_CONTAINER_PACKAGE_VERSION}" ]]; then
|
||||
@@ -84,6 +85,6 @@ export LIBNVIDIA_CONTAINER_TAG
|
||||
export NVIDIA_CONTAINER_RUNTIME_VERSION
|
||||
export NVIDIA_DOCKER_VERSION
|
||||
|
||||
for target in ${targets[@]}; do
|
||||
${SCRIPTS_DIR}/build-all-components.sh ${target}
|
||||
for target in "${targets[@]}"; do
|
||||
"${SCRIPTS_DIR}/build-all-components.sh" "${target}"
|
||||
done
|
||||
|
||||
114
scripts/extract-packages.sh
Executable file
114
scripts/extract-packages.sh
Executable file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
function assert_usage() {
|
||||
echo "Incorrect arguments: $*" >&2
|
||||
echo "$(basename "${BASH_SOURCE[0]}") PACKAGE_IMAGE_NAME:PACKAGE_IMAGE_TAG DIST-ARCH" >&2
|
||||
echo -e "\\tPACKAGE_IMAGE: container image holding packages [e.g. registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging/container-toolkit]" >&2
|
||||
echo -e "\\tPACKAGE_TAG: tag for container image holding packages. [e.g. 1a2b3c4-packaging]" >&2
|
||||
echo -e "\\tDIST: The distribution." >&2
|
||||
echo -e "\\tARCH: The architecture." >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
SCRIPTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )"/../scripts && pwd )"
|
||||
PROJECT_ROOT="$( cd "${SCRIPTS_DIR}/.." && pwd )"
|
||||
|
||||
if [[ $# -ne 2 ]]; then
|
||||
assert_usage "$@"
|
||||
fi
|
||||
|
||||
PACKAGE_IMAGE=$1
|
||||
DISTARCH=$2
|
||||
DIST=${DISTARCH%-*}
|
||||
ARCH=${DISTARCH##*-}
|
||||
|
||||
if [[ -z "${DIST}" || -z "${ARCH}" ]]; then
|
||||
echo "ERROR: Distro and Architecture must be specified." >&2
|
||||
assert_usage "$@"
|
||||
fi
|
||||
|
||||
# TODO: accept ARTIFACTS_DIR as a command-line argument
|
||||
: "${ARTIFACTS_DIR="${PROJECT_ROOT}/artifacts"}"
|
||||
|
||||
# For release-candidates we skip certain packages.
|
||||
# For example, we don't release release candidates of nvidia-container-runtime and nvidia-docker2
|
||||
# since these only bump the nvidia-container-toolkit dependency.
|
||||
function skip-for-release-candidate() {
|
||||
if [[ "${VERSION/rc./}" == "${VERSION}" ]]; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
local package_name=$1
|
||||
if [[ "${package_name/"nvidia-docker2"/}" != "${package_name}" ]]; then
|
||||
return 0
|
||||
fi
|
||||
if [[ "${package_name/"nvidia-container-runtime"/}" != "${package_name}" ]]; then
|
||||
return 0
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
# extract-file copies a file from a specified image.
|
||||
# If regctl is available this is used, otherwise a docker container is run and the file is copied from
|
||||
# there.
|
||||
function copy-file() {
|
||||
local image=$1
|
||||
local path_in_image=$2
|
||||
local path_on_host=$3
|
||||
if command -v regctl; then
|
||||
regctl image get-file "${image}" "${path_in_image}" "${path_on_host}"
|
||||
else
|
||||
# Note this will only work for destinations where the `path_on_host` is in `pwd`
|
||||
docker run --rm \
|
||||
-v "$(pwd):$(pwd)" \
|
||||
-w "$(pwd)" \
|
||||
-u "$(id -u):$(id -g)" \
|
||||
--entrypoint="bash" \
|
||||
"${image}" \
|
||||
-c "cp ${path_in_image} ${path_on_host}"
|
||||
fi
|
||||
}
|
||||
|
||||
eval $(${SCRIPTS_DIR}/get-component-versions.sh)
|
||||
|
||||
# extract-all extracts all package for the specified dist-arch combination from the package image.
|
||||
# The manifest.txt file in the image is used to detemine the applicable files for the combination.
|
||||
# Files are extracted to ${ARTIFACTS_DIR}/artifacts/packages/${dist}/${arch}
|
||||
function extract-all() {
|
||||
local dist=$1
|
||||
local arch=$2
|
||||
|
||||
echo "Extracting packages for ${dist}-${arch} from ${PACKAGE_IMAGE}"
|
||||
|
||||
mkdir -p "${ARTIFACTS_DIR}"
|
||||
copy-file "${PACKAGE_IMAGE}" "/artifacts/manifest.txt" "${ARTIFACTS_DIR}/manifest.txt"
|
||||
|
||||
# Extract every file for the specified dist-arch combiniation in MANIFEST.txt
|
||||
grep "/${dist}/${arch}/" "${ARTIFACTS_DIR}/manifest.txt" | while read -r f ; do
|
||||
package_name="$(basename "$f")"
|
||||
# For release-candidates, we skip certain packages
|
||||
if skip-for-release-candidate "${package_name}"; then
|
||||
echo "Skipping $f for release-candidate ${VERSION}"
|
||||
continue
|
||||
fi
|
||||
target="${ARTIFACTS_DIR}/packages/${dist}/${arch}/${package_name}"
|
||||
mkdir -p "$(dirname "$target")"
|
||||
copy-file "${PACKAGE_IMAGE}" "${f}" "${target}"
|
||||
done
|
||||
}
|
||||
|
||||
extract-all "${DIST}" "${ARCH}"
|
||||
@@ -38,7 +38,7 @@ libnvidia_container_version_tag=$(grep "#define NVC_VERSION" ${LIBNVIDIA_CONTAIN
|
||||
| sed -e 's/#define NVC_VERSION[[:space:]]"\(.*\)"/\1/')
|
||||
libnvidia_container_version=${libnvidia_container_version_tag%%~*}
|
||||
libnvidia_container_tag=${libnvidia_container_version_tag##${libnvidia_container_version}}
|
||||
libnvidia_container_tag=${libnvidia_container_tag##~}
|
||||
libnvidia_container_tag=${libnvidia_container_tag##\~}
|
||||
|
||||
versions_makefile=${NVIDIA_CONTAINER_TOOLKIT_ROOT}/versions.mk
|
||||
# Get version for nvidia-container-toolit
|
||||
|
||||
@@ -9,7 +9,7 @@ set -x -e
|
||||
|
||||
function deb-sign {
|
||||
local last_found
|
||||
for r in ${*}; do
|
||||
for r in "$@"; do
|
||||
if [ -f "./${r}" ]; then
|
||||
last_found=${r}
|
||||
fi
|
||||
@@ -27,12 +27,12 @@ function deb-sign {
|
||||
--no-emit-version \
|
||||
--no-comments \
|
||||
--personal-digest-preferences sha512 \
|
||||
--local-user ${GPG_LOCAL_USER} \
|
||||
--local-user "${GPG_LOCAL_USER}" \
|
||||
> InRelease
|
||||
}
|
||||
|
||||
function rpm-sign {
|
||||
for r in ${*}; do
|
||||
for r in "$@"; do
|
||||
if [ -f "./${r}" ]; then
|
||||
rpmsign --addsign --key-id A04EA552 --digest-algo=sha512 "${r}"
|
||||
fi
|
||||
@@ -42,7 +42,7 @@ function rpm-sign {
|
||||
--armor \
|
||||
--no-emit-version \
|
||||
--no-comments --personal-digest-preferences sha512 \
|
||||
--local-user ${GPG_LOCAL_USER} \
|
||||
--local-user "${GPG_LOCAL_USER}" \
|
||||
repodata/repomd.xml
|
||||
}
|
||||
|
||||
@@ -61,6 +61,8 @@ function sign() {
|
||||
;;
|
||||
debian*) pkg_type=deb
|
||||
;;
|
||||
fedora*) pkg_type=rpm
|
||||
;;
|
||||
opensuse-leap*) pkg_type=rpm
|
||||
;;
|
||||
ubuntu*) pkg_type=deb
|
||||
@@ -81,15 +83,15 @@ function sign() {
|
||||
return
|
||||
fi
|
||||
|
||||
cd ${dst}
|
||||
cd "${dst}"
|
||||
if [[ -f "/etc/debian_version" ]]; then
|
||||
[[ ${pkg_type} == "deb" ]] && deb-sign ${ALL_DEBS}
|
||||
[[ "${pkg_type}" == "deb" ]] && deb-sign ${ALL_DEBS}
|
||||
else
|
||||
[[ ${pkg_type} == "rpm" ]] && rpm-sign ${ALL_RPMS}
|
||||
[[ "${pkg_type}" == "rpm" ]] && rpm-sign ${ALL_RPMS}
|
||||
fi
|
||||
cd -
|
||||
}
|
||||
|
||||
for target in ${TARGETS[@]}; do
|
||||
sign ${target} $(pwd)
|
||||
for target in "${TARGETS[@]}"; do
|
||||
sign "${target}" "$(pwd)"
|
||||
done
|
||||
|
||||
216
scripts/release-kitmaker-artifactory.sh
Executable file
216
scripts/release-kitmaker-artifactory.sh
Executable file
@@ -0,0 +1,216 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
function assert_usage() {
|
||||
cat >&2 << EOF
|
||||
Incorrect arguments: $*
|
||||
$(basename "${BASH_SOURCE[0]}") DIST-ARCH ARTIFACTORY_URL
|
||||
DIST: The distribution.
|
||||
ARCH: The architecture.
|
||||
ARTIFACTORY_URL must contain repo path for package, including hostname.
|
||||
|
||||
Environment Variables
|
||||
ARTIFACTORY_TOKEN: must contain an auth token. [required]
|
||||
EOF
|
||||
exit 1
|
||||
}
|
||||
|
||||
set -e
|
||||
|
||||
SCRIPTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )"/../scripts && pwd )"
|
||||
PROJECT_ROOT="$( cd "${SCRIPTS_DIR}/.." && pwd )"
|
||||
COMPONENT_NAME="nvidia-container-toolkit"
|
||||
|
||||
if [[ $# -ne 2 ]]; then
|
||||
assert_usage "$@"
|
||||
fi
|
||||
|
||||
source "${SCRIPTS_DIR}"/utils.sh
|
||||
|
||||
DISTARCH=$1
|
||||
DIST=${DISTARCH%-*}
|
||||
ARCH=${DISTARCH##*-}
|
||||
ARTIFACTORY_URL=$2
|
||||
|
||||
CURL=${CURL:-curl}
|
||||
|
||||
if [[ -z "${DIST}" || -z "${ARCH}" ]]; then
|
||||
echo "ERROR: Distro and Architecture must be specified." >&2
|
||||
assert_usage "$@"
|
||||
fi
|
||||
|
||||
# TODO: accept ARTIFACTS_DIR as a command-line argument
|
||||
: "${ARTIFACTS_DIR="${PROJECT_ROOT}/artifacts"}"
|
||||
|
||||
if [[ ! -d "${ARTIFACTS_DIR}" ]]; then
|
||||
echo "ERROR: ARTIFACTS_DIR does not exist." >&2
|
||||
assert_usage "$@"
|
||||
fi
|
||||
|
||||
if [[ -z "${ARTIFACTORY_TOKEN}" ]]; then
|
||||
echo "ERROR: ARTIFACTORY_TOKEN must be defined." >&2
|
||||
assert_usage "$@"
|
||||
fi
|
||||
|
||||
# TODO: accept KITMACKER_DIR as a command-line argument
|
||||
: "${KITMAKER_DIR="${PROJECT_ROOT}/artifacts/kitmaker"}"
|
||||
|
||||
eval $(${SCRIPTS_DIR}/get-component-versions.sh)
|
||||
|
||||
# Returns the key=value property if the value isn't empty
|
||||
# Prepends with ";" if needed
|
||||
set_prop_value() {
|
||||
local key=$1
|
||||
local value=$2
|
||||
if [ -n "${value}" ]; then
|
||||
if [ -z "${PROPS}" ]; then
|
||||
echo "${key}=${value}"
|
||||
else
|
||||
echo ";${key}=${value}"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
process_props() {
|
||||
local dist=$1
|
||||
local arch=$2
|
||||
|
||||
PROPS+=$(set_prop_value "component_name" "${COMPONENT_NAME}")
|
||||
PROPS+=$(set_prop_value "version" "${VERSION}")
|
||||
PROPS+=$(set_prop_value "os" "${dist}")
|
||||
PROPS+=$(set_prop_value "arch" "${arch}")
|
||||
PROPS+=$(set_prop_value "platform" "${dist}-${arch}")
|
||||
# TODO: Use `git describe` to get this information if it's not available.
|
||||
PROPS+=$(set_prop_value "changelist" "${CI_COMMIT_SHA}")
|
||||
PROPS+=$(set_prop_value "branch" "${CI_COMMIT_REF_NAME}")
|
||||
|
||||
# Gitlab variables to expose
|
||||
for var in CI_PROJECT_ID CI_PIPELINE_ID CI_JOB_ID CI_JOB_URL CI_PROJECT_PATH; do
|
||||
if [ -n "${!var}" ]; then
|
||||
PROPS+=$(set_prop_value "${var}" "${!var}")
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Applying properties: ${PROPS}"
|
||||
}
|
||||
|
||||
## NOT USED:
|
||||
## can substitute this function place of upload_file to modify properties of
|
||||
## existing file instead of uploading files.
|
||||
# Sets the properties on a path
|
||||
# Relies on global variables: ARTIFACTORY_TOKEN, ARTIFACTORY_URL
|
||||
set_props() {
|
||||
local dist="$1"
|
||||
local arch="$2"
|
||||
local kitmakerfilename="$3"
|
||||
|
||||
# extract the Artifactory hostname
|
||||
artifactory_host=$(echo "${ARTIFACTORY_URL##https://}" | awk -F'/' '{print $1}')
|
||||
local image_path="${ARTIFACTORY_URL#https://${artifactory_host}/}/${dist}/${arch}/${kitmakerfilename}"
|
||||
|
||||
local PROPS
|
||||
process_props "${DIST}" "${ARCH}"
|
||||
|
||||
echo "Setting ${image_path} with properties: ${PROPS}"
|
||||
if ! ${CURL} -fs -H "X-JFrog-Art-Api: ${ARTIFACTORY_TOKEN}" \
|
||||
-X PUT \
|
||||
"https://${artifactory_host}/artifactory/api/storage/${image_path}?properties=${PROPS}&recursive=0" ; then
|
||||
echo "ERROR: set props failed: ${image_path}"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Uploads file to ARTIFACTS_DIR/<os>/<arch>/<filename>
|
||||
# Relies on global variables: DIST, ARCH, ARTIFACTORY_TOKEN, ARTIFACTORY_URL
|
||||
upload_file() {
|
||||
local dist=$1
|
||||
local arch=$2
|
||||
local file=$3
|
||||
|
||||
# extract the Artifactory hostname
|
||||
artifactory_host=$(echo "${ARTIFACTORY_URL##https://}" | awk -F'/' '{print $1}')
|
||||
# get base part of the ARTIFACTORY_URL without hostname
|
||||
local image_path="${ARTIFACTORY_URL#https://${artifactory_host}/}/${dist}/${arch}/$(basename ${file})"
|
||||
|
||||
local PROPS
|
||||
process_props "${dist}" "${arch}"
|
||||
|
||||
if [ ! -r "${file}" ]; then
|
||||
echo "ERROR: File not found or not readable: ${file}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Collect sum
|
||||
SHA1_SUM=$(sha1sum -b "${file}" | awk '{ print $1 }')
|
||||
|
||||
echo "Uploading ${image_path} from ${file}"
|
||||
if ! ${CURL} -f \
|
||||
-H "X-JFrog-Art-Api: ${ARTIFACTORY_TOKEN}" \
|
||||
-H "X-Checksum-Sha1: ${SHA1_SUM}" \
|
||||
${file:+-T ${file}} -X PUT \
|
||||
"https://${artifactory_host}/${image_path};${PROPS}" ;
|
||||
then
|
||||
echo "ERROR: upload file failed: ${file}"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
function push-kitmaker-artifactory() {
|
||||
local dist=$1
|
||||
local arch=$2
|
||||
local archive=$3
|
||||
|
||||
upload_file "${dist}" "${arch}" "${archive}"
|
||||
}
|
||||
|
||||
# kitmakerize-distro creates a tar.gz archive for the specified dist-arch combination.
|
||||
# The archive is created at ${KITMAKER_DIR}/${name}.tar.gz (where ${name} is the third positional argument)
|
||||
function kitmakerize-distro() {
|
||||
local dist="$1"
|
||||
local arch="$2"
|
||||
local archive="$3"
|
||||
|
||||
local name=$(basename "${archive%%.tar.gz}")
|
||||
## Copy packages into directory layout for .tar.gz
|
||||
# TODO: make scratch_dir configurable
|
||||
local scratch_dir="$(dirname ${archive})/.scratch/${name}"
|
||||
local packages_dir="${scratch_dir}/.packages/"
|
||||
|
||||
mkdir -p "${packages_dir}"
|
||||
|
||||
# Copy the extracted files to the .packages directory so that a kitmaker file can be created.
|
||||
source="${ARTIFACTS_DIR}/packages/${dist}/${arch}"
|
||||
cp -r "${source}/"* "${packages_dir}/"
|
||||
|
||||
## Tar up the directory structure created above
|
||||
tar zcvf "${archive}" -C "${scratch_dir}/.." "${name}"
|
||||
echo "Created: ${archive}"
|
||||
ls -l "${archive}"
|
||||
echo "With contents:"
|
||||
tar -tzvf "${archive}"
|
||||
echo ""
|
||||
|
||||
# Clean up the scratch directories:
|
||||
rm -f "${scratch_dir}/.packages/"*
|
||||
rmdir "${scratch_dir}/.packages"
|
||||
rmdir "${scratch_dir}"
|
||||
}
|
||||
|
||||
: "${VERSION=$({NVIDIA_CONTAINER_TOOLKIT_PACKAGE_VERSION})}"
|
||||
kitmaker_name="${COMPONENT_NAME//-/_}-${DIST}-${ARCH}-${VERSION}"
|
||||
kitmaker_archive="${KITMAKER_DIR}/${kitmaker_name}.tar.gz"
|
||||
kitmakerize-distro "${DIST}" "${ARCH}" "${kitmaker_archive}"
|
||||
push-kitmaker-artifactory "${DIST}" "${ARCH}" "${kitmaker_archive}"
|
||||
189
scripts/release-packages-artifactory.sh
Executable file
189
scripts/release-packages-artifactory.sh
Executable file
@@ -0,0 +1,189 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Dependencies:
|
||||
# regctl
|
||||
#
|
||||
|
||||
function assert_usage() {
|
||||
cat >&2 << EOF
|
||||
Incorrect arguments: $*
|
||||
$(basename "${BASH_SOURCE[0]}") DIST-ARCH
|
||||
DIST: The distribution.
|
||||
ARCH: The architecture.
|
||||
|
||||
Environment Variables
|
||||
ARTIFACTORY_TOKEN: must contain an auth token. [required]
|
||||
LIB_TAG: optional package tag.
|
||||
CI_COMMIT_REF_NAME: provided by CI/CD system.
|
||||
CI_COMMIT_SHA: provided by CI/CD system.
|
||||
EOF
|
||||
exit 1
|
||||
}
|
||||
|
||||
SCRIPTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )"/../scripts && pwd )"
|
||||
PROJECT_ROOT="$( cd "${SCRIPTS_DIR}/.." && pwd )"
|
||||
|
||||
source "${SCRIPTS_DIR}"/utils.sh
|
||||
|
||||
if [[ $# -ne 1 ]]; then
|
||||
assert_usage "$@"
|
||||
fi
|
||||
|
||||
DISTARCH=$1
|
||||
ARTIFACTORY_PATH=$2
|
||||
DIST=${DISTARCH%-*}
|
||||
ARCH=${DISTARCH##*-}
|
||||
|
||||
CURL=${CURL:-curl}
|
||||
|
||||
if [[ -z "${DIST}" || -z "${ARCH}" ]]; then
|
||||
echo "ERROR: Distro and Architecture must be specified." >&2
|
||||
assert_usage "$@"
|
||||
fi
|
||||
|
||||
if [[ -z "${ARTIFACTORY_PATH}" ]]; then
|
||||
echo "ERROR: Package repo must be specified." >&2
|
||||
assert_usage "$@"
|
||||
fi
|
||||
|
||||
if [[ -z "${ARTIFACTORY_TOKEN}" ]]; then
|
||||
echo "ERROR: ARTIFACTORY_TOKEN must be defined." >&2
|
||||
assert_usage "$@"
|
||||
fi
|
||||
|
||||
# TODO: accept PACKAGES_DIR as a command-line argument
|
||||
: "${ARTIFACTS_DIR="${PROJECT_ROOT}/artifacts"}"
|
||||
: "${PACKAGES_DIR="${ARTIFACTS_DIR}/packages"}"
|
||||
|
||||
eval $(${SCRIPTS_DIR}/get-component-versions.sh)
|
||||
|
||||
# Returns the key=value property if the value isn't empty
|
||||
# Prepends with ";" if needed
|
||||
set_prop_value() {
|
||||
local key=$1
|
||||
local value=$2
|
||||
if [ -n "${value}" ]; then
|
||||
if [ -z "${PROPS}" ]; then
|
||||
echo "${key}=${value}"
|
||||
else
|
||||
echo ";${key}=${value}"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
process_props() {
|
||||
local dist=$1
|
||||
local arch=$2
|
||||
local file=$3
|
||||
local component_name="${file%%.*}"
|
||||
component_name="${component_name%-*}"
|
||||
local pkg_type="$(package_type $dist)"
|
||||
|
||||
## Component owner is free to define these
|
||||
# PROPS+=$(set_prop_value "version" "${VERSION}")
|
||||
# PROPS+=$(set_prop_value "lws_version" "${LWS_VER}")
|
||||
# PROPS+=$(set_prop_value "platform" "${DISTARCH}")
|
||||
|
||||
# TODO: Use `git describe` to get this information if it's not available.
|
||||
PROPS+=$(set_prop_value "changelist" "${CI_COMMIT_SHA}")
|
||||
PROPS+=$(set_prop_value "branch" "${CI_COMMIT_REF_NAME}")
|
||||
|
||||
# PROPS+=$(set_prop_value "category" "utils")
|
||||
# PROPS+=$(set_prop_value "platform" "${DISTARCH}")
|
||||
|
||||
# Gitlab variables to expose
|
||||
for var in CI_PROJECT_ID CI_PIPELINE_ID CI_JOB_ID CI_JOB_URL CI_PROJECT_PATH; do
|
||||
if [ -n "${!var}" ]; then
|
||||
PROPS+=$(set_prop_value "${var}" "${!var}")
|
||||
fi
|
||||
done
|
||||
|
||||
# We also set the package-specific properties to allow this to be used for other artifactory repositories
|
||||
PROPS+=$(set_prop_value "${pkg_type}.distribution" "${dist}")
|
||||
PROPS+=$(set_prop_value "${pkg_type}.architecture" "${arch}")
|
||||
PROPS+=$(set_prop_value "${pkg_type}.component" "${component_name}")
|
||||
}
|
||||
|
||||
# Uploads file ARTIFACTORY_PATH
|
||||
# Relies on global variables: DIST, ARCH, ARTIFACTORY_TOKEN, ARTIFACTORY_PATH
|
||||
upload_file() {
|
||||
local dist=$1
|
||||
local arch=$2
|
||||
local file=$3
|
||||
|
||||
# TODO: These should be set by envvars
|
||||
local artifactory_host="urm.nvidia.com"
|
||||
local artifactory_repo="$(get_artifactory_repository $dist)"
|
||||
|
||||
if [ ! -r "${file}" ]; then
|
||||
echo "ERROR: File not found or not readable: ${file}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local PROPS
|
||||
process_props "${dist}" "${arch}" "${file}"
|
||||
|
||||
# Collect sum
|
||||
SHA1_SUM=$(sha1sum -b "${file}" | awk '{ print $1 }')
|
||||
|
||||
url="https://${artifactory_host}/artifactory/${artifactory_repo}/${dist}/${arch}/$(basename "${file}")"
|
||||
# NOTE: The URL to set the properties through the API is:
|
||||
# "https://${artifactory_host}/artifactory/api/storage/${artifactory_repo}/${dist}/${arch}/$(basename ${file})"
|
||||
|
||||
echo "Uploading ${file} to ${url}"
|
||||
if ! ${CURL} -f \
|
||||
-H "X-JFrog-Art-Api: ${ARTIFACTORY_TOKEN}" \
|
||||
-H "X-Checksum-Sha1: ${SHA1_SUM}" \
|
||||
${file:+-T ${file}} -X PUT \
|
||||
"${url};${PROPS}" ;
|
||||
then
|
||||
echo "ERROR: upload file failed: ${file}"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
function push-artifactory() {
|
||||
local dist="$1"
|
||||
local arch="$2"
|
||||
|
||||
source="${ARTIFACTS_DIR}/packages/${dist}/${arch}"
|
||||
|
||||
find "${source}" -maxdepth 1 | while read -r f ; do
|
||||
upload_file "$dist" "$arch" "$f"
|
||||
done
|
||||
}
|
||||
|
||||
# TODO: use this to adapt as a general purpose command-line tool
|
||||
# case "${COMMAND}" in
|
||||
# set)
|
||||
# set_props
|
||||
# ;;
|
||||
# upload)
|
||||
# if [ -z "${UPLOAD_FILE}" ]; then
|
||||
# echo "ERROR: Upload package filename must be set using -f"
|
||||
# usage
|
||||
# fi
|
||||
#
|
||||
# upload_file
|
||||
# ;;
|
||||
# *)
|
||||
# echo "ERROR: Invalid command ${COMMAND}"
|
||||
# usage
|
||||
# ;;
|
||||
# esac
|
||||
|
||||
push-artifactory "${DIST}" "${ARCH}"
|
||||
@@ -94,6 +94,8 @@ function sync() {
|
||||
;;
|
||||
debian*) pkg_type=deb
|
||||
;;
|
||||
fedora*) pkg_type=rpm
|
||||
;;
|
||||
opensuse-leap*) pkg_type=rpm
|
||||
;;
|
||||
ubuntu*) pkg_type=deb
|
||||
@@ -130,8 +132,17 @@ function sync() {
|
||||
|
||||
done
|
||||
if [[ ${REPO} == "stable" ]]; then
|
||||
cp ${src}/nvidia-container-runtime*.${pkg_type} ${dst}
|
||||
cp ${src}/nvidia-docker*.${pkg_type} ${dst}
|
||||
for f in $(ls ${src}/nvidia-container-runtime*.${pkg_type} ${src}/nvidia-docker*.${pkg_type}); do
|
||||
df=${dst}/$(basename ${f})
|
||||
df_stable=${df//"/experimental/"/"/stable/"}
|
||||
if [[ -f "${df}" ]]; then
|
||||
echo "${df} already exists; skipping"
|
||||
elif [[ ${REPO} == "experimental" && -f ${df_stable} ]]; then
|
||||
echo "${df_stable} already exists; skipping"
|
||||
else
|
||||
cp ${f} ${df}
|
||||
fi
|
||||
done
|
||||
fi
|
||||
}
|
||||
|
||||
@@ -148,6 +159,8 @@ all=(
|
||||
centos8-x86_64
|
||||
debian10-amd64
|
||||
debian9-amd64
|
||||
fedora35-aarch64
|
||||
fedora35-x86_64
|
||||
opensuse-leap15.1-x86_64
|
||||
ubuntu16.04-amd64
|
||||
ubuntu16.04-ppc64le
|
||||
|
||||
36
scripts/utils.sh
Normal file
36
scripts/utils.sh
Normal file
@@ -0,0 +1,36 @@
|
||||
|
||||
# package_type returns the packaging type (deb or rpm) for the specfied distribution.
|
||||
# An error is returned if the ditribution is unsupported.
|
||||
function package_type() {
|
||||
local pkg_type
|
||||
case ${1} in
|
||||
amazonlinux*) pkg_type=rpm
|
||||
;;
|
||||
centos*) pkg_type=rpm
|
||||
;;
|
||||
debian*) pkg_type=deb
|
||||
;;
|
||||
fedora*) pkg_type=rpm
|
||||
;;
|
||||
opensuse-leap*) pkg_type=rpm
|
||||
;;
|
||||
ubuntu*) pkg_type=deb
|
||||
;;
|
||||
*) exit 1
|
||||
;;
|
||||
esac
|
||||
echo "${pkg_type}"
|
||||
}
|
||||
|
||||
function get_artifactory_repository() {
|
||||
local pkg_type=$(package_type $1)
|
||||
|
||||
case ${pkg_type} in
|
||||
deb) echo "sw-gpu-cloudnative-debian-local"
|
||||
;;
|
||||
rpm) echo "sw-gpu-cloudnative-rpm-local"
|
||||
;;
|
||||
*) echo "sw-gpu-cloudnative-generic-local"
|
||||
;;
|
||||
esac
|
||||
}
|
||||
@@ -43,8 +43,8 @@ testing::containerd::toolkit::run() {
|
||||
|
||||
# Ensure that we can run some non GPU containers from within dind
|
||||
with_retry 3 5s testing::containerd::dind::exec " \
|
||||
ctr --address=${containerd_dind_containerd_dir}/containerd.sock image pull nvcr.io/nvidia/cuda:11.1-base; \
|
||||
ctr --address=${containerd_dind_containerd_dir}/containerd.sock run --rm --runtime=io.containerd.runtime.v1.linux nvcr.io/nvidia/cuda:11.1-base cuda echo foo"
|
||||
ctr --address=${containerd_dind_containerd_dir}/containerd.sock image pull nvcr.io/nvidia/cuda:11.1.1-base-ubuntu20.04; \
|
||||
ctr --address=${containerd_dind_containerd_dir}/containerd.sock run --rm --runtime=io.containerd.runtime.v1.linux nvcr.io/nvidia/cuda:11.1.1-base-ubuntu20.04 cuda echo foo"
|
||||
|
||||
# Share the volumes so that we can edit the config file and point to the new runtime
|
||||
# Share the pid so that we can ask docker to reload its config
|
||||
@@ -63,8 +63,8 @@ testing::containerd::toolkit::run() {
|
||||
|
||||
# Ensure that we haven't broken non GPU containers
|
||||
with_retry 3 5s testing::containerd::dind::exec " \
|
||||
ctr --address=${containerd_dind_containerd_dir}/containerd.sock image pull nvcr.io/nvidia/cuda:11.1-base; \
|
||||
ctr --address=${containerd_dind_containerd_dir}/containerd.sock run --rm --runtime=io.containerd.runtime.v1.linux nvcr.io/nvidia/cuda:11.1-base cuda echo foo"
|
||||
ctr --address=${containerd_dind_containerd_dir}/containerd.sock image pull nvcr.io/nvidia/cuda:11.1.1-base-ubuntu20.04; \
|
||||
ctr --address=${containerd_dind_containerd_dir}/containerd.sock run --rm --runtime=io.containerd.runtime.v1.linux nvcr.io/nvidia/cuda:11.1.1-base-ubuntu20.04 cuda echo foo"
|
||||
}
|
||||
|
||||
# This test runs containerd setup and containerd cleanup in succession to ensure that the
|
||||
|
||||
@@ -23,7 +23,7 @@ testing::toolkit::install() {
|
||||
READLINK="greadlink"
|
||||
fi
|
||||
|
||||
testing::docker_run::toolkit::shell 'toolkit install /usr/local/nvidia/toolkit'
|
||||
testing::docker_run::toolkit::shell 'toolkit install --toolkit-root=/usr/local/nvidia/toolkit'
|
||||
docker run --rm -v "${shared_dir}:/work" alpine sh -c "chown -R ${uid}:${gid} /work/"
|
||||
|
||||
# Ensure toolkit dir is correctly setup
|
||||
@@ -66,7 +66,7 @@ testing::toolkit::install() {
|
||||
testing::toolkit::delete() {
|
||||
testing::docker_run::toolkit::shell 'mkdir -p /usr/local/nvidia/delete-toolkit'
|
||||
testing::docker_run::toolkit::shell 'touch /usr/local/nvidia/delete-toolkit/test.file'
|
||||
testing::docker_run::toolkit::shell 'toolkit delete /usr/local/nvidia/delete-toolkit'
|
||||
testing::docker_run::toolkit::shell 'toolkit delete --toolkit-root=/usr/local/nvidia/delete-toolkit'
|
||||
|
||||
test ! -z "$(ls -A "${shared_dir}/usr/local/nvidia")"
|
||||
test ! -e "${shared_dir}/usr/local/nvidia/delete-toolkit"
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
WORKFLOW ?= nvidia-docker
|
||||
|
||||
DISTRIBUTIONS := ubuntu18.04 centos8
|
||||
DISTRIBUTIONS := ubuntu18.04 centos8 fedora35
|
||||
|
||||
IMAGE_TARGETS := $(patsubst %,image-%, $(DISTRIBUTIONS))
|
||||
RUN_TARGETS := $(patsubst %,run-%, $(DISTRIBUTIONS))
|
||||
@@ -28,7 +28,6 @@ image-%: DOCKERFILE = docker/$(*)/Dockerfile
|
||||
images: $(IMAGE_TARGETS)
|
||||
$(IMAGE_TARGETS): image-%: $(DOCKERFILE)
|
||||
docker build ${PLATFORM_ARGS} \
|
||||
--build-arg WORKFLOW="$(WORKFLOW)" \
|
||||
-t nvidia-container-toolkit-repo-test:$(*) \
|
||||
-f $(DOCKERFILE) \
|
||||
$(shell dirname $(DOCKERFILE))
|
||||
@@ -36,6 +35,7 @@ $(IMAGE_TARGETS): image-%: $(DOCKERFILE)
|
||||
|
||||
%-ubuntu18.04: ARCH ?= amd64
|
||||
%-centos8: ARCH ?= x86_64
|
||||
%-fedora35: ARCH ?= x86_64
|
||||
|
||||
PLATFORM_ARGS = --platform=linux/${ARCH}
|
||||
|
||||
|
||||
@@ -1,16 +1,6 @@
|
||||
ARG BASEIMAGE=centos:8
|
||||
ARG BASEIMAGE=quay.io/centos/centos:stream8
|
||||
FROM ${BASEIMAGE}
|
||||
|
||||
ARG BASEIMAGE
|
||||
# See https://www.centos.org/centos-linux-eol/
|
||||
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
|
||||
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
|
||||
RUN [[ "${BASEIMAGE}" != "centos:8" ]] || \
|
||||
( \
|
||||
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
|
||||
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
|
||||
)
|
||||
|
||||
RUN yum install -y \
|
||||
yum-utils \
|
||||
ruby-devel \
|
||||
@@ -35,9 +25,8 @@ RUN fpm -s empty \
|
||||
rm -f /tmp/docker.rpm
|
||||
|
||||
|
||||
ARG WORKFLOW=nvidia-docker
|
||||
RUN curl -s -L https://nvidia.github.io/${WORKFLOW}/centos8/nvidia-docker.repo \
|
||||
| tee /etc/yum.repos.d/nvidia-docker.repo
|
||||
RUN curl -s -L https://nvidia.github.io/libnvidia-container/centos8/libnvidia-container.repo \
|
||||
| tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
|
||||
COPY entrypoint.sh /
|
||||
COPY install_repo.sh /
|
||||
|
||||
@@ -21,5 +21,5 @@
|
||||
|
||||
test_repo=$1
|
||||
echo "Setting up TEST repo: ${test_repo}"
|
||||
sed -i -e "s#nvidia\.github\.io/libnvidia-container#${test_repo}/libnvidia-container#g" /etc/yum.repos.d/nvidia-docker.repo
|
||||
sed -i -e "s#nvidia\.github\.io/libnvidia-container#${test_repo}/libnvidia-container#g" /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
yum-config-manager --enable libnvidia-container-experimental
|
||||
|
||||
34
test/release/docker/fedora35/Dockerfile
Normal file
34
test/release/docker/fedora35/Dockerfile
Normal file
@@ -0,0 +1,34 @@
|
||||
ARG BASEIMAGE=fedora:35
|
||||
FROM ${BASEIMAGE}
|
||||
|
||||
RUN yum install -y \
|
||||
yum-utils \
|
||||
ruby-devel \
|
||||
gcc \
|
||||
make \
|
||||
rpm-build \
|
||||
rubygems \
|
||||
createrepo
|
||||
|
||||
RUN gem install --no-document fpm
|
||||
|
||||
# We create and install a dummy docker package since these dependencies are out of
|
||||
# scope for the tests performed here.
|
||||
RUN fpm -s empty \
|
||||
-t rpm \
|
||||
--description "A dummy package for docker-ce_18.06.3.ce-3.el7" \
|
||||
-n docker-ce --version 18.06.3.ce-3.el7 \
|
||||
-p /tmp/docker.rpm \
|
||||
&& \
|
||||
yum localinstall -y /tmp/docker.rpm \
|
||||
&& \
|
||||
rm -f /tmp/docker.rpm
|
||||
|
||||
|
||||
RUN curl -s -L https://nvidia.github.io/libnvidia-container/fedora35/libnvidia-container.repo \
|
||||
| tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
|
||||
COPY entrypoint.sh /
|
||||
COPY install_repo.sh /
|
||||
|
||||
ENTRYPOINT [ "/entrypoint.sh" ]
|
||||
42
test/release/docker/fedora35/entrypoint.sh
Executable file
42
test/release/docker/fedora35/entrypoint.sh
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script is used to build the packages for the components of the NVIDIA
|
||||
# Container Stack. These include the nvidia-container-toolkit in this repository
|
||||
# as well as the components included in the third_party folder.
|
||||
# All required packages are generated in the specified dist folder.
|
||||
|
||||
: ${LOCAL_REPO_DIRECTORY:=/local-repository}
|
||||
if [[ -d ${LOCAL_REPO_DIRECTORY} ]]; then
|
||||
echo "Setting up local-repository"
|
||||
createrepo /local-repository
|
||||
|
||||
cat >/etc/yum.repos.d/local.repo <<EOL
|
||||
[local-repository]
|
||||
name=NVIDIA Container Toolkit Local Packages
|
||||
baseurl=file:///local-repository
|
||||
enabled=0
|
||||
gpgcheck=0
|
||||
protect=1
|
||||
EOL
|
||||
yum-config-manager --enable local-repository
|
||||
elif [[ -n ${TEST_REPO} ]]; then
|
||||
./install_repo.sh ${TEST_REPO}
|
||||
else
|
||||
echo "Skipping repo setup"
|
||||
fi
|
||||
|
||||
exec bash $@
|
||||
25
test/release/docker/fedora35/install_repo.sh
Executable file
25
test/release/docker/fedora35/install_repo.sh
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script is used to build the packages for the components of the NVIDIA
|
||||
# Container Stack. These include the nvidia-container-toolkit in this repository
|
||||
# as well as the components included in the third_party folder.
|
||||
# All required packages are generated in the specified dist folder.
|
||||
|
||||
test_repo=$1
|
||||
echo "Setting up TEST repo: ${test_repo}"
|
||||
sed -i -e "s#nvidia\.github\.io/libnvidia-container#${test_repo}/libnvidia-container#g" /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
yum-config-manager --enable libnvidia-container-experimental
|
||||
@@ -39,9 +39,8 @@ RUN fpm -s empty \
|
||||
rm -f /tmp/docker.deb
|
||||
|
||||
|
||||
ARG WORKFLOW=nvidia-docker
|
||||
RUN curl -s -L https://nvidia.github.io/${WORKFLOW}/gpgkey | apt-key add - \
|
||||
&& curl -s -L https://nvidia.github.io/${WORKFLOW}/ubuntu18.04/nvidia-docker.list | tee /etc/apt/sources.list.d/nvidia-docker.list \
|
||||
RUN curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | apt-key add - \
|
||||
&& curl -s -L https://nvidia.github.io/libnvidia-container/ubuntu18.04/libnvidia-container.list | tee /etc/apt/sources.list.d/nvidia-container-toolkit.list \
|
||||
&& apt-get update
|
||||
|
||||
COPY entrypoint.sh /
|
||||
|
||||
@@ -21,5 +21,5 @@
|
||||
|
||||
test_repo=$1
|
||||
echo "Setting up TEST repo: ${test_repo}"
|
||||
sed -i -e "s#nvidia\.github\.io/libnvidia-container#${test_repo}/libnvidia-container#g" /etc/apt/sources.list.d/nvidia-docker.list
|
||||
sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-docker.list
|
||||
sed -i -e "s#nvidia\.github\.io/libnvidia-container#${test_repo}/libnvidia-container#g" /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||
sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||
|
||||
2
third_party/libnvidia-container
vendored
2
third_party/libnvidia-container
vendored
Submodule third_party/libnvidia-container updated: 395fd41701...505dedfee8
2
third_party/nvidia-container-runtime
vendored
2
third_party/nvidia-container-runtime
vendored
Submodule third_party/nvidia-container-runtime updated: d2bb9d0afe...68b81a207b
2
third_party/nvidia-docker
vendored
2
third_party/nvidia-docker
vendored
Submodule third_party/nvidia-docker updated: 0c082f6e75...80902fe3af
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -12,32 +12,60 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
hooks "github.com/containers/podman/v4/pkg/hooks/1.0.0"
|
||||
rspec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/crio"
|
||||
"github.com/pelletier/go-toml"
|
||||
log "github.com/sirupsen/logrus"
|
||||
cli "github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
restartModeSystemd = "systemd"
|
||||
restartModeNone = "none"
|
||||
|
||||
defaultConfigMode = "hook"
|
||||
|
||||
// Hook-based settings
|
||||
defaultHooksDir = "/usr/share/containers/oci/hooks.d"
|
||||
defaultHookFilename = "oci-nvidia-hook.json"
|
||||
|
||||
// Config-based settings
|
||||
defaultConfig = "/etc/crio/crio.conf"
|
||||
defaultRuntimeClass = "nvidia"
|
||||
defaultSetAsDefault = true
|
||||
defaultRestartMode = restartModeSystemd
|
||||
defaultHostRootMount = "/host"
|
||||
)
|
||||
|
||||
var hooksDirFlag string
|
||||
var hookFilenameFlag string
|
||||
var tooklitDirArg string
|
||||
// options stores the configuration from the command linek or environment variables
|
||||
type options struct {
|
||||
configMode string
|
||||
|
||||
hooksDir string
|
||||
hookFilename string
|
||||
runtimeDir string
|
||||
|
||||
config string
|
||||
runtimeClass string
|
||||
setAsDefault bool
|
||||
restartMode string
|
||||
hostRootMount string
|
||||
}
|
||||
|
||||
func main() {
|
||||
options := options{}
|
||||
|
||||
// Create the top-level CLI
|
||||
c := cli.NewApp()
|
||||
c.Name = "crio"
|
||||
@@ -48,17 +76,22 @@ func main() {
|
||||
// Create the 'setup' subcommand
|
||||
setup := cli.Command{}
|
||||
setup.Name = "setup"
|
||||
setup.Usage = "Create the cri-o hook required to run NVIDIA GPU containers"
|
||||
setup.Usage = "Configure cri-o for NVIDIA GPU containers"
|
||||
setup.ArgsUsage = "<toolkit_dirname>"
|
||||
setup.Action = Setup
|
||||
setup.Before = ParseArgs
|
||||
setup.Action = func(c *cli.Context) error {
|
||||
return Setup(c, &options)
|
||||
}
|
||||
setup.Before = func(c *cli.Context) error {
|
||||
return ParseArgs(c, &options)
|
||||
}
|
||||
|
||||
// Create the 'cleanup' subcommand
|
||||
cleanup := cli.Command{}
|
||||
cleanup.Name = "cleanup"
|
||||
cleanup.Usage = "Remove the NVIDIA cri-o hook"
|
||||
cleanup.Action = Cleanup
|
||||
|
||||
cleanup.Usage = "Remove the NVIDIA-specific cri-o configuration"
|
||||
cleanup.Action = func(c *cli.Context) error {
|
||||
return Cleanup(c, &options)
|
||||
}
|
||||
// Register the subcommands with the top-level CLI
|
||||
c.Commands = []*cli.Command{
|
||||
&setup,
|
||||
@@ -75,7 +108,7 @@ func main() {
|
||||
Aliases: []string{"d"},
|
||||
Usage: "path to the cri-o hooks directory",
|
||||
Value: defaultHooksDir,
|
||||
Destination: &hooksDirFlag,
|
||||
Destination: &options.hooksDir,
|
||||
EnvVars: []string{"CRIO_HOOKS_DIR"},
|
||||
DefaultText: defaultHooksDir,
|
||||
},
|
||||
@@ -84,10 +117,54 @@ func main() {
|
||||
Aliases: []string{"f"},
|
||||
Usage: "filename of the cri-o hook that will be created / removed in the hooks directory",
|
||||
Value: defaultHookFilename,
|
||||
Destination: &hookFilenameFlag,
|
||||
Destination: &options.hookFilename,
|
||||
EnvVars: []string{"CRIO_HOOK_FILENAME"},
|
||||
DefaultText: defaultHookFilename,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config-mode",
|
||||
Usage: "the configuration mode to use. One of [hook | config]",
|
||||
Value: defaultConfigMode,
|
||||
Destination: &options.configMode,
|
||||
EnvVars: []string{"CRIO_CONFIG_MODE"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config",
|
||||
Usage: "Path to the cri-o config file",
|
||||
Value: defaultConfig,
|
||||
Destination: &options.config,
|
||||
EnvVars: []string{"CRIO_CONFIG"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime-class",
|
||||
Usage: "The name of the runtime class to set for the nvidia-container-runtime",
|
||||
Value: defaultRuntimeClass,
|
||||
Destination: &options.runtimeClass,
|
||||
EnvVars: []string{"CRIO_RUNTIME_CLASS"},
|
||||
},
|
||||
// The flags below are only used by the 'setup' command.
|
||||
&cli.BoolFlag{
|
||||
Name: "set-as-default",
|
||||
Usage: "Set nvidia-container-runtime as the default runtime",
|
||||
Value: defaultSetAsDefault,
|
||||
Destination: &options.setAsDefault,
|
||||
EnvVars: []string{"CRIO_SET_AS_DEFAULT"},
|
||||
Hidden: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "restart-mode",
|
||||
Usage: "Specify how cri-o should be restarted; If 'none' is selected, it will not be restarted [systemd | none]",
|
||||
Value: defaultRestartMode,
|
||||
Destination: &options.restartMode,
|
||||
EnvVars: []string{"CRIO_RESTART_MODE"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "host-root",
|
||||
Usage: "Specify the path to the host root to be used when restarting crio using systemd",
|
||||
Value: defaultHostRootMount,
|
||||
Destination: &options.hostRootMount,
|
||||
EnvVars: []string{"HOST_ROOT_MOUNT"},
|
||||
},
|
||||
}
|
||||
|
||||
// Update the subcommand flags with the common subcommand flags
|
||||
@@ -101,16 +178,30 @@ func main() {
|
||||
}
|
||||
|
||||
// Setup installs the prestart hook required to launch GPU-enabled containers
|
||||
func Setup(c *cli.Context) error {
|
||||
func Setup(c *cli.Context, o *options) error {
|
||||
log.Infof("Starting 'setup' for %v", c.App.Name)
|
||||
|
||||
err := os.MkdirAll(hooksDirFlag, 0755)
|
||||
switch o.configMode {
|
||||
case "hook":
|
||||
return setupHook(o)
|
||||
case "config":
|
||||
return setupConfig(o)
|
||||
default:
|
||||
return fmt.Errorf("invalid config-mode '%v'", o.configMode)
|
||||
}
|
||||
}
|
||||
|
||||
// setupHook installs the prestart hook required to launch GPU-enabled containers
|
||||
func setupHook(o *options) error {
|
||||
log.Infof("Installing prestart hook")
|
||||
|
||||
err := os.MkdirAll(o.hooksDir, 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating hooks directory %v: %v", hooksDirFlag, err)
|
||||
return fmt.Errorf("error creating hooks directory %v: %v", o.hooksDir, err)
|
||||
}
|
||||
|
||||
hookPath := getHookPath(hooksDirFlag, hookFilenameFlag)
|
||||
err = createHook(tooklitDirArg, hookPath)
|
||||
hookPath := getHookPath(o.hooksDir, o.hookFilename)
|
||||
err = createHook(o.runtimeDir, hookPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating hook: %v", err)
|
||||
}
|
||||
@@ -118,11 +209,52 @@ func Setup(c *cli.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupConfig updates the cri-o config for the NVIDIA container runtime
|
||||
func setupConfig(o *options) error {
|
||||
log.Infof("Updating config file")
|
||||
|
||||
cfg, err := crio.LoadConfig(o.config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = UpdateConfig(cfg, o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
err = crio.FlushConfig(o.config, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to flush config: %v", err)
|
||||
}
|
||||
|
||||
err = RestartCrio(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to restart crio: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cleanup removes the specified prestart hook
|
||||
func Cleanup(c *cli.Context) error {
|
||||
func Cleanup(c *cli.Context, o *options) error {
|
||||
log.Infof("Starting 'cleanup' for %v", c.App.Name)
|
||||
|
||||
hookPath := getHookPath(hooksDirFlag, hookFilenameFlag)
|
||||
switch o.configMode {
|
||||
case "hook":
|
||||
return cleanupHook(o)
|
||||
case "config":
|
||||
return cleanupConfig(o)
|
||||
default:
|
||||
return fmt.Errorf("invalid config-mode '%v'", o.configMode)
|
||||
}
|
||||
}
|
||||
|
||||
// cleanupHook removes the prestart hook
|
||||
func cleanupHook(o *options) error {
|
||||
log.Infof("Removing prestart hook")
|
||||
|
||||
hookPath := getHookPath(o.hooksDir, o.hookFilename)
|
||||
err := os.Remove(hookPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error removing hook '%v': %v", hookPath, err)
|
||||
@@ -131,15 +263,42 @@ func Cleanup(c *cli.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupConfig removes the NVIDIA container runtime from the cri-o config
|
||||
func cleanupConfig(o *options) error {
|
||||
log.Infof("Reverting config file modifications")
|
||||
|
||||
cfg, err := crio.LoadConfig(o.config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = RevertConfig(cfg, o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
err = crio.FlushConfig(o.config, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to flush config: %v", err)
|
||||
}
|
||||
|
||||
err = RestartCrio(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to restart crio: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ParseArgs parses the command line arguments to the CLI
|
||||
func ParseArgs(c *cli.Context) error {
|
||||
func ParseArgs(c *cli.Context, o *options) error {
|
||||
args := c.Args()
|
||||
|
||||
log.Infof("Parsing arguments: %v", args.Slice())
|
||||
if c.NArg() != 1 {
|
||||
return fmt.Errorf("incorrect number of arguments")
|
||||
}
|
||||
tooklitDirArg = args.Get(0)
|
||||
o.runtimeDir = args.Get(0)
|
||||
log.Infof("Successfully parsed arguments")
|
||||
|
||||
return nil
|
||||
@@ -153,7 +312,7 @@ func createHook(toolkitDir string, hookPath string) error {
|
||||
defer hook.Close()
|
||||
|
||||
encoder := json.NewEncoder(hook)
|
||||
err = encoder.Encode(generateOciHook(tooklitDirArg))
|
||||
err = encoder.Encode(generateOciHook(toolkitDir))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error writing hook file '%v': %v", hookPath, err)
|
||||
}
|
||||
@@ -164,23 +323,65 @@ func getHookPath(hooksDir string, hookFilename string) string {
|
||||
return filepath.Join(hooksDir, hookFilename)
|
||||
}
|
||||
|
||||
func generateOciHook(toolkitDir string) hooks.Hook {
|
||||
func generateOciHook(toolkitDir string) podmanHook {
|
||||
hookPath := filepath.Join(toolkitDir, config.NVIDIAContainerRuntimeHookExecutable)
|
||||
envPath := "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:" + toolkitDir
|
||||
always := true
|
||||
|
||||
hook := hooks.Hook{
|
||||
hook := podmanHook{
|
||||
Version: "1.0.0",
|
||||
Stages: []string{"prestart"},
|
||||
Hook: rspec.Hook{
|
||||
Hook: specHook{
|
||||
Path: hookPath,
|
||||
Args: []string{filepath.Base(config.NVIDIAContainerRuntimeHookExecutable), "prestart"},
|
||||
Env: []string{envPath},
|
||||
},
|
||||
When: hooks.When{
|
||||
When: When{
|
||||
Always: &always,
|
||||
Commands: []string{".*"},
|
||||
},
|
||||
}
|
||||
return hook
|
||||
}
|
||||
|
||||
// UpdateConfig updates the cri-o config to include the NVIDIA Container Runtime
|
||||
func UpdateConfig(config *toml.Tree, o *options) error {
|
||||
runtimePath := filepath.Join(o.runtimeDir, "nvidia-container-runtime")
|
||||
return crio.UpdateConfig(config, o.runtimeClass, runtimePath, o.setAsDefault)
|
||||
}
|
||||
|
||||
// RevertConfig reverts the cri-o config to remove the NVIDIA Container Runtime
|
||||
func RevertConfig(config *toml.Tree, o *options) error {
|
||||
return crio.RevertConfig(config, o.runtimeClass)
|
||||
}
|
||||
|
||||
// RestartCrio restarts crio depending on the value of restartModeFlag
|
||||
func RestartCrio(o *options) error {
|
||||
switch o.restartMode {
|
||||
case restartModeNone:
|
||||
log.Warnf("Skipping restart of crio due to --restart-mode=%v", o.restartMode)
|
||||
return nil
|
||||
case restartModeSystemd:
|
||||
return RestartCrioSystemd(o.hostRootMount)
|
||||
default:
|
||||
return fmt.Errorf("invalid restart mode specified: %v", o.restartMode)
|
||||
}
|
||||
}
|
||||
|
||||
// RestartCrioSystemd restarts cri-o using systemctl
|
||||
func RestartCrioSystemd(hostRootMount string) error {
|
||||
log.Infof("Restarting cri-o using systemd and host root mounted at %v", hostRootMount)
|
||||
|
||||
command := "chroot"
|
||||
args := []string{hostRootMount, "systemctl", "restart", "crio"}
|
||||
|
||||
cmd := exec.Command(command, args...)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error restarting crio using systemd: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
50
tools/container/crio/hooks.go
Normal file
50
tools/container/crio/hooks.go
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
// podmanHook is the hook configuration structure.
|
||||
// This is taken from `Hook` at https://github.com/containers/podman/blob/3c53200e9d61fdf95fe1da825bb2a89372551350/pkg/hooks/1.0.0/hook.go#L18
|
||||
type podmanHook struct {
|
||||
Version string `json:"version"`
|
||||
Hook specHook `json:"hook"`
|
||||
When When `json:"when"`
|
||||
Stages []string `json:"stages"`
|
||||
}
|
||||
|
||||
// specHook specifies a command that is run at a particular event in the lifecycle of a container
|
||||
// This is taken from `Hook` at https://github.com/opencontainers/runtime-spec/blob/9ee22abf867e374c5464c7bbe0d0db01482254ab/specs-go/config.go#L128
|
||||
type specHook struct {
|
||||
Path string `json:"path"`
|
||||
Args []string `json:"args,omitempty"`
|
||||
Env []string `json:"env,omitempty"`
|
||||
Timeout *int `json:"timeout,omitempty"`
|
||||
}
|
||||
|
||||
// When holds hook-injection conditions.
|
||||
// This is taken from `When` at https://github.com/containers/podman/blob/3c53200e9d61fdf95fe1da825bb2a89372551350/pkg/hooks/1.0.0/when.go#L11
|
||||
type When struct {
|
||||
Always *bool `json:"always,omitempty"`
|
||||
Annotations map[string]string `json:"annotations,omitempty"`
|
||||
Commands []string `json:"commands,omitempty"`
|
||||
HasBindMounts *bool `json:"hasBindMounts,omitempty"`
|
||||
|
||||
// Or enables any-of matching.
|
||||
//
|
||||
// Deprecated: this property is for is backwards-compatibility with
|
||||
// 0.1.0 hooks. It will be removed when we drop support for them.
|
||||
Or bool `json:"-"`
|
||||
}
|
||||
@@ -250,10 +250,15 @@ func LoadConfig(config string) (map[string]interface{}, error) {
|
||||
|
||||
// UpdateConfig updates the docker config to include the nvidia runtimes
|
||||
func UpdateConfig(config map[string]interface{}, o *options) error {
|
||||
defaultRuntime := o.getDefaultRuntime()
|
||||
runtimes := o.runtimes()
|
||||
for runtimeName, runtimePath := range o.getRuntimeBinaries() {
|
||||
setAsDefault := runtimeName == o.getDefaultRuntime()
|
||||
err := docker.UpdateConfig(config, runtimeName, runtimePath, setAsDefault)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update runtime %q: %v", runtimeName, err)
|
||||
}
|
||||
}
|
||||
|
||||
return docker.UpdateConfig(config, defaultRuntime, runtimes)
|
||||
return nil
|
||||
}
|
||||
|
||||
//RevertConfig reverts the docker config to remove the nvidia runtime
|
||||
@@ -392,19 +397,6 @@ func (o options) getDefaultRuntime() string {
|
||||
return o.runtimeName
|
||||
}
|
||||
|
||||
// runtimes returns the docker runtime definitions for the supported nvidia runtimes
|
||||
// for the given options. This includes the path with the options runtimeDir applied
|
||||
func (o options) runtimes() map[string]interface{} {
|
||||
runtimes := make(map[string]interface{})
|
||||
for r, bin := range o.getRuntimeBinaries() {
|
||||
runtimes[r] = map[string]interface{}{
|
||||
"path": bin,
|
||||
"args": []string{},
|
||||
}
|
||||
}
|
||||
return runtimes
|
||||
}
|
||||
|
||||
// getRuntimeBinaries returns a map of runtime names to binary paths. This includes the
|
||||
// renaming of the `nvidia` runtime as per the --runtime-class command line flag.
|
||||
func (o options) getRuntimeBinaries() map[string]string {
|
||||
|
||||
@@ -30,24 +30,35 @@ var availableRuntimes = map[string]struct{}{"docker": {}, "crio": {}, "container
|
||||
var waitingForSignal = make(chan bool, 1)
|
||||
var signalReceived = make(chan bool, 1)
|
||||
|
||||
var destinationArg string
|
||||
var noDaemonFlag bool
|
||||
var toolkitArgsFlag string
|
||||
var runtimeFlag string
|
||||
var runtimeArgsFlag string
|
||||
// options stores the command line arguments
|
||||
type options struct {
|
||||
noDaemon bool
|
||||
runtime string
|
||||
runtimeArgs string
|
||||
root string
|
||||
}
|
||||
|
||||
// Version defines the CLI version. This is set at build time using LD FLAGS
|
||||
var Version = "development"
|
||||
|
||||
func main() {
|
||||
remainingArgs, root, err := ParseArgs(os.Args)
|
||||
if err != nil {
|
||||
log.Errorf("Error: unable to parse arguments: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
options := options{}
|
||||
// Create the top-level CLI
|
||||
c := cli.NewApp()
|
||||
c.Name = "nvidia-toolkit"
|
||||
c.Usage = "Install the nvidia-container-toolkit for use by a given runtime"
|
||||
c.UsageText = "DESTINATION [-n | --no-daemon] [-t | --toolkit-args] [-r | --runtime] [-u | --runtime-args]"
|
||||
c.UsageText = "[DESTINATION] [-n | --no-daemon] [-r | --runtime] [-u | --runtime-args]"
|
||||
c.Description = "DESTINATION points to the host path underneath which the nvidia-container-toolkit should be installed.\nIt will be installed at ${DESTINATION}/toolkit"
|
||||
c.Version = Version
|
||||
c.Action = Run
|
||||
c.Action = func(ctx *cli.Context) error {
|
||||
return Run(ctx, &options)
|
||||
}
|
||||
|
||||
// Setup flags for the CLI
|
||||
c.Flags = []cli.Flag{
|
||||
@@ -55,23 +66,15 @@ func main() {
|
||||
Name: "no-daemon",
|
||||
Aliases: []string{"n"},
|
||||
Usage: "terminate immediatly after setting up the runtime. Note that no cleanup will be performed",
|
||||
Destination: &noDaemonFlag,
|
||||
Destination: &options.noDaemon,
|
||||
EnvVars: []string{"NO_DAEMON"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "toolkit-args",
|
||||
Aliases: []string{"t"},
|
||||
Usage: "arguments to pass to the underlying 'toolkit' command",
|
||||
Value: defaultToolkitArgs,
|
||||
Destination: &toolkitArgsFlag,
|
||||
EnvVars: []string{"TOOLKIT_ARGS"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime",
|
||||
Aliases: []string{"r"},
|
||||
Usage: "the runtime to setup on this node. One of {'docker', 'crio', 'containerd'}",
|
||||
Value: defaultRuntime,
|
||||
Destination: &runtimeFlag,
|
||||
Destination: &options.runtime,
|
||||
EnvVars: []string{"RUNTIME"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
@@ -79,20 +82,20 @@ func main() {
|
||||
Aliases: []string{"u"},
|
||||
Usage: "arguments to pass to 'docker', 'crio', or 'containerd' setup command",
|
||||
Value: defaultRuntimeArgs,
|
||||
Destination: &runtimeArgsFlag,
|
||||
Destination: &options.runtimeArgs,
|
||||
EnvVars: []string{"RUNTIME_ARGS"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "root",
|
||||
Value: root,
|
||||
Usage: "the folder where the NVIDIA Container Toolkit is to be installed. It will be installed to `ROOT`/toolkit",
|
||||
Destination: &options.root,
|
||||
EnvVars: []string{"ROOT"},
|
||||
},
|
||||
}
|
||||
|
||||
// Run the CLI
|
||||
log.Infof("Starting %v", c.Name)
|
||||
|
||||
remainingArgs, err := ParseArgs(os.Args)
|
||||
if err != nil {
|
||||
log.Errorf("Error: unable to parse arguments: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if err := c.Run(remainingArgs); err != nil {
|
||||
log.Errorf("error running nvidia-toolkit: %v", err)
|
||||
os.Exit(1)
|
||||
@@ -102,8 +105,8 @@ func main() {
|
||||
}
|
||||
|
||||
// Run runs the core logic of the CLI
|
||||
func Run(c *cli.Context) error {
|
||||
err := verifyFlags()
|
||||
func Run(c *cli.Context, o *options) error {
|
||||
err := verifyFlags(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to verify flags: %v", err)
|
||||
}
|
||||
@@ -114,23 +117,23 @@ func Run(c *cli.Context) error {
|
||||
}
|
||||
defer shutdown()
|
||||
|
||||
err = installToolkit()
|
||||
err = installToolkit(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to install toolkit: %v", err)
|
||||
}
|
||||
|
||||
err = setupRuntime()
|
||||
err = setupRuntime(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to setup runtime: %v", err)
|
||||
}
|
||||
|
||||
if !noDaemonFlag {
|
||||
if !o.noDaemon {
|
||||
err = waitForSignal()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to wait for signal: %v", err)
|
||||
}
|
||||
|
||||
err = cleanupRuntime()
|
||||
err = cleanupRuntime(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to cleanup runtime: %v", err)
|
||||
}
|
||||
@@ -139,46 +142,42 @@ func Run(c *cli.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ParseArgs parses the command line arguments and returns the remaining arguments
|
||||
func ParseArgs(args []string) ([]string, error) {
|
||||
// ParseArgs checks if a single positional argument was defined and extracts this the root.
|
||||
// If no positional arguments are defined, the it is assumed that the root is specified as a flag.
|
||||
func ParseArgs(args []string) ([]string, string, error) {
|
||||
log.Infof("Parsing arguments")
|
||||
|
||||
numPositionalArgs := 2 // Includes command itself
|
||||
|
||||
if len(args) < numPositionalArgs {
|
||||
return nil, fmt.Errorf("missing arguments")
|
||||
if len(args) < 2 {
|
||||
return args, "", nil
|
||||
}
|
||||
|
||||
for _, arg := range args {
|
||||
if arg == "--help" || arg == "-h" {
|
||||
return []string{args[0], arg}, nil
|
||||
}
|
||||
if arg == "--version" || arg == "-v" {
|
||||
return []string{args[0], arg}, nil
|
||||
}
|
||||
}
|
||||
|
||||
for _, arg := range args[:numPositionalArgs] {
|
||||
var lastPositionalArg int
|
||||
for i, arg := range args {
|
||||
if strings.HasPrefix(arg, "-") {
|
||||
return nil, fmt.Errorf("unexpected flag where argument should be")
|
||||
break
|
||||
}
|
||||
lastPositionalArg = i
|
||||
}
|
||||
|
||||
for _, arg := range args[numPositionalArgs:] {
|
||||
if !strings.HasPrefix(arg, "-") {
|
||||
return nil, fmt.Errorf("unexpected argument where flag should be")
|
||||
}
|
||||
if lastPositionalArg == 0 {
|
||||
return args, "", nil
|
||||
}
|
||||
|
||||
destinationArg = args[1]
|
||||
if lastPositionalArg == 1 {
|
||||
return append([]string{args[0]}, args[2:]...), args[1], nil
|
||||
}
|
||||
|
||||
return append([]string{args[0]}, args[numPositionalArgs:]...), nil
|
||||
return nil, "", fmt.Errorf("unexpected positional argument(s) %v", args[2:lastPositionalArg+1])
|
||||
}
|
||||
|
||||
func verifyFlags() error {
|
||||
func verifyFlags(o *options) error {
|
||||
log.Infof("Verifying Flags")
|
||||
if _, exists := availableRuntimes[runtimeFlag]; !exists {
|
||||
return fmt.Errorf("unknown runtime: %v", runtimeFlag)
|
||||
if o.root == "" {
|
||||
return fmt.Errorf("the install root must be specified")
|
||||
}
|
||||
|
||||
if _, exists := availableRuntimes[o.runtime]; !exists {
|
||||
return fmt.Errorf("unknown runtime: %v", o.runtime)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -220,36 +219,40 @@ func initialize() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func installToolkit() error {
|
||||
toolkitDir := filepath.Join(destinationArg, toolkitSubDir)
|
||||
|
||||
func installToolkit(o *options) error {
|
||||
log.Infof("Installing toolkit")
|
||||
|
||||
cmdline := fmt.Sprintf("%v install %v %v\n", toolkitCommand, toolkitArgsFlag, toolkitDir)
|
||||
cmd := exec.Command("sh", "-c", cmdline)
|
||||
cmdline := []string{
|
||||
toolkitCommand,
|
||||
"install",
|
||||
"--toolkit-root",
|
||||
filepath.Join(o.root, toolkitSubDir),
|
||||
}
|
||||
|
||||
cmd := exec.Command("sh", "-c", strings.Join(cmdline, " "))
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error running %v command: %v", toolkitCommand, err)
|
||||
return fmt.Errorf("error running %v command: %v", cmdline, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRuntime() error {
|
||||
toolkitDir := filepath.Join(destinationArg, toolkitSubDir)
|
||||
func setupRuntime(o *options) error {
|
||||
toolkitDir := filepath.Join(o.root, toolkitSubDir)
|
||||
|
||||
log.Infof("Setting up runtime")
|
||||
|
||||
cmdline := fmt.Sprintf("%v setup %v %v\n", runtimeFlag, runtimeArgsFlag, toolkitDir)
|
||||
cmdline := fmt.Sprintf("%v setup %v %v\n", o.runtime, o.runtimeArgs, toolkitDir)
|
||||
|
||||
cmd := exec.Command("sh", "-c", cmdline)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error running %v command: %v", runtimeFlag, err)
|
||||
return fmt.Errorf("error running %v command: %v", o.runtime, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -262,19 +265,19 @@ func waitForSignal() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func cleanupRuntime() error {
|
||||
toolkitDir := filepath.Join(destinationArg, toolkitSubDir)
|
||||
func cleanupRuntime(o *options) error {
|
||||
toolkitDir := filepath.Join(o.root, toolkitSubDir)
|
||||
|
||||
log.Infof("Cleaning up Runtime")
|
||||
|
||||
cmdline := fmt.Sprintf("%v cleanup %v %v\n", runtimeFlag, runtimeArgsFlag, toolkitDir)
|
||||
cmdline := fmt.Sprintf("%v cleanup %v %v\n", o.runtime, o.runtimeArgs, toolkitDir)
|
||||
|
||||
cmd := exec.Command("sh", "-c", cmdline)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error running %v command: %v", runtimeFlag, err)
|
||||
return fmt.Errorf("error running %v command: %v", o.runtime, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
84
tools/container/nvidia-toolkit/run_test.go
Normal file
84
tools/container/nvidia-toolkit/run_test.go
Normal file
@@ -0,0 +1,84 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseArgs(t *testing.T) {
|
||||
testCases := []struct {
|
||||
args []string
|
||||
expectedRemaining []string
|
||||
expectedRoot string
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
args: []string{},
|
||||
expectedRemaining: []string{},
|
||||
expectedRoot: "",
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
args: []string{"app"},
|
||||
expectedRemaining: []string{"app"},
|
||||
},
|
||||
{
|
||||
args: []string{"app", "root"},
|
||||
expectedRemaining: []string{"app"},
|
||||
expectedRoot: "root",
|
||||
},
|
||||
{
|
||||
args: []string{"app", "--flag"},
|
||||
expectedRemaining: []string{"app", "--flag"},
|
||||
},
|
||||
{
|
||||
args: []string{"app", "root", "--flag"},
|
||||
expectedRemaining: []string{"app", "--flag"},
|
||||
expectedRoot: "root",
|
||||
},
|
||||
{
|
||||
args: []string{"app", "root", "not-root", "--flag"},
|
||||
expectedError: fmt.Errorf("unexpected positional argument(s) [not-root]"),
|
||||
},
|
||||
{
|
||||
args: []string{"app", "root", "not-root"},
|
||||
expectedError: fmt.Errorf("unexpected positional argument(s) [not-root]"),
|
||||
},
|
||||
{
|
||||
args: []string{"app", "root", "not-root", "also"},
|
||||
expectedError: fmt.Errorf("unexpected positional argument(s) [not-root also]"),
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
remaining, root, err := ParseArgs(tc.args)
|
||||
if tc.expectedError != nil {
|
||||
require.EqualError(t, err, tc.expectedError.Error())
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.ElementsMatch(t, tc.expectedRemaining, remaining)
|
||||
require.Equal(t, tc.expectedRoot, root)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -39,13 +39,21 @@ const (
|
||||
configFilename = "config.toml"
|
||||
)
|
||||
|
||||
var toolkitDirArg string
|
||||
var nvidiaDriverRootFlag string
|
||||
var nvidiaContainerRuntimeDebugFlag string
|
||||
var nvidiaContainerRuntimeLogLevelFlag string
|
||||
var nvidiaContainerCLIDebugFlag string
|
||||
type options struct {
|
||||
DriverRoot string
|
||||
ContainerRuntimeDebug string
|
||||
ContainerRuntimeLogLevel string
|
||||
ContainerCLIDebug string
|
||||
toolkitRoot string
|
||||
|
||||
acceptNVIDIAVisibleDevicesWhenUnprivileged bool
|
||||
acceptNVIDIAVisibleDevicesAsVolumeMounts bool
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
||||
opts := options{}
|
||||
|
||||
// Create the top-level CLI
|
||||
c := cli.NewApp()
|
||||
c.Name = "toolkit"
|
||||
@@ -57,16 +65,24 @@ func main() {
|
||||
install.Name = "install"
|
||||
install.Usage = "Install the components of the NVIDIA container toolkit"
|
||||
install.ArgsUsage = "<toolkit_directory>"
|
||||
install.Before = parseArgs
|
||||
install.Action = Install
|
||||
install.Before = func(c *cli.Context) error {
|
||||
return validateOptions(c, &opts)
|
||||
}
|
||||
install.Action = func(c *cli.Context) error {
|
||||
return Install(c, &opts)
|
||||
}
|
||||
|
||||
// Create the 'delete' command
|
||||
delete := cli.Command{}
|
||||
delete.Name = "delete"
|
||||
delete.Usage = "Delete the NVIDIA container toolkit"
|
||||
delete.ArgsUsage = "<toolkit_directory>"
|
||||
delete.Before = parseArgs
|
||||
delete.Action = Delete
|
||||
delete.Before = func(c *cli.Context) error {
|
||||
return validateOptions(c, &opts)
|
||||
}
|
||||
delete.Action = func(c *cli.Context) error {
|
||||
return Delete(c, &opts)
|
||||
}
|
||||
|
||||
// Register the subcommand with the top-level CLI
|
||||
c.Commands = []*cli.Command{
|
||||
@@ -78,30 +94,51 @@ func main() {
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-driver-root",
|
||||
Value: DefaultNvidiaDriverRoot,
|
||||
Destination: &nvidiaDriverRootFlag,
|
||||
Destination: &opts.DriverRoot,
|
||||
EnvVars: []string{"NVIDIA_DRIVER_ROOT"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-debug",
|
||||
Usage: "Specify the location of the debug log file for the NVIDIA Container Runtime",
|
||||
Destination: &nvidiaContainerRuntimeDebugFlag,
|
||||
Destination: &opts.ContainerRuntimeDebug,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_DEBUG"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-debug-log-level",
|
||||
Destination: &nvidiaContainerRuntimeLogLevelFlag,
|
||||
Destination: &opts.ContainerRuntimeLogLevel,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-cli-debug",
|
||||
Usage: "Specify the location of the debug log file for the NVIDIA Container CLI",
|
||||
Destination: &nvidiaContainerCLIDebugFlag,
|
||||
Destination: &opts.ContainerCLIDebug,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_CLI_DEBUG"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "accept-nvidia-visible-devices-envvar-when-unprivileged",
|
||||
Usage: "Set the accept-nvidia-visible-devices-envvar-when-unprivileged config option",
|
||||
Value: true,
|
||||
Destination: &opts.acceptNVIDIAVisibleDevicesWhenUnprivileged,
|
||||
EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_ENVVAR_WHEN_UNPRIVILEGED"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "accept-nvidia-visible-devices-as-volume-mounts",
|
||||
Usage: "Set the accept-nvidia-visible-devices-as-volume-mounts config option",
|
||||
Destination: &opts.acceptNVIDIAVisibleDevicesAsVolumeMounts,
|
||||
EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "toolkit-root",
|
||||
Usage: "The directory where the NVIDIA Container toolkit is to be installed",
|
||||
Required: true,
|
||||
Destination: &opts.toolkitRoot,
|
||||
EnvVars: []string{"TOOLKIT_ROOT"},
|
||||
},
|
||||
}
|
||||
|
||||
// Update the subcommand flags with the common subcommand flags
|
||||
install.Flags = append([]cli.Flag{}, flags...)
|
||||
delete.Flags = append([]cli.Flag{}, flags...)
|
||||
|
||||
// Run the top-level CLI
|
||||
if err := c.Run(os.Args); err != nil {
|
||||
@@ -109,24 +146,19 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
// parseArgs parses the command line arguments to the CLI
|
||||
func parseArgs(c *cli.Context) error {
|
||||
args := c.Args()
|
||||
|
||||
log.Infof("Parsing arguments: %v", args.Slice())
|
||||
if c.NArg() != 1 {
|
||||
return fmt.Errorf("incorrect number of arguments")
|
||||
// validateOptions checks whether the specified options are valid
|
||||
func validateOptions(c *cli.Context, opts *options) error {
|
||||
if opts.toolkitRoot == "" {
|
||||
return fmt.Errorf("invalid --toolkit-root option: %v", opts.toolkitRoot)
|
||||
}
|
||||
toolkitDirArg = args.Get(0)
|
||||
log.Infof("Successfully parsed arguments")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete removes the NVIDIA container toolkit
|
||||
func Delete(cli *cli.Context) error {
|
||||
log.Infof("Deleting NVIDIA container toolkit from '%v'", toolkitDirArg)
|
||||
err := os.RemoveAll(toolkitDirArg)
|
||||
func Delete(cli *cli.Context, opts *options) error {
|
||||
log.Infof("Deleting NVIDIA container toolkit from '%v'", opts.toolkitRoot)
|
||||
err := os.RemoveAll(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error deleting toolkit directory: %v", err)
|
||||
}
|
||||
@@ -135,44 +167,44 @@ func Delete(cli *cli.Context) error {
|
||||
|
||||
// Install installs the components of the NVIDIA container toolkit.
|
||||
// Any existing installation is removed.
|
||||
func Install(cli *cli.Context) error {
|
||||
log.Infof("Installing NVIDIA container toolkit to '%v'", toolkitDirArg)
|
||||
func Install(cli *cli.Context, opts *options) error {
|
||||
log.Infof("Installing NVIDIA container toolkit to '%v'", opts.toolkitRoot)
|
||||
|
||||
log.Infof("Removing existing NVIDIA container toolkit installation")
|
||||
err := os.RemoveAll(toolkitDirArg)
|
||||
err := os.RemoveAll(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error removing toolkit directory: %v", err)
|
||||
}
|
||||
|
||||
toolkitConfigDir := filepath.Join(toolkitDirArg, ".config", "nvidia-container-runtime")
|
||||
toolkitConfigDir := filepath.Join(opts.toolkitRoot, ".config", "nvidia-container-runtime")
|
||||
toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename)
|
||||
|
||||
err = createDirectories(toolkitDirArg, toolkitConfigDir)
|
||||
err = createDirectories(opts.toolkitRoot, toolkitConfigDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not create required directories: %v", err)
|
||||
}
|
||||
|
||||
err = installContainerLibraries(toolkitDirArg)
|
||||
err = installContainerLibraries(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error installing NVIDIA container library: %v", err)
|
||||
}
|
||||
|
||||
err = installContainerRuntimes(toolkitDirArg, nvidiaDriverRootFlag)
|
||||
err = installContainerRuntimes(opts.toolkitRoot, opts.DriverRoot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error installing NVIDIA container runtime: %v", err)
|
||||
}
|
||||
|
||||
nvidiaContainerCliExecutable, err := installContainerCLI(toolkitDirArg)
|
||||
nvidiaContainerCliExecutable, err := installContainerCLI(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error installing NVIDIA container CLI: %v", err)
|
||||
}
|
||||
|
||||
_, err = installRuntimeHook(toolkitDirArg, toolkitConfigPath)
|
||||
_, err = installRuntimeHook(opts.toolkitRoot, toolkitConfigPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)
|
||||
}
|
||||
|
||||
err = installToolkitConfig(toolkitConfigPath, nvidiaDriverRootFlag, nvidiaContainerCliExecutable)
|
||||
err = installToolkitConfig(toolkitConfigPath, nvidiaContainerCliExecutable, opts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)
|
||||
}
|
||||
@@ -185,8 +217,8 @@ func Install(cli *cli.Context) error {
|
||||
// A predefined set of library candidates are considered, with the first one
|
||||
// resulting in success being installed to the toolkit folder. The install process
|
||||
// resolves the symlink for the library and copies the versioned library itself.
|
||||
func installContainerLibraries(toolkitDir string) error {
|
||||
log.Infof("Installing NVIDIA container library to '%v'", toolkitDir)
|
||||
func installContainerLibraries(toolkitRoot string) error {
|
||||
log.Infof("Installing NVIDIA container library to '%v'", toolkitRoot)
|
||||
|
||||
libs := []string{
|
||||
"libnvidia-container.so.1",
|
||||
@@ -194,7 +226,7 @@ func installContainerLibraries(toolkitDir string) error {
|
||||
}
|
||||
|
||||
for _, l := range libs {
|
||||
err := installLibrary(l, toolkitDir)
|
||||
err := installLibrary(l, toolkitRoot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to install %s: %v", l, err)
|
||||
}
|
||||
@@ -204,15 +236,15 @@ func installContainerLibraries(toolkitDir string) error {
|
||||
}
|
||||
|
||||
// installLibrary installs the specified library to the toolkit directory.
|
||||
func installLibrary(libName string, toolkitDir string) error {
|
||||
func installLibrary(libName string, toolkitRoot string) error {
|
||||
libraryPath, err := findLibrary("", libName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error locating NVIDIA container library: %v", err)
|
||||
}
|
||||
|
||||
installedLibPath, err := installFileToFolder(toolkitDir, libraryPath)
|
||||
installedLibPath, err := installFileToFolder(toolkitRoot, libraryPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error installing %v to %v: %v", libraryPath, toolkitDir, err)
|
||||
return fmt.Errorf("error installing %v to %v: %v", libraryPath, toolkitRoot, err)
|
||||
}
|
||||
log.Infof("Installed '%v' to '%v'", libraryPath, installedLibPath)
|
||||
|
||||
@@ -220,7 +252,7 @@ func installLibrary(libName string, toolkitDir string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
err = installSymlink(toolkitDir, libName, installedLibPath)
|
||||
err = installSymlink(toolkitRoot, libName, installedLibPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error installing symlink for NVIDIA container library: %v", err)
|
||||
}
|
||||
@@ -230,7 +262,7 @@ func installLibrary(libName string, toolkitDir string) error {
|
||||
|
||||
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
|
||||
// that the settings are updated to match the desired install and nvidia driver directories.
|
||||
func installToolkitConfig(toolkitConfigPath string, nvidiaDriverDir string, nvidiaContainerCliExecutablePath string) error {
|
||||
func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutablePath string, opts *options) error {
|
||||
log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)
|
||||
|
||||
config, err := toml.LoadFile(nvidiaContainerToolkitConfigSource)
|
||||
@@ -244,6 +276,10 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaDriverDir string, nvid
|
||||
}
|
||||
defer targetConfig.Close()
|
||||
|
||||
// Set the options in the root toml table
|
||||
config.Set("accept-nvidia-visible-devices-envvar-when-unprivileged", opts.acceptNVIDIAVisibleDevicesWhenUnprivileged)
|
||||
config.Set("accept-nvidia-visible-devices-as-volume-mounts", opts.acceptNVIDIAVisibleDevicesAsVolumeMounts)
|
||||
|
||||
nvidiaContainerCliKey := func(p string) []string {
|
||||
return []string{"nvidia-container-cli", p}
|
||||
}
|
||||
@@ -253,17 +289,17 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaDriverDir string, nvid
|
||||
ldconfigPath := fmt.Sprintf("%s", config.GetPath(nvidiaContainerCliKey("ldconfig")))
|
||||
|
||||
// Use the driver run root as the root:
|
||||
driverLdconfigPath := "@" + filepath.Join(nvidiaDriverDir, strings.TrimPrefix(ldconfigPath, "@/"))
|
||||
driverLdconfigPath := "@" + filepath.Join(opts.DriverRoot, strings.TrimPrefix(ldconfigPath, "@/"))
|
||||
|
||||
config.SetPath(nvidiaContainerCliKey("root"), nvidiaDriverDir)
|
||||
config.SetPath(nvidiaContainerCliKey("root"), opts.DriverRoot)
|
||||
config.SetPath(nvidiaContainerCliKey("path"), nvidiaContainerCliExecutablePath)
|
||||
config.SetPath(nvidiaContainerCliKey("ldconfig"), driverLdconfigPath)
|
||||
|
||||
// Set the debug options if selected
|
||||
debugOptions := map[string]string{
|
||||
"nvidia-container-runtime.debug": nvidiaContainerRuntimeDebugFlag,
|
||||
"nvidia-container-runtime.log-level": nvidiaContainerRuntimeLogLevelFlag,
|
||||
"nvidia-container-cli.debug": nvidiaContainerCLIDebugFlag,
|
||||
"nvidia-container-runtime.debug": opts.ContainerRuntimeDebug,
|
||||
"nvidia-container-runtime.log-level": opts.ContainerRuntimeLogLevel,
|
||||
"nvidia-container-cli.debug": opts.ContainerCLIDebug,
|
||||
}
|
||||
for key, value := range debugOptions {
|
||||
if value == "" {
|
||||
@@ -279,16 +315,20 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaDriverDir string, nvid
|
||||
if err != nil {
|
||||
return fmt.Errorf("error writing config: %v", err)
|
||||
}
|
||||
|
||||
os.Stdout.WriteString("Using config:\n")
|
||||
config.WriteTo(os.Stdout)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// installContainerCLI sets up the NVIDIA container CLI executable, copying the executable
|
||||
// and implementing the required wrapper
|
||||
func installContainerCLI(toolkitDir string) (string, error) {
|
||||
func installContainerCLI(toolkitRoot string) (string, error) {
|
||||
log.Infof("Installing NVIDIA container CLI from '%v'", nvidiaContainerCliSource)
|
||||
|
||||
env := map[string]string{
|
||||
"LD_LIBRARY_PATH": toolkitDir,
|
||||
"LD_LIBRARY_PATH": toolkitRoot,
|
||||
}
|
||||
|
||||
e := executable{
|
||||
@@ -300,7 +340,7 @@ func installContainerCLI(toolkitDir string) (string, error) {
|
||||
env: env,
|
||||
}
|
||||
|
||||
installedPath, err := e.install(toolkitDir)
|
||||
installedPath, err := e.install(toolkitRoot)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error installing NVIDIA container CLI: %v", err)
|
||||
}
|
||||
@@ -309,7 +349,7 @@ func installContainerCLI(toolkitDir string) (string, error) {
|
||||
|
||||
// installRuntimeHook sets up the NVIDIA runtime hook, copying the executable
|
||||
// and implementing the required wrapper
|
||||
func installRuntimeHook(toolkitDir string, configFilePath string) (string, error) {
|
||||
func installRuntimeHook(toolkitRoot string, configFilePath string) (string, error) {
|
||||
log.Infof("Installing NVIDIA container runtime hook from '%v'", nvidiaContainerRuntimeHookSource)
|
||||
|
||||
argLines := []string{
|
||||
@@ -325,12 +365,12 @@ func installRuntimeHook(toolkitDir string, configFilePath string) (string, error
|
||||
argLines: argLines,
|
||||
}
|
||||
|
||||
installedPath, err := e.install(toolkitDir)
|
||||
installedPath, err := e.install(toolkitRoot)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)
|
||||
}
|
||||
|
||||
err = installSymlink(toolkitDir, "nvidia-container-toolkit", installedPath)
|
||||
err = installSymlink(toolkitRoot, "nvidia-container-toolkit", installedPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error installing symlink to NVIDIA container runtime hook: %v", err)
|
||||
}
|
||||
@@ -340,8 +380,8 @@ func installRuntimeHook(toolkitDir string, configFilePath string) (string, error
|
||||
|
||||
// installSymlink creates a symlink in the toolkitDirectory that points to the specified target.
|
||||
// Note: The target is assumed to be local to the toolkit directory
|
||||
func installSymlink(toolkitDir string, link string, target string) error {
|
||||
symlinkPath := filepath.Join(toolkitDir, link)
|
||||
func installSymlink(toolkitRoot string, link string, target string) error {
|
||||
symlinkPath := filepath.Join(toolkitRoot, link)
|
||||
targetPath := filepath.Base(target)
|
||||
log.Infof("Creating symlink '%v' -> '%v'", symlinkPath, targetPath)
|
||||
|
||||
|
||||
3
vendor/github.com/BurntSushi/toml/go.mod
generated
vendored
3
vendor/github.com/BurntSushi/toml/go.mod
generated
vendored
@@ -1,3 +0,0 @@
|
||||
module github.com/BurntSushi/toml
|
||||
|
||||
go 1.16
|
||||
64
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/cgo_helpers.go
generated
vendored
Normal file
64
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/cgo_helpers.go
generated
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nvml
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
import "C"
|
||||
|
||||
var cgoAllocsUnknown = new(struct{})
|
||||
|
||||
type stringHeader struct {
|
||||
Data unsafe.Pointer
|
||||
Len int
|
||||
}
|
||||
|
||||
func clen(n []byte) int {
|
||||
for i := 0; i < len(n); i++ {
|
||||
if n[i] == 0 {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return len(n)
|
||||
}
|
||||
|
||||
func uint32SliceToIntSlice(s []uint32) []int {
|
||||
ret := make([]int, len(s))
|
||||
for i := range s {
|
||||
ret[i] = int(s[i])
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// packPCharString creates a Go string backed by *C.char and avoids copying.
|
||||
func packPCharString(p *C.char) (raw string) {
|
||||
if p != nil && *p != 0 {
|
||||
h := (*stringHeader)(unsafe.Pointer(&raw))
|
||||
h.Data = unsafe.Pointer(p)
|
||||
for *p != 0 {
|
||||
p = (*C.char)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1)) // p++
|
||||
}
|
||||
h.Len = int(uintptr(unsafe.Pointer(p)) - uintptr(h.Data))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// unpackPCharString represents the data from Go string as *C.char and avoids copying.
|
||||
func unpackPCharString(str string) (*C.char, *struct{}) {
|
||||
h := (*stringHeader)(unsafe.Pointer(&str))
|
||||
return (*C.char)(h.Data), cgoAllocsUnknown
|
||||
}
|
||||
23
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/cgo_helpers.h
generated
vendored
Normal file
23
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/cgo_helpers.h
generated
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// WARNING: THIS FILE WAS AUTOMATICALLY GENERATED.
|
||||
// Code generated by https://git.io/c-for-go. DO NOT EDIT.
|
||||
|
||||
#include "nvml.h"
|
||||
#include <stdlib.h>
|
||||
#pragma once
|
||||
|
||||
#define __CGOGEN 1
|
||||
|
||||
1139
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go
generated
vendored
Normal file
1139
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
27
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const_gen.go
generated
vendored
Normal file
27
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const_gen.go
generated
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nvml
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
)
|
||||
|
||||
const (
|
||||
SYSTEM_PROCESS_NAME_BUFFER_SIZE = 256
|
||||
)
|
||||
|
||||
func STRUCT_VERSION(data interface{}, version uint32) uint32 {
|
||||
return uint32(uint32(reflect.Indirect(reflect.ValueOf(data)).Type().Size()) | (version << uint32(24)))
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user