mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
356 Commits
v1.10.0-rc
...
v1.12.0-rc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
217a135eb1 | ||
|
|
22e65b320b | ||
|
|
53bb940b30 | ||
|
|
1c1ad8098a | ||
|
|
203db4390c | ||
|
|
b6d9c2c1ad | ||
|
|
429ef4d4e9 | ||
|
|
25759ca933 | ||
|
|
74abea07e2 | ||
|
|
7955bb1a84 | ||
|
|
75b11eb80a | ||
|
|
c958817eef | ||
|
|
80f8c2a418 | ||
|
|
08640a6f64 | ||
|
|
9db31f7506 | ||
|
|
7fd40632fe | ||
|
|
6ef19d2925 | ||
|
|
83ce83239b | ||
|
|
30fb486e44 | ||
|
|
0022661565 | ||
|
|
28e882f26f | ||
|
|
71fbe7a812 | ||
|
|
ce3d94af1a | ||
|
|
0bc09665a8 | ||
|
|
205ba098e9 | ||
|
|
877832da69 | ||
|
|
b7ba96a72e | ||
|
|
93c59f2d9c | ||
|
|
5a56b658ba | ||
|
|
99889671b5 | ||
|
|
a2fb017208 | ||
|
|
f7021d84b5 | ||
|
|
c793fc27d8 | ||
|
|
3d2328bdfd | ||
|
|
76b69f45de | ||
|
|
73e65edaa9 | ||
|
|
cd7ee5a435 | ||
|
|
eac4faddc6 | ||
|
|
bc8a73dde4 | ||
|
|
624b9d8ee6 | ||
|
|
9d6e2ff1b0 | ||
|
|
aca0c7bc5a | ||
|
|
db47b58275 | ||
|
|
59bf7607ce | ||
|
|
61ff3fbd7b | ||
|
|
523fc57ab4 | ||
|
|
ae18c5d847 | ||
|
|
4abdc2f35d | ||
|
|
f8748bfa9a | ||
|
|
5fb0ae2c2d | ||
|
|
899fc72014 | ||
|
|
1267c1d9a2 | ||
|
|
9a697e340b | ||
|
|
abe8ca71e0 | ||
|
|
9bbf7dcf96 | ||
|
|
ec1222b58b | ||
|
|
229b46e0ca | ||
|
|
b6a68c4add | ||
|
|
e588bfac7d | ||
|
|
224020533e | ||
|
|
3736bb3aca | ||
|
|
1e72f92b74 | ||
|
|
896f5b2e9f | ||
|
|
c068d4048f | ||
|
|
8796cd76b0 | ||
|
|
1597ede2af | ||
|
|
3dd8020695 | ||
|
|
dfa041991f | ||
|
|
568896742b | ||
|
|
f52973217f | ||
|
|
efd29f1cec | ||
|
|
4b02670049 | ||
|
|
8550874686 | ||
|
|
38513d5a53 | ||
|
|
a35236a8f6 | ||
|
|
0c2e72b7c1 | ||
|
|
f0bdfbebe4 | ||
|
|
a4fa61d05d | ||
|
|
6e23a635c6 | ||
|
|
4dedac6a24 | ||
|
|
8c1b9b33c1 | ||
|
|
d37c17857e | ||
|
|
a0065456d0 | ||
|
|
a34a571d2e | ||
|
|
bb4cfece61 | ||
|
|
b16d263ee7 | ||
|
|
027395bb8a | ||
|
|
3ecd790206 | ||
|
|
52bb9e186b | ||
|
|
68b6d1cab1 | ||
|
|
bdb67b4fba | ||
|
|
d0c39a11d5 | ||
|
|
9de6361938 | ||
|
|
fb016dca86 | ||
|
|
8beb7b4231 | ||
|
|
2b08a79206 | ||
|
|
5885fead8f | ||
|
|
a9fb7a4a88 | ||
|
|
b5dbcaeaf9 | ||
|
|
80a46d4a5c | ||
|
|
febce822d5 | ||
|
|
e8099a713c | ||
|
|
d9de4a09b8 | ||
|
|
2dbcda2619 | ||
|
|
691b93ffb0 | ||
|
|
cb0c94cd40 | ||
|
|
3168718563 | ||
|
|
dc8972a26a | ||
|
|
0a2d8f4d22 | ||
|
|
8d623967ed | ||
|
|
503ed96275 | ||
|
|
d8ba84d427 | ||
|
|
8e8c41a3bc | ||
|
|
e34fe17b45 | ||
|
|
c5b0278c58 | ||
|
|
8daa257b35 | ||
|
|
6329174cfc | ||
|
|
1ec41c1bf1 | ||
|
|
581a76de38 | ||
|
|
5d52ca8909 | ||
|
|
ad7151d394 | ||
|
|
3269a7b0e7 | ||
|
|
6a155cc606 | ||
|
|
a5bbf613e8 | ||
|
|
22427c1359 | ||
|
|
f17121fd6c | ||
|
|
256e37eb3f | ||
|
|
bdfd123b9d | ||
|
|
3f7dce202a | ||
|
|
a6d21abe14 | ||
|
|
d0f1fe2273 | ||
|
|
8de9593209 | ||
|
|
64b2b50470 | ||
|
|
4dc1451c49 | ||
|
|
211081ff25 | ||
|
|
c1c1d5cf8e | ||
|
|
e91ffef258 | ||
|
|
47c8aa3790 | ||
|
|
33b4e7fb0a | ||
|
|
936da0295b | ||
|
|
c2205c14fb | ||
|
|
56935f5743 | ||
|
|
1b3bae790c | ||
|
|
47559a8c87 | ||
|
|
86412ea821 | ||
|
|
b8aa844171 | ||
|
|
f9464c5cf9 | ||
|
|
9df75e1fa3 | ||
|
|
0218e2ebf7 | ||
|
|
a9dc6550d5 | ||
|
|
ffd6ec3c54 | ||
|
|
de3e0df96c | ||
|
|
e5dadf34d9 | ||
|
|
52145f2d73 | ||
|
|
90df3caf62 | ||
|
|
50db66a925 | ||
|
|
8587fa05bd | ||
|
|
8129dade3c | ||
|
|
3610fe7c33 | ||
|
|
90518e0ce5 | ||
|
|
9c060f06ba | ||
|
|
e848aa7813 | ||
|
|
feedc912e4 | ||
|
|
ab3f05cf62 | ||
|
|
35982e51bf | ||
|
|
94e650c518 | ||
|
|
d9edc18bf8 | ||
|
|
f4d01e0a05 | ||
|
|
648cfaba51 | ||
|
|
3a9de13f4e | ||
|
|
629a68937e | ||
|
|
34e80abdea | ||
|
|
1161b21166 | ||
|
|
bcdef81e30 | ||
|
|
acc0afbb7a | ||
|
|
7584044b3c | ||
|
|
02c14e981c | ||
|
|
37ee972f74 | ||
|
|
3809407b6a | ||
|
|
f9547c447a | ||
|
|
eb85d45137 | ||
|
|
9f0060f651 | ||
|
|
0e6dc3f7ea | ||
|
|
1b4944e1de | ||
|
|
83743e3613 | ||
|
|
87afcc3ef4 | ||
|
|
6ed3a4e1a6 | ||
|
|
8a56671d18 | ||
|
|
1d81db76a6 | ||
|
|
f50aecb84e | ||
|
|
a4258277e1 | ||
|
|
18eb3c7c38 | ||
|
|
a0e728b5c8 | ||
|
|
df0176cca4 | ||
|
|
b68b3c543b | ||
|
|
aea1a85bb4 | ||
|
|
98e874e750 | ||
|
|
eef016c27d | ||
|
|
19f89ecafd | ||
|
|
8817dee66c | ||
|
|
404e266222 | ||
|
|
9b898c65fa | ||
|
|
5c39cf4deb | ||
|
|
beff276a52 | ||
|
|
55cb82c6c8 | ||
|
|
88d1143827 | ||
|
|
d5162b1917 | ||
|
|
ec078543a1 | ||
|
|
9191074666 | ||
|
|
89824849d3 | ||
|
|
877083f091 | ||
|
|
6467fcd0f5 | ||
|
|
fd135f1a8b | ||
|
|
4e08ec2405 | ||
|
|
925c348565 | ||
|
|
25fd1aaf7e | ||
|
|
91e645b91b | ||
|
|
a1c2f07b6e | ||
|
|
7f7bec0668 | ||
|
|
cb34f7c6d1 | ||
|
|
7f47a61986 | ||
|
|
e8843c38f2 | ||
|
|
d66c00dd1d | ||
|
|
55ac8628c8 | ||
|
|
175f75b43f | ||
|
|
da3226745c | ||
|
|
b23e3ea13a | ||
|
|
02f0ee08fc | ||
|
|
4b0e79be50 | ||
|
|
8b729475e2 | ||
|
|
a1319b1786 | ||
|
|
278fa43303 | ||
|
|
d75f364b27 | ||
|
|
52d5021b76 | ||
|
|
7cfd3bd510 | ||
|
|
05ca131858 | ||
|
|
181ce8571d | ||
|
|
2ab0c6abce | ||
|
|
50caf29b4e | ||
|
|
067f7af142 | ||
|
|
d1449951bc | ||
|
|
a05af50b0f | ||
|
|
950aff269b | ||
|
|
e033db559f | ||
|
|
9a24a40fd2 | ||
|
|
df391e2144 | ||
|
|
9146b4d4b6 | ||
|
|
068d7e085b | ||
|
|
79510a8290 | ||
|
|
50240c93bd | ||
|
|
7ca0e5db60 | ||
|
|
c0e6765d46 | ||
|
|
7739b0e8ea | ||
|
|
ab23fc52db | ||
|
|
530d66b5c7 | ||
|
|
dad3e855b5 | ||
|
|
15cbd54d1c | ||
|
|
4cd719692e | ||
|
|
b940294557 | ||
|
|
840cdec36d | ||
|
|
73a5b70a02 | ||
|
|
f0cae49892 | ||
|
|
e07c7f0fa2 | ||
|
|
52ce97929c | ||
|
|
084eae6e0d | ||
|
|
f656b5c887 | ||
|
|
55c1d7c256 | ||
|
|
0f2b20fffc | ||
|
|
bb69727148 | ||
|
|
0b4f3aaf69 | ||
|
|
e5125515f0 | ||
|
|
033b2fd90d | ||
|
|
a0a00e38fd | ||
|
|
77cf70b625 | ||
|
|
8ab3d713bc | ||
|
|
c58d81cec5 | ||
|
|
2a3b87157a | ||
|
|
a68d1d914c | ||
|
|
f7ac8b8139 | ||
|
|
b2902cc04a | ||
|
|
25710468dc | ||
|
|
4a19bf16a8 | ||
|
|
c77e86137e | ||
|
|
60dacb76b6 | ||
|
|
19138a2110 | ||
|
|
bdb43aa8f2 | ||
|
|
d62cce3c75 | ||
|
|
ff86ecb2a5 | ||
|
|
ad9ec1efae | ||
|
|
9db5f9c9e8 | ||
|
|
4c49f75365 | ||
|
|
e591f3f26b | ||
|
|
e0ad82e467 | ||
|
|
3a1404f2f4 | ||
|
|
cf7bb91481 | ||
|
|
ba0e606df2 | ||
|
|
ae57a2fc93 | ||
|
|
1eb0e3c8b3 | ||
|
|
a524c44161 | ||
|
|
675fbace01 | ||
|
|
eac326c5ea | ||
|
|
b0f7a3809f | ||
|
|
126c004ee0 | ||
|
|
d2516cb5d5 | ||
|
|
4696d7ee69 | ||
|
|
ef6f48e9f7 | ||
|
|
088db09180 | ||
|
|
b8ef6be6ea | ||
|
|
1d2e1bd403 | ||
|
|
55efdc8765 | ||
|
|
395f6cecb2 | ||
|
|
e9d929dc2f | ||
|
|
117f68fa6e | ||
|
|
7574a0d7de | ||
|
|
335de5a352 | ||
|
|
c76946cbcc | ||
|
|
e93bafa6d4 | ||
|
|
785f120c31 | ||
|
|
9e46d41dbe | ||
|
|
70c4588197 | ||
|
|
9f50ac95c4 | ||
|
|
75ce057878 | ||
|
|
9d2363e12e | ||
|
|
49f4bb3198 | ||
|
|
583793b7ae | ||
|
|
5d7b3a4a96 | ||
|
|
a672713dba | ||
|
|
50cf07e4cd | ||
|
|
8f0e1906c2 | ||
|
|
2e319b5b08 | ||
|
|
f4d87e6912 | ||
|
|
fd06c7a00b | ||
|
|
8fabeed3a4 | ||
|
|
0c737bbdcc | ||
|
|
38a4c9fa8f | ||
|
|
6e60b24828 | ||
|
|
bdf997c761 | ||
|
|
4ce932e7a7 | ||
|
|
4145cdf7f7 | ||
|
|
0b2be45ba2 | ||
|
|
ce3cdb6fd9 | ||
|
|
3ba18f89b0 | ||
|
|
0de159e8b4 | ||
|
|
3fbffa0b48 | ||
|
|
75dfea1406 | ||
|
|
c24bd4aa4e | ||
|
|
2b9dc5cbcf | ||
|
|
234d05e57e | ||
|
|
abb0b7be5d | ||
|
|
c09e5aca77 | ||
|
|
6709da4cea | ||
|
|
84f7daf108 | ||
|
|
ac49dc320c | ||
|
|
d304e06ffe | ||
|
|
49756cb7ba | ||
|
|
8c7d919d9f |
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -12,9 +12,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
default:
|
||||
image: docker:stable
|
||||
image: docker
|
||||
services:
|
||||
- name: docker:stable-dind
|
||||
- name: docker:dind
|
||||
command: ["--experimental"]
|
||||
|
||||
variables:
|
||||
@@ -34,51 +34,91 @@ stages:
|
||||
- scan
|
||||
- release
|
||||
|
||||
.main-or-manual:
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main"
|
||||
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
|
||||
- if: $CI_PIPELINE_SOURCE == "schedule"
|
||||
when: manual
|
||||
|
||||
# Define the distribution targets
|
||||
.dist-amazonlinux2:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: amazonlinux2
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-centos7:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: centos7
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-centos8:
|
||||
variables:
|
||||
DIST: centos8
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-debian10:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: debian10
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-debian9:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: debian9
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-fedora35:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: fedora35
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-opensuse-leap15.1:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: opensuse-leap15.1
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-ubi8:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubi8
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-ubuntu16.04:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubuntu16.04
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-ubuntu18.04:
|
||||
variables:
|
||||
DIST: ubuntu18.04
|
||||
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-ubuntu20.04:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubuntu20.04
|
||||
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-packaging:
|
||||
variables:
|
||||
@@ -98,6 +138,8 @@ stages:
|
||||
ARCH: arm64
|
||||
|
||||
.arch-ppc64le:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
ARCH: ppc64le
|
||||
|
||||
@@ -138,7 +180,7 @@ test-packaging:
|
||||
# Download the regctl binary for use in the release steps
|
||||
.regctl-setup:
|
||||
before_script:
|
||||
- export REGCTL_VERSION=v0.3.10
|
||||
- export REGCTL_VERSION=v0.4.5
|
||||
- apk add --no-cache curl
|
||||
- mkdir -p bin
|
||||
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
|
||||
@@ -190,7 +232,7 @@ test-packaging:
|
||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/container-toolkit"
|
||||
|
||||
# Define an external release step that pushes an image to an external repository.
|
||||
# This includes a devlopment image off master.
|
||||
# This includes a devlopment image off main.
|
||||
.release:external:
|
||||
extends:
|
||||
- .release
|
||||
@@ -210,13 +252,6 @@ release:staging-centos7:
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
release:staging-centos8:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-centos8
|
||||
needs:
|
||||
- image-centos8
|
||||
|
||||
release:staging-ubi8:
|
||||
extends:
|
||||
- .release:staging
|
||||
@@ -239,10 +274,7 @@ release:staging-ubuntu20.04:
|
||||
- .release:staging
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- test-toolkit-ubuntu20.04
|
||||
- test-containerd-ubuntu20.04
|
||||
- test-crio-ubuntu20.04
|
||||
- test-docker-ubuntu20.04
|
||||
- image-ubuntu20.04
|
||||
|
||||
release:staging-packaging:
|
||||
extends:
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,6 +4,8 @@ dist
|
||||
/coverage.out*
|
||||
/test/output/
|
||||
/nvidia-container-runtime
|
||||
/nvidia-container-runtime-hook
|
||||
/nvidia-container-toolkit
|
||||
/nvidia-ctk
|
||||
/shared-*
|
||||
/release-*
|
||||
104
.gitlab-ci.yml
104
.gitlab-ci.yml
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -94,9 +94,9 @@ unit-tests:
|
||||
- .multi-arch-build
|
||||
- .package-artifacts
|
||||
stage: package-build
|
||||
timeout: 2h 30m
|
||||
timeout: 3h
|
||||
script:
|
||||
- ./scripts/release.sh ${DIST}-${ARCH}
|
||||
- ./scripts/build-packages.sh ${DIST}-${ARCH}
|
||||
|
||||
artifacts:
|
||||
name: ${ARTIFACTS_NAME}
|
||||
@@ -158,6 +158,18 @@ package-debian9-amd64:
|
||||
- .dist-debian9
|
||||
- .arch-amd64
|
||||
|
||||
package-fedora35-aarch64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-fedora35
|
||||
- .arch-aarch64
|
||||
|
||||
package-fedora35-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-fedora35
|
||||
- .arch-x86_64
|
||||
|
||||
package-opensuse-leap15.1-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
@@ -231,16 +243,6 @@ image-centos7:
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
|
||||
image-centos8:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-centos8
|
||||
needs:
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-x86_64
|
||||
- package-centos8-ppc64le
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
- .image-build
|
||||
@@ -260,7 +262,8 @@ image-ubuntu18.04:
|
||||
needs:
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
- job: package-ubuntu18.04-ppc64le
|
||||
optional: true
|
||||
|
||||
image-ubuntu20.04:
|
||||
extends:
|
||||
@@ -279,21 +282,36 @@ image-packaging:
|
||||
- .package-artifacts
|
||||
- .dist-packaging
|
||||
needs:
|
||||
- package-amazonlinux2-aarch64
|
||||
- package-amazonlinux2-x86_64
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-ppc64le
|
||||
- package-centos8-x86_64
|
||||
- package-debian10-amd64
|
||||
- package-debian9-amd64
|
||||
- package-opensuse-leap15.1-x86_64
|
||||
- package-ubuntu16.04-amd64
|
||||
- package-ubuntu16.04-ppc64le
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
- job: package-centos8-aarch64
|
||||
- job: package-centos8-x86_64
|
||||
- job: package-ubuntu18.04-amd64
|
||||
- job: package-ubuntu18.04-arm64
|
||||
- job: package-amazonlinux2-aarch64
|
||||
optional: true
|
||||
- job: package-amazonlinux2-x86_64
|
||||
optional: true
|
||||
- job: package-centos7-ppc64le
|
||||
optional: true
|
||||
- job: package-centos7-x86_64
|
||||
optional: true
|
||||
- job: package-centos8-ppc64le
|
||||
optional: true
|
||||
- job: package-debian10-amd64
|
||||
optional: true
|
||||
- job: package-debian9-amd64
|
||||
optional: true
|
||||
- job: package-fedora35-aarch64
|
||||
optional: true
|
||||
- job: package-fedora35-x86_64
|
||||
optional: true
|
||||
- job: package-opensuse-leap15.1-x86_64
|
||||
optional: true
|
||||
- job: package-ubuntu16.04-amd64
|
||||
optional: true
|
||||
- job: package-ubuntu16.04-ppc64le
|
||||
optional: true
|
||||
- job: package-ubuntu18.04-ppc64le
|
||||
optional: true
|
||||
|
||||
# Define publish test helpers
|
||||
.test:toolkit:
|
||||
@@ -353,31 +371,3 @@ test-docker-ubuntu18.04:
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
test-toolkit-ubuntu20.04:
|
||||
extends:
|
||||
- .test:toolkit
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
test-containerd-ubuntu20.04:
|
||||
extends:
|
||||
- .test:containerd
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
test-crio-ubuntu20.04:
|
||||
extends:
|
||||
- .test:crio
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
test-docker-ubuntu20.04:
|
||||
extends:
|
||||
- .test:docker
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
|
||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,9 +1,12 @@
|
||||
[submodule "third_party/libnvidia-container"]
|
||||
path = third_party/libnvidia-container
|
||||
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
|
||||
branch = main
|
||||
[submodule "third_party/nvidia-container-runtime"]
|
||||
path = third_party/nvidia-container-runtime
|
||||
url = https://gitlab.com/nvidia/container-toolkit/container-runtime.git
|
||||
branch = main
|
||||
[submodule "third_party/nvidia-docker"]
|
||||
path = third_party/nvidia-docker
|
||||
url = https://gitlab.com/nvidia/container-toolkit/nvidia-docker.git
|
||||
branch = main
|
||||
|
||||
244
.nvidia-ci.yml
244
.nvidia-ci.yml
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -27,14 +27,17 @@ default:
|
||||
variables:
|
||||
DOCKER_DRIVER: overlay2
|
||||
DOCKER_TLS_CERTDIR: "/certs"
|
||||
# Release "devel"-tagged images off the master branch
|
||||
RELEASE_DEVEL_BRANCH: "master"
|
||||
# Release "devel"-tagged images off the main branch
|
||||
RELEASE_DEVEL_BRANCH: "main"
|
||||
DEVEL_RELEASE_IMAGE_VERSION: "devel"
|
||||
# On the multi-arch builder we don't need the qemu setup.
|
||||
SKIP_QEMU_SETUP: "1"
|
||||
# Define the public staging registry
|
||||
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
|
||||
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
|
||||
ARTIFACTORY_REPO_BASE: "https://urm.nvidia.com/artifactory/sw-gpu-cloudnative"
|
||||
# TODO: We should set the kitmaker release folder here once we have the end-to-end workflow set up
|
||||
KITMAKER_RELEASE_FOLDER: "testing"
|
||||
|
||||
.image-pull:
|
||||
stage: image-build
|
||||
@@ -48,8 +51,9 @@ variables:
|
||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
PUSH_MULTIPLE_TAGS: "false"
|
||||
# We delay the job start to allow the public pipeline to generate the required images.
|
||||
when: delayed
|
||||
start_in: 30 minutes
|
||||
rules:
|
||||
- when: delayed
|
||||
start_in: 30 minutes
|
||||
timeout: 30 minutes
|
||||
retry:
|
||||
max: 2
|
||||
@@ -67,34 +71,132 @@ variables:
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-centos7
|
||||
|
||||
image-centos8:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-centos8
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubi8
|
||||
- .image-pull
|
||||
|
||||
image-ubuntu18.04:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubuntu18.04
|
||||
- .image-pull
|
||||
|
||||
image-ubuntu20.04:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubuntu20.04
|
||||
- .image-pull
|
||||
|
||||
# The DIST=packaging target creates an image containing all built packages
|
||||
image-packaging:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-packaging
|
||||
- .image-pull
|
||||
|
||||
# Define the package release targets
|
||||
release:packages:amazonlinux2-aarch64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-amazonlinux2
|
||||
- .arch-aarch64
|
||||
|
||||
release:packages:amazonlinux2-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-amazonlinux2
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:centos7-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos7
|
||||
- .arch-ppc64le
|
||||
|
||||
release:packages:centos7-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos7
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:centos8-aarch64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos8
|
||||
- .arch-aarch64
|
||||
|
||||
release:packages:centos8-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos8
|
||||
- .arch-ppc64le
|
||||
|
||||
release:packages:centos8-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos8
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:debian10-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-debian10
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:debian9-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-debian9
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:fedora35-aarch64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-fedora35
|
||||
- .arch-aarch64
|
||||
|
||||
release:packages:fedora35-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-fedora35
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:opensuse-leap15.1-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-opensuse-leap15.1
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:ubuntu16.04-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu16.04
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:ubuntu16.04-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu16.04
|
||||
- .arch-ppc64le
|
||||
|
||||
release:packages:ubuntu18.04-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:ubuntu18.04-arm64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-arm64
|
||||
|
||||
release:packages:ubuntu18.04-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-ppc64le
|
||||
|
||||
# We skip the integration tests for the internal CI:
|
||||
.integration:
|
||||
@@ -112,9 +214,9 @@ image-packaging:
|
||||
variables:
|
||||
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}-${DIST}"
|
||||
IMAGE_ARCHIVE: "container-toolkit.tar"
|
||||
except:
|
||||
variables:
|
||||
- $SKIP_SCANS && $SKIP_SCANS == "yes"
|
||||
rules:
|
||||
- if: $SKIP_SCANS != "yes"
|
||||
- when: manual
|
||||
before_script:
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
# TODO: We should specify the architecture here and scan all architectures
|
||||
@@ -139,85 +241,59 @@ image-packaging:
|
||||
# Define the scan targets
|
||||
scan-centos7-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos7
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
scan-centos7-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos7
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-centos7
|
||||
- scan-centos7-amd64
|
||||
|
||||
scan-centos8-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos8
|
||||
- .platform-amd64
|
||||
needs:
|
||||
- image-centos8
|
||||
|
||||
scan-centos8-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos8
|
||||
- .platform-arm64
|
||||
needs:
|
||||
- image-centos8
|
||||
- scan-centos8-amd64
|
||||
|
||||
scan-ubuntu18.04-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu18.04
|
||||
- .platform-amd64
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
scan-ubuntu18.04-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu18.04
|
||||
- .platform-arm64
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
- scan-ubuntu18.04-amd64
|
||||
|
||||
scan-ubuntu20.04-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu20.04
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
scan-ubuntu20.04-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu20.04
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
- scan-ubuntu20.04-amd64
|
||||
|
||||
scan-ubi8-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubi8
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubi8
|
||||
|
||||
scan-ubi8-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubi8
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubi8
|
||||
- scan-ubi8-amd64
|
||||
@@ -232,14 +308,26 @@ scan-ubi8-arm64:
|
||||
OUT_REGISTRY: "${NGC_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
||||
|
||||
.release:dockerhub:
|
||||
extends:
|
||||
- .release:external
|
||||
.release:packages:
|
||||
stage: release
|
||||
needs:
|
||||
- image-packaging
|
||||
variables:
|
||||
OUT_REGISTRY_USER: "${REGISTRY_USER}"
|
||||
OUT_REGISTRY_TOKEN: "${REGISTRY_TOKEN}"
|
||||
OUT_REGISTRY: "${DOCKERHUB_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${REGISTRY_IMAGE}"
|
||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
PACKAGE_REGISTRY: "${CI_REGISTRY}"
|
||||
PACKAGE_REGISTRY_USER: "${CI_REGISTRY_USER}"
|
||||
PACKAGE_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
||||
PACKAGE_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
PACKAGE_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}-packaging"
|
||||
PACKAGE_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-${PACKAGE_REPO_TYPE}-local"
|
||||
KITMAKER_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-generic-local/${KITMAKER_RELEASE_FOLDER}"
|
||||
script:
|
||||
- !reference [.regctl-setup, before_script]
|
||||
- apk add --no-cache bash
|
||||
- regctl registry login "${PACKAGE_REGISTRY}" -u "${PACKAGE_REGISTRY_USER}" -p "${PACKAGE_REGISTRY_TOKEN}"
|
||||
- ./scripts/extract-packages.sh "${PACKAGE_IMAGE_NAME}:${PACKAGE_IMAGE_TAG}" "${DIST}-${ARCH}"
|
||||
# TODO: ./scripts/release-packages-artifactory.sh "${DIST}-${ARCH}" "${PACKAGE_ARTIFACTORY_REPO}"
|
||||
- ./scripts/release-kitmaker-artifactory.sh "${DIST}-${ARCH}" "${KITMAKER_ARTIFACTORY_REPO}"
|
||||
|
||||
release:staging-ubuntu18.04:
|
||||
extends:
|
||||
@@ -248,62 +336,24 @@ release:staging-ubuntu18.04:
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
release:staging-ubuntu20.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
# Define the external release targets
|
||||
# Release to NGC
|
||||
release:ngc-centos7:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-centos7
|
||||
|
||||
release:ngc-centos8:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-centos8
|
||||
|
||||
release:ngc-ubuntu18.04:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubuntu18.04
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubuntu20.04:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubuntu20.04
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubi8:
|
||||
extends:
|
||||
- .dist-ubi8
|
||||
- .release:ngc
|
||||
- .dist-ubi8
|
||||
|
||||
# Release to Dockerhub
|
||||
release:dockerhub-centos7:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-centos7
|
||||
|
||||
release:dockerhub-centos8:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-centos8
|
||||
|
||||
release:dockerhub-ubuntu18.04:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-ubuntu18.04
|
||||
|
||||
release:dockerhub-ubuntu20.04:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-ubuntu20.04
|
||||
|
||||
release:dockerhub-ubi8:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-ubi8
|
||||
|
||||
211
CHANGELOG.md
Normal file
211
CHANGELOG.md
Normal file
@@ -0,0 +1,211 @@
|
||||
# NVIDIA Container Toolkit Changelog
|
||||
|
||||
## v1.12.0-rc.2
|
||||
|
||||
* Inject Direct Rendering Manager (DRM) devices into a container using the NVIDIA Container Runtime
|
||||
* Improve logging of errors from the NVIDIA Container Runtime
|
||||
* Improve CDI specification generation to support rootless podman
|
||||
* Use `nvidia-ctk cdi generate` to generate CDI specifications instead of `nvidia-ctk info generate-cdi`
|
||||
* [libnvidia-container] Skip creation of existing files when these are already mounted
|
||||
|
||||
## v1.12.0-rc.1
|
||||
|
||||
* Add support for multiple Docker Swarm resources
|
||||
* Improve injection of Vulkan configurations and libraries
|
||||
* Add `nvidia-ctk info generate-cdi` command to generated CDI specification for available devices
|
||||
* [libnvidia-container] Include NVVM compiler library in compute libs
|
||||
|
||||
## v1.11.0
|
||||
|
||||
* Promote v1.11.0-rc.3 to v1.11.0
|
||||
|
||||
## v1.11.0-rc.3
|
||||
|
||||
* Build fedora35 packages
|
||||
* Introduce an `nvidia-container-toolkit-base` package for better dependency management
|
||||
* Fix removal of `nvidia-container-runtime-hook` on RPM-based systems
|
||||
* Inject platform files into container on Tegra-based systems
|
||||
* [toolkit container] Update CUDA base images to 11.7.1
|
||||
* [libnvidia-container] Preload libgcc_s.so.1 on arm64 systems
|
||||
|
||||
## v1.11.0-rc.2
|
||||
|
||||
* Allow `accept-nvidia-visible-devices-*` config options to be set by toolkit container
|
||||
* [libnvidia-container] Fix bug where LDCache was not updated when the `--no-pivot-root` option was specified
|
||||
|
||||
## v1.11.0-rc.1
|
||||
|
||||
* Add discovery of GPUDirect Storage (`nvidia-fs*`) devices if the `NVIDIA_GDS` environment variable of the container is set to `enabled`
|
||||
* Add discovery of MOFED Infiniband devices if the `NVIDIA_MOFED` environment variable of the container is set to `enabled`
|
||||
* Fix bug in CSV mode where libraries listed as `sym` entries in mount specification are not added to the LDCache.
|
||||
* Rename `nvidia-container-toolkit` executable to `nvidia-container-runtime-hook` and create `nvidia-container-toolkit` as a symlink to `nvidia-container-runtime-hook` instead.
|
||||
* Add `nvidia-ctk runtime configure` command to configure the Docker config file (e.g. `/etc/docker/daemon.json`) for use with the NVIDIA Container Runtime.
|
||||
|
||||
## v1.10.0
|
||||
|
||||
* Promote v1.10.0-rc.3 to v1.10.0
|
||||
|
||||
## v1.10.0-rc.3
|
||||
|
||||
* Use default config instead of raising an error if config file cannot be found
|
||||
* Ignore NVIDIA_REQUIRE_JETPACK* environment variables for requirement checks
|
||||
* Fix bug in detection of Tegra systems where `/sys/devices/soc0/family` is ignored
|
||||
* Fix bug where links to devices were detected as devices
|
||||
* [libnvida-container] Fix bug introduced when adding libcudadebugger.so to list of libraries
|
||||
|
||||
## v1.10.0-rc.2
|
||||
|
||||
* Add support for NVIDIA_REQUIRE_* checks for cuda version and arch to csv mode
|
||||
* Switch to debug logging to reduce log verbosity
|
||||
* Support logging to logs requested in command line
|
||||
* Fix bug when launching containers with relative root path (e.g. using containerd)
|
||||
* Allow low-level runtime path to be set explicitly as nvidia-container-runtime.runtimes option
|
||||
* Fix failure to locate low-level runtime if PATH envvar is unset
|
||||
* Replace experimental option for NVIDIA Container Runtime with nvidia-container-runtime.mode = csv option
|
||||
* Use csv as default mode on Tegra systems without NVML
|
||||
* Add --version flag to all CLIs
|
||||
* [libnvidia-container] Bump libtirpc to 1.3.2
|
||||
* [libnvidia-container] Fix bug when running host ldconfig using glibc compiled with a non-standard prefix
|
||||
* [libnvidia-container] Add libcudadebugger.so to list of compute libraries
|
||||
|
||||
## v1.10.0-rc.1
|
||||
|
||||
* Include nvidia-ctk CLI in installed binaries
|
||||
* Add experimental option to NVIDIA Container Runtime
|
||||
|
||||
## v1.9.0
|
||||
|
||||
* [libnvidia-container] Add additional check for Tegra in /sys/.../family file in CLI
|
||||
* [libnvidia-container] Update jetpack-specific CLI option to only load Base CSV files by default
|
||||
* [libnvidia-container] Fix bug (from 1.8.0) when mounting GSP firmware into containers without /lib to /usr/lib symlinks
|
||||
* [libnvidia-container] Update nvml.h to CUDA 11.6.1 nvML_DEV 11.6.55
|
||||
* [libnvidia-container] Update switch statement to include new brands from latest nvml.h
|
||||
* [libnvidia-container] Process all --require flags on Jetson platforms
|
||||
* [libnvidia-container] Fix long-standing issue with running ldconfig on Debian systems
|
||||
|
||||
## v1.8.1
|
||||
|
||||
* [libnvidia-container] Fix bug in determining cgroup root when running in nested containers
|
||||
* [libnvidia-container] Fix permission issue when determining cgroup version
|
||||
|
||||
## v1.8.0
|
||||
|
||||
* Promote 1.8.0-rc.2-1 to 1.8.0
|
||||
|
||||
## v1.8.0-rc.2
|
||||
|
||||
* Remove support for building amazonlinux1 packages
|
||||
|
||||
## v1.8.0-rc.1
|
||||
|
||||
* [libnvidia-container] Add support for cgroupv2
|
||||
* Release toolkit-container images from nvidia-container-toolkit repository
|
||||
|
||||
## v1.7.0
|
||||
|
||||
* Promote 1.7.0-rc.1-1 to 1.7.0
|
||||
* Bump Golang version to 1.16.4
|
||||
|
||||
## v1.7.0-rc.1
|
||||
|
||||
* Specify containerd runtime type as string in config tools to remove dependency on containerd package
|
||||
* Add supported-driver-capabilities config option to allow for a subset of all driver capabilities to be specified
|
||||
|
||||
## v1.6.0
|
||||
|
||||
* Promote 1.6.0-rc.3-1 to 1.6.0
|
||||
* Fix unnecessary logging to stderr instead of configured nvidia-container-runtime log file
|
||||
|
||||
## v1.6.0-rc.3
|
||||
|
||||
* Add supported-driver-capabilities config option to the nvidia-container-toolkit
|
||||
* Move OCI and command line checks for runtime to internal oci package
|
||||
|
||||
## v1.6.0-rc.2
|
||||
|
||||
* Use relative path to OCI specification file (config.json) if bundle path is not specified as an argument to the nvidia-container-runtime
|
||||
|
||||
## v1.6.0-rc.1
|
||||
|
||||
* Add AARCH64 package for Amazon Linux 2
|
||||
* Include nvidia-container-runtime into nvidia-container-toolkit package
|
||||
|
||||
## v1.5.1
|
||||
|
||||
* Fix bug where Docker Swarm device selection is ignored if NVIDIA_VISIBLE_DEVICES is also set
|
||||
* Improve unit testing by using require package and adding coverage reports
|
||||
* Remove unneeded go dependencies by running go mod tidy
|
||||
* Move contents of pkg directory to cmd for CLI tools
|
||||
* Ensure make binary target explicitly sets GOOS
|
||||
|
||||
## v1.5.0
|
||||
|
||||
* Add dependence on libnvidia-container-tools >= 1.4.0
|
||||
* Add golang check targets to Makefile
|
||||
* Add Jenkinsfile definition for build targets
|
||||
* Move docker.mk to docker folder
|
||||
|
||||
## v1.4.2
|
||||
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.3
|
||||
|
||||
## v1.4.1
|
||||
|
||||
* Ignore NVIDIA_VISIBLE_DEVICES for containers with insufficent privileges
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.2
|
||||
|
||||
## v1.4.0
|
||||
|
||||
* Add 'compute' capability to list of defaults
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.1
|
||||
|
||||
## v1.3.0
|
||||
|
||||
* Promote 1.3.0-rc.2-1 to 1.3.0
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.0
|
||||
|
||||
## v1.3.0-rc.2
|
||||
|
||||
* 2c180947 Add more tests for new semantics with device list from volume mounts
|
||||
* 7c003857 Refactor accepting device lists from volume mounts as a boolean
|
||||
|
||||
## v1.3.0-rc.1
|
||||
|
||||
* b50d86c1 Update build system to accept a TAG variable for things like rc.x
|
||||
* fe65573b Add common CI tests for things like golint, gofmt, unit tests, etc.
|
||||
* da6fbb34 Revert "Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*"
|
||||
* a7fb3330 Flip build-all targets to run automatically on merge requests
|
||||
* 8b248b66 Rename github.com/NVIDIA/container-toolkit to nvidia-container-toolkit
|
||||
* da36874e Add new config options to pull device list from mounted files instead of ENVVAR
|
||||
|
||||
## v1.2.1
|
||||
|
||||
* 4e6e0ed4 Add 'ngx' to list of*all* driver capabilities
|
||||
* 2f4af743 List config.toml as a config file in the RPM SPEC
|
||||
|
||||
## v1.2.0
|
||||
|
||||
* 8e0aab46 Fix repo listed in changelog for debian distributions
|
||||
* 320bb6e4 Update dependence on libnvidia-container to 1.2.0
|
||||
* 6cfc8097 Update package license to match source license
|
||||
* e7dc3cbb Fix debian copyright file
|
||||
* d3aee3e0 Add the 'ngx' driver capability
|
||||
|
||||
## v1.1.2
|
||||
|
||||
* c32237f3 Add support for parsing Linux Capabilities for older OCI specs
|
||||
|
||||
## v1.1.1
|
||||
|
||||
* d202aded Update dependence to libnvidia-container 1.1.1
|
||||
|
||||
## v1.1.0
|
||||
|
||||
* 4e4de762 Update build system to support multi-arch builds
|
||||
* fcc1d116 Add support for MIG (Multi-Instance GPUs)
|
||||
* d4ff0416 Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*
|
||||
* 60f165ad Add no-pivot option to toolkit
|
||||
|
||||
## v1.0.5
|
||||
|
||||
* Initial release. Replaces older package nvidia-container-runtime-hook. (Closes: #XXXXXX)
|
||||
@@ -13,7 +13,7 @@ The `nvidia-container-toolkit` resides in this repo directly.
|
||||
|
||||
In oder to build the packages, the following command is executed
|
||||
```sh
|
||||
./scripts/build-all-components.sh TARGET
|
||||
./scripts/build-packages.sh TARGET
|
||||
```
|
||||
where `TARGET` is a make target that is valid for each of the sub-components.
|
||||
|
||||
@@ -21,6 +21,8 @@ These include:
|
||||
* `ubuntu18.04-amd64`
|
||||
* `centos8-x86_64`
|
||||
|
||||
If no `TARGET` is specified, all valid release targets are built.
|
||||
|
||||
The packages are generated in the `dist` folder.
|
||||
|
||||
## Testing local changes
|
||||
@@ -37,9 +39,23 @@ The [test/release](./test/release/) folder contains documentation on how the ins
|
||||
|
||||
## Releasing
|
||||
|
||||
A utility script [`scripts/release.sh`](./scripts/release.sh) is provided to build
|
||||
packages required for release. If run without arguments, all supported distribution-architecture combinations are built. A specific distribution-architecture pair can also be provided
|
||||
```sh
|
||||
./scripts/release.sh ubuntu18.04-amd64
|
||||
In order to release packages required for a release, a utility script
|
||||
[`scripts/release-packages.sh`](./scripts/release-packages.sh) is provided.
|
||||
This script can be executed as follows:
|
||||
|
||||
```bash
|
||||
GPG_LOCAL_USER="GPG_USER" \
|
||||
MASTER_KEY_PATH=/path/to/gpg-master.key \
|
||||
SUB_KEY_PATH=/path/to/gpg-subkey.key \
|
||||
./scripts/release-packages.sh REPO PACKAGE_REPO_ROOT [REFERENCE]
|
||||
```
|
||||
where the `amd64` builds for `ubuntu18.04` are provided as an example.
|
||||
|
||||
Where `REPO` is one of `stable` or `experimental`, `PACKAGE_REPO_ROOT` is the local path to the `libnvidia-container` repository checked out to the `gh-pages` branch, and `REFERENCE` is the git SHA that is to be released. If reference is not specified `HEAD` is assumed.
|
||||
|
||||
This scripts performs the following basic functions:
|
||||
* Pulls the package image defined by the `REFERENCE` git SHA from the staging registry,
|
||||
* Copies the required packages to the package repository at `PACKAGE_REPO_ROOT/REPO`,
|
||||
* Signs the packages using the specified GPG keys
|
||||
|
||||
While the last two are performed, commits are added to the package repository. These can be pushed to the relevant repository.
|
||||
|
||||
|
||||
16
Makefile
16
Makefile
@@ -38,16 +38,21 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
|
||||
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
|
||||
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
|
||||
|
||||
$(info CMD_TARGETS=$(CMD_TARGETS))
|
||||
|
||||
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
|
||||
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate $(CHECK_TARGETS)
|
||||
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
|
||||
|
||||
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
|
||||
|
||||
DOCKER_TARGETS := $(patsubst %,docker-%, $(TARGETS))
|
||||
.PHONY: $(TARGETS) $(DOCKER_TARGETS)
|
||||
|
||||
ifeq ($(VERSION),)
|
||||
CLI_VERSION = $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
else
|
||||
CLI_VERSION = $(VERSION)
|
||||
endif
|
||||
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
|
||||
|
||||
GOOS ?= linux
|
||||
|
||||
binaries: cmds
|
||||
@@ -56,7 +61,7 @@ cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
|
||||
endif
|
||||
cmds: $(CMD_TARGETS)
|
||||
$(CMD_TARGETS): cmd-%:
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
|
||||
build:
|
||||
GOOS=$(GOOS) go build ./...
|
||||
@@ -98,6 +103,9 @@ misspell:
|
||||
vet:
|
||||
go vet $(MODULE)/...
|
||||
|
||||
licenses:
|
||||
go-licenses csv $(MODULE)/...
|
||||
|
||||
COVERAGE_FILE := coverage.out
|
||||
test: build cmds
|
||||
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# NVIDIA Container Toolkit
|
||||
|
||||
[](https://raw.githubusercontent.com/NVIDIA/nvidia-container-toolkit/master/LICENSE)
|
||||
[](https://raw.githubusercontent.com/NVIDIA/nvidia-container-toolkit/main/LICENSE)
|
||||
[](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/overview.html)
|
||||
[](https://nvidia.github.io/libnvidia-container)
|
||||
|
||||
|
||||
@@ -67,9 +67,9 @@ ARG TARGETARCH
|
||||
ENV PACKAGE_ARCH ${TARGETARCH}
|
||||
RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm64/aarch64} && \
|
||||
yum localinstall -y \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-${PACKAGE_VERSION}*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-${PACKAGE_VERSION}*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit-${PACKAGE_VERSION}*.rpm
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-1.*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-1.*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*-${PACKAGE_VERSION}*.rpm
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
@@ -85,7 +85,7 @@ LABEL release="N/A"
|
||||
LABEL summary="Automatically Configure your Container Runtime for GPU support."
|
||||
LABEL description="See summary"
|
||||
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
|
||||
# Install / upgrade packages here that are required to resolve CVEs
|
||||
ARG CVE_UPDATES
|
||||
|
||||
@@ -25,5 +25,7 @@ ARG ARTIFACTS_ROOT
|
||||
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||
|
||||
WORKDIR /artifacts/packages
|
||||
# Create a manifest.txt file with the absolute paths of all deb and rpm packages in the container
|
||||
RUN find /artifacts/packages -iname '*.deb' -o -iname '*.rpm' > /artifacts/manifest.txt
|
||||
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
|
||||
@@ -42,6 +42,9 @@ RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" .
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
|
||||
RUN rm -f /etc/apt/sources.list.d/cuda.list
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libcap2 \
|
||||
@@ -72,9 +75,9 @@ RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
|
||||
fi
|
||||
|
||||
RUN dpkg -i \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_${PACKAGE_VERSION}*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_${PACKAGE_VERSION}*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit_${PACKAGE_VERSION}*.deb
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*_${PACKAGE_VERSION}*.deb
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
@@ -90,7 +93,7 @@ LABEL release="N/A"
|
||||
LABEL summary="Automatically Configure your Container Runtime for GPU support."
|
||||
LABEL description="See summary"
|
||||
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
|
||||
# Install / upgrade packages here that are required to resolve CVEs
|
||||
ARG CVE_UPDATES
|
||||
|
||||
@@ -43,8 +43,8 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(DIST)
|
||||
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
|
||||
|
||||
##### Public rules #####
|
||||
DEFAULT_PUSH_TARGET := ubuntu18.04
|
||||
DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 ubi8 centos7 centos8
|
||||
DEFAULT_PUSH_TARGET := ubuntu20.04
|
||||
DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 ubi8 centos7
|
||||
|
||||
META_TARGETS := packaging
|
||||
|
||||
@@ -110,10 +110,10 @@ build-ubi8: DOCKERFILE_SUFFIX := centos
|
||||
build-ubi8: PACKAGE_DIST = centos8
|
||||
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-centos%: BASE_DIST = $(*)
|
||||
build-centos%: DOCKERFILE_SUFFIX := centos
|
||||
build-centos%: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-centos%: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
build-centos7: BASE_DIST = $(*)
|
||||
build-centos7: DOCKERFILE_SUFFIX := centos
|
||||
build-centos7: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-centos7: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-packaging: BASE_DIST := ubuntu20.04
|
||||
build-packaging: DOCKERFILE_SUFFIX := packaging
|
||||
|
||||
@@ -30,5 +30,8 @@ push-short:
|
||||
# We only have x86_64 packages for centos7
|
||||
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
# We only generate amd64 image for ubuntu18.04
|
||||
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
# We only generate a single image for packaging targets
|
||||
build-packaging: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
@@ -7,14 +7,12 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"golang.org/x/mod/semver"
|
||||
)
|
||||
|
||||
var envSwarmGPU *string
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
@@ -104,32 +102,6 @@ type HookState struct {
|
||||
BundlePath string `json:"bundlePath"`
|
||||
}
|
||||
|
||||
func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
|
||||
if _, err := fmt.Sscanf(cudaVersion, "%d.%d.%d\n", &vmaj, &vmin, &vpatch); err != nil {
|
||||
vpatch = 0
|
||||
if _, err := fmt.Sscanf(cudaVersion, "%d.%d\n", &vmaj, &vmin); err != nil {
|
||||
vmin = 0
|
||||
if _, err := fmt.Sscanf(cudaVersion, "%d\n", &vmaj); err != nil {
|
||||
log.Panicln("invalid CUDA version:", cudaVersion)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func getEnvMap(e []string) (m map[string]string) {
|
||||
m = make(map[string]string)
|
||||
for _, s := range e {
|
||||
p := strings.SplitN(s, "=", 2)
|
||||
if len(p) != 2 {
|
||||
log.Panicln("environment error")
|
||||
}
|
||||
m[p[0]] = p[1]
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func loadSpec(path string) (spec *Spec) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
@@ -191,49 +163,31 @@ func isPrivileged(s *Spec) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func isLegacyCUDAImage(env map[string]string) bool {
|
||||
legacyCudaVersion := env[envCUDAVersion]
|
||||
cudaRequire := env[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
||||
// Build a list of envvars to consider.
|
||||
envVars := []string{envNVVisibleDevices}
|
||||
if envSwarmGPU != nil {
|
||||
// The Swarm envvar has higher precedence.
|
||||
envVars = append([]string{*envSwarmGPU}, envVars...)
|
||||
}
|
||||
|
||||
// Grab a reference to devices from the first envvar
|
||||
// in the list that actually exists in the environment.
|
||||
var devices *string
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := env[envVar]; ok {
|
||||
devices = &devs
|
||||
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
|
||||
// We check if the image has at least one of the Swarm resource envvars defined and use this
|
||||
// if specified.
|
||||
var hasSwarmEnvvar bool
|
||||
for _, envvar := range swarmResourceEnvvars {
|
||||
if _, exists := image[envvar]; exists {
|
||||
hasSwarmEnvvar = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Environment variable unset with legacy image: default to "all".
|
||||
if devices == nil && legacyImage {
|
||||
all := "all"
|
||||
return &all
|
||||
var devices []string
|
||||
if hasSwarmEnvvar {
|
||||
devices = image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
|
||||
} else {
|
||||
devices = image.DevicesFromEnvvars(envNVVisibleDevices).List()
|
||||
}
|
||||
|
||||
// Environment variable unset or empty or "void": return nil
|
||||
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
||||
if len(devices) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Environment variable set to "none": reset to "".
|
||||
if *devices == "none" {
|
||||
empty := ""
|
||||
return &empty
|
||||
}
|
||||
devicesString := strings.Join(devices, ",")
|
||||
|
||||
// Any other value.
|
||||
return devices
|
||||
return &devicesString
|
||||
}
|
||||
|
||||
func getDevicesFromMounts(mounts []Mount) *string {
|
||||
@@ -273,7 +227,7 @@ func getDevicesFromMounts(mounts []Mount) *string {
|
||||
return &ret
|
||||
}
|
||||
|
||||
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
|
||||
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *string {
|
||||
// If enabled, try and get the device list from volume mounts first
|
||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||
devices := getDevicesFromMounts(mounts)
|
||||
@@ -283,7 +237,7 @@ func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, p
|
||||
}
|
||||
|
||||
// Fallback to reading from the environment variable if privileges are correct
|
||||
devices := getDevicesFromEnvvar(env, legacyImage)
|
||||
devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
|
||||
if devices == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -335,27 +289,11 @@ func getDriverCapabilities(env map[string]string, supportedDriverCapabilities Dr
|
||||
return capabilities
|
||||
}
|
||||
|
||||
func getRequirements(env map[string]string, legacyImage bool) []string {
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range env {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
}
|
||||
if legacyImage {
|
||||
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
|
||||
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
|
||||
requirements = append(requirements, cudaRequire)
|
||||
}
|
||||
return requirements
|
||||
}
|
||||
|
||||
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||
legacyImage := isLegacyCUDAImage(env)
|
||||
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||
legacyImage := image.IsLegacy()
|
||||
|
||||
var devices string
|
||||
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
|
||||
if d := getDevices(hookConfig, image, mounts, privileged); d != nil {
|
||||
devices = *d
|
||||
} else {
|
||||
// 'nil' devices means this is not a GPU container.
|
||||
@@ -363,7 +301,7 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou
|
||||
}
|
||||
|
||||
var migConfigDevices string
|
||||
if d := getMigConfigDevices(env); d != nil {
|
||||
if d := getMigConfigDevices(image); d != nil {
|
||||
migConfigDevices = *d
|
||||
}
|
||||
if !privileged && migConfigDevices != "" {
|
||||
@@ -371,19 +309,21 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou
|
||||
}
|
||||
|
||||
var migMonitorDevices string
|
||||
if d := getMigMonitorDevices(env); d != nil {
|
||||
if d := getMigMonitorDevices(image); d != nil {
|
||||
migMonitorDevices = *d
|
||||
}
|
||||
if !privileged && migMonitorDevices != "" {
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
driverCapabilities := getDriverCapabilities(env, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
||||
driverCapabilities := getDriverCapabilities(image, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
||||
|
||||
requirements := getRequirements(env, legacyImage)
|
||||
requirements, err := image.GetRequirements()
|
||||
if err != nil {
|
||||
log.Panicln("failed to get requirements", err)
|
||||
}
|
||||
|
||||
// Don't fail on invalid values.
|
||||
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
|
||||
disableRequire := image.HasDisableRequire()
|
||||
|
||||
return &nvidiaConfig{
|
||||
Devices: devices,
|
||||
@@ -409,13 +349,16 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
|
||||
s := loadSpec(path.Join(b, "config.json"))
|
||||
|
||||
env := getEnvMap(s.Process.Env)
|
||||
image, err := image.NewCUDAImageFromEnv(s.Process.Env)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
|
||||
privileged := isPrivileged(s)
|
||||
envSwarmGPU = hook.SwarmResource
|
||||
return containerConfig{
|
||||
Pid: h.Pid,
|
||||
Rootfs: s.Root.Path,
|
||||
Env: env,
|
||||
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
|
||||
Env: image,
|
||||
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
@@ -68,7 +70,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
description: "Legacy image, devices 'void', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "",
|
||||
envNVVisibleDevices: "void",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
@@ -225,7 +227,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
description: "Modern image, devices 'void', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "",
|
||||
envNVVisibleDevices: "void",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
@@ -448,6 +450,44 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Hook config set, swarmResource overrides device selection",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
s := "DOCKER_SWARM_RESOURCE"
|
||||
return &s
|
||||
}(),
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "GPU1,GPU2",
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Hook config set, comma separated swarmResource is split and overrides device selection",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
s := "NOT_DOCKER_SWARM_RESOURCE,DOCKER_SWARM_RESOURCE"
|
||||
return &s
|
||||
}(),
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "GPU1,GPU2",
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
@@ -671,7 +711,7 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
||||
hookConfig := getDefaultHookConfig()
|
||||
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged, false)
|
||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
|
||||
}
|
||||
|
||||
// For all other tests, just grab the devices and check the results
|
||||
@@ -688,13 +728,13 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
|
||||
gpuID := "GPU-12345"
|
||||
anotherGPUID := "GPU-67890"
|
||||
thirdGPUID := "MIG-12345"
|
||||
|
||||
var tests = []struct {
|
||||
description string
|
||||
envSwarmGPU *string
|
||||
env map[string]string
|
||||
legacyImage bool
|
||||
expectedDevices *string
|
||||
description string
|
||||
swarmResourceEnvvars []string
|
||||
env map[string]string
|
||||
expectedDevices *string
|
||||
}{
|
||||
{
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
@@ -729,13 +769,15 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "empty env returns all for legacy image",
|
||||
legacyImage: true,
|
||||
description: "empty env returns all for legacy image",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
expectedDevices: &all,
|
||||
},
|
||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
|
||||
@@ -781,86 +823,116 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "empty env returns all for legacy image",
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &all,
|
||||
},
|
||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
|
||||
// enabled
|
||||
{
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
},
|
||||
{
|
||||
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "void",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "none",
|
||||
},
|
||||
expectedDevices: &empty,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: gpuID,
|
||||
},
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: gpuID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: func() *string {
|
||||
result := fmt.Sprintf("%s,%s", thirdGPUID, anotherGPUID)
|
||||
return &result
|
||||
}(),
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
envSwarmGPU = tc.envSwarmGPU
|
||||
devices := getDevicesFromEnvvar(tc.env, tc.legacyImage)
|
||||
devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars)
|
||||
if tc.expectedDevices == nil {
|
||||
require.Nil(t, devices, "%d: %v", i, tc)
|
||||
return
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
@@ -34,7 +35,7 @@ type CLIConfig struct {
|
||||
Ldconfig *string `toml:"ldconfig"`
|
||||
}
|
||||
|
||||
// HookConfig : options for the nvidia-container-toolkit.
|
||||
// HookConfig : options for the nvidia-container-runtime-hook.
|
||||
type HookConfig struct {
|
||||
DisableRequire bool `toml:"disable-require"`
|
||||
SwarmResource *string `toml:"swarm-resource"`
|
||||
@@ -116,3 +117,22 @@ func (c HookConfig) getConfigOption(fieldName string) string {
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
|
||||
func (c *HookConfig) getSwarmResourceEnvvars() []string {
|
||||
if c.SwarmResource == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
candidates := strings.Split(*c.SwarmResource, ",")
|
||||
|
||||
var envvars []string
|
||||
for _, c := range candidates {
|
||||
trimmed := strings.TrimSpace(c)
|
||||
if len(trimmed) > 0 {
|
||||
envvars = append(envvars, trimmed)
|
||||
}
|
||||
}
|
||||
|
||||
return envvars
|
||||
}
|
||||
@@ -103,3 +103,59 @@ func TestGetHookConfig(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetSwarmResourceEnvvars(t *testing.T) {
|
||||
testCases := []struct {
|
||||
value string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
value: "nil",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: "",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: " ",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: "single",
|
||||
expected: []string{"single"},
|
||||
},
|
||||
{
|
||||
value: "single ",
|
||||
expected: []string{"single"},
|
||||
},
|
||||
{
|
||||
value: "one,two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
{
|
||||
value: "one ,two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
{
|
||||
value: "one, two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
c := &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
if tc.value == "nil" {
|
||||
return nil
|
||||
}
|
||||
return &tc.value
|
||||
}(),
|
||||
}
|
||||
|
||||
envvars := c.getSwarmResourceEnvvars()
|
||||
require.EqualValues(t, tc.expected, envvars)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -7,51 +7,6 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseCudaVersionValid(t *testing.T) {
|
||||
var tests = []struct {
|
||||
version string
|
||||
expected [3]uint32
|
||||
}{
|
||||
{"0", [3]uint32{0, 0, 0}},
|
||||
{"8", [3]uint32{8, 0, 0}},
|
||||
{"7.5", [3]uint32{7, 5, 0}},
|
||||
{"9.0.116", [3]uint32{9, 0, 116}},
|
||||
{"4294967295.4294967295.4294967295", [3]uint32{4294967295, 4294967295, 4294967295}},
|
||||
}
|
||||
for i, c := range tests {
|
||||
vmaj, vmin, vpatch := parseCudaVersion(c.version)
|
||||
|
||||
version := [3]uint32{vmaj, vmin, vpatch}
|
||||
|
||||
require.Equal(t, c.expected, version, "%d: %v", i, c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCudaVersionInvalid(t *testing.T) {
|
||||
var tests = []string{
|
||||
"foo",
|
||||
"foo.5.10",
|
||||
"9.0.116.50",
|
||||
"9.0.116foo",
|
||||
"7.foo",
|
||||
"9.0.bar",
|
||||
"9.4294967296",
|
||||
"9.0.116.",
|
||||
"9..0",
|
||||
"9.",
|
||||
".5.10",
|
||||
"-9",
|
||||
"+9",
|
||||
"-9.1.116",
|
||||
"-9.-1.-116",
|
||||
}
|
||||
for _, c := range tests {
|
||||
require.Panics(t, func() {
|
||||
parseCudaVersion(c)
|
||||
}, "parseCudaVersion(%v)", c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPrivileged(t *testing.T) {
|
||||
var tests = []struct {
|
||||
spec string
|
||||
@@ -6,21 +6,21 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
var (
|
||||
debugflag = flag.Bool("debug", false, "enable debug output")
|
||||
forceflag = flag.Bool("force", false, "force execution of prestart hook in experimental mode")
|
||||
configflag = flag.String("config", "", "configuration file")
|
||||
|
||||
defaultPATH = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"}
|
||||
debugflag = flag.Bool("debug", false, "enable debug output")
|
||||
versionflag = flag.Bool("version", false, "enable version output")
|
||||
configflag = flag.String("config", "", "configuration file")
|
||||
)
|
||||
|
||||
func exit() {
|
||||
@@ -36,28 +36,16 @@ func exit() {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func getPATH(config CLIConfig) string {
|
||||
dirs := filepath.SplitList(os.Getenv("PATH"))
|
||||
// directories from the hook environment have higher precedence
|
||||
dirs = append(dirs, defaultPATH...)
|
||||
|
||||
if config.Root != nil {
|
||||
rootDirs := []string{}
|
||||
for _, dir := range dirs {
|
||||
rootDirs = append(rootDirs, path.Join(*config.Root, dir))
|
||||
}
|
||||
// directories with the root prefix have higher precedence
|
||||
dirs = append(rootDirs, dirs...)
|
||||
}
|
||||
return strings.Join(dirs, ":")
|
||||
}
|
||||
|
||||
func getCLIPath(config CLIConfig) string {
|
||||
if config.Path != nil {
|
||||
return *config.Path
|
||||
}
|
||||
|
||||
if err := os.Setenv("PATH", getPATH(config)); err != nil {
|
||||
var root string
|
||||
if config.Root != nil {
|
||||
root = *config.Root
|
||||
}
|
||||
if err := os.Setenv("PATH", lookup.GetPath(root)); err != nil {
|
||||
log.Panicln("couldn't set PATH variable:", err)
|
||||
}
|
||||
|
||||
@@ -86,8 +74,8 @@ func doPrestart() {
|
||||
hook := getHookConfig()
|
||||
cli := hook.NvidiaContainerCLI
|
||||
|
||||
if hook.NVIDIAContainerRuntime.Experimental && !*forceflag {
|
||||
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.")
|
||||
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
|
||||
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
|
||||
}
|
||||
|
||||
container := getContainerConfig(hook)
|
||||
@@ -172,6 +160,11 @@ func main() {
|
||||
flag.Usage = usage
|
||||
flag.Parse()
|
||||
|
||||
if *versionflag {
|
||||
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime Hook", info.GetVersionString())
|
||||
return
|
||||
}
|
||||
|
||||
args := flag.Args()
|
||||
if len(args) == 0 {
|
||||
flag.Usage()
|
||||
@@ -191,3 +184,12 @@ func main() {
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
||||
|
||||
// logInterceptor implements the info.Logger interface to allow for logging from this function.
|
||||
type logInterceptor struct{}
|
||||
|
||||
func (l *logInterceptor) Infof(format string, args ...interface{}) {
|
||||
log.Printf(format, args...)
|
||||
}
|
||||
|
||||
func (l *logInterceptor) Debugf(format string, args ...interface{}) {}
|
||||
@@ -2,24 +2,86 @@
|
||||
|
||||
The NVIDIA Container Runtime is a shim for OCI-compliant low-level runtimes such as [runc](https://github.com/opencontainers/runc). When a `create` command is detected, the incoming [OCI runtime specification](https://github.com/opencontainers/runtime-spec) is modified in place and the command is forwarded to the low-level runtime.
|
||||
|
||||
## Standard Mode
|
||||
## Configuration
|
||||
|
||||
In the standard mode configuration, the NVIDIA Container Runtime adds a [`prestart` hook](https://github.com/opencontainers/runtime-spec/blob/master/config.md#prestart) to the incomming OCI specification that invokes the NVIDIA Container Runtime Hook for all containers created. This hook checks whether NVIDIA devices are requested and ensures GPU access is configured using the `nvidia-container-cli` from project [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
|
||||
The NVIDIA Container Runtime uses file-based configuration, with the config stored in `/etc/nvidia-container-runtime/config.toml`. The `/etc` path can be overridden using the `XDG_CONFIG_HOME` environment variable with the `${XDG_CONFIG_HOME}/nvidia-container-runtime/config.toml` file used instead if this environment variable is set.
|
||||
|
||||
## Experimental Mode
|
||||
This config file may contain options for other components of the NVIDIA container stack and for the NVIDIA Container Runtime, the relevant config section is `nvidia-container-runtime`
|
||||
|
||||
The NVIDIA Container Runtime can be configured in an experimental mode by setting the following options in the runtime's `config.toml` file:
|
||||
### Logging
|
||||
|
||||
The `log-level` config option (default: `"info"`) specifies the log level to use and the `debug` option, if set, specifies a log file to which logs for the NVIDIA Container Runtime must be written.
|
||||
|
||||
In addition to this, the NVIDIA Container Runtime considers the value of `--log` and `--log-format` flags that may be passed to it by a container runtime such as docker or containerd. If the `--debug` flag is present the log-level specified in the config file is overridden as `"debug"`.
|
||||
|
||||
### Low-level Runtime Path
|
||||
|
||||
The `runtimes` config option allows for the low-level runtime to be specified. The first entry in this list that is an existing executable file is used as the low-level runtime. If the entry is not a path, the `PATH` is searched for a matching executable. If the entry is a path this is checked instead.
|
||||
|
||||
The default value for this setting is:
|
||||
```toml
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
```
|
||||
|
||||
and if, for example, `crun` is to be used instead this can be changed to:
|
||||
```toml
|
||||
runtimes = [
|
||||
"crun",
|
||||
]
|
||||
```
|
||||
|
||||
### Runtime Mode
|
||||
|
||||
The `mode` config option (default `"auto"`) controls the high-level behaviour of the runtime.
|
||||
|
||||
#### Auto Mode
|
||||
|
||||
When `mode` is set to `"auto"`, the runtime employs heuristics to determine which mode to use based on, for example, the platform where the runtime is being run.
|
||||
|
||||
#### Legacy Mode
|
||||
|
||||
When `mode` is set to `"legacy"`, the NVIDIA Container Runtime adds a [`prestart` hook](https://github.com/opencontainers/runtime-spec/blob/master/config.md#prestart) to the incomming OCI specification that invokes the NVIDIA Container Runtime Hook for all containers created. This hook checks whether NVIDIA devices are requested and ensures GPU access is configured using the `nvidia-container-cli` from the [libnvidia-container](https://github.com/NVIDIA/libnvidia-container) project.
|
||||
|
||||
#### CSV Mode
|
||||
|
||||
When `mode` is set to `"csv"`, CSV files at `/etc/nvidia-container-runtime/host-files-for-container.d` define the devices and mounts that are to be injected into a container when it is created. The search path for the files can be overridden by modifying the `nvidia-container-runtime.modes.csv.mount-spec-path` in the config as below:
|
||||
|
||||
```toml
|
||||
[nvidia-container-runtime]
|
||||
experimental = true
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
```
|
||||
|
||||
When this setting is enabled, the modifications made to the OCI specification are controlled by the `nvidia-container-runtime.discover-mode` option, with the following mode supported:
|
||||
|
||||
* `"legacy"`: This mode mirrors the behaviour of the standard mode, inserting the NVIDIA Container Runtime Hook as a `prestart` hook into the container's OCI specification.
|
||||
* `"csv"`: This mode uses CSV files at `/etc/nvidia-container-runtime/host-files-for-container.d` to define the devices and mounts that are to be injected into a container when it is created.
|
||||
This mode is primarily targeted at Tegra-based systems without NVML available.
|
||||
|
||||
### Notes on using the docker CLI
|
||||
|
||||
The `docker` CLI supports the `--gpus` flag to select GPUs for inclusion in a container. Since specifying this flag inserts the same NVIDIA Container Runtime Hook into the OCI runtime specification. When experimental mode is activated, the NVIDIA Container Runtime detects the presence of the hook and raises an error. This requirement will be relaxed in the near future.
|
||||
Note that only the `"legacy"` NVIDIA Container Runtime mode is directly compatible with the `--gpus` flag implemented by the `docker` CLI (assuming the NVIDIA Container Runtime is not used). The reason for this is that `docker` inserts the same NVIDIA Container Runtime Hook into the OCI runtime specification.
|
||||
|
||||
|
||||
If a different mode is explicitly set or detected, the NVIDIA Container Runtime Hook will raise the following error when `--gpus` is set:
|
||||
```
|
||||
$ docker run --rm --gpus all ubuntu:18.04
|
||||
docker: Error response from daemon: failed to create shim: OCI runtime create failed: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: Running hook #0:: error running hook: exit status 1, stdout: , stderr: Auto-detected mode as 'csv'
|
||||
invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.: unknown.
|
||||
```
|
||||
Here NVIDIA Container Runtime must be used explicitly. The recommended way to do this is to specify the `--runtime=nvidia` command line argument as part of the `docker run` commmand as follows:
|
||||
```
|
||||
$ docker run --rm --gpus all --runtime=nvidia ubuntu:18.04
|
||||
```
|
||||
|
||||
Alternatively the NVIDIA Container Runtime can be set as the default runtime for docker. This can be done by modifying the `/etc/docker/daemon.json` file as follows:
|
||||
```json
|
||||
{
|
||||
"default-runtime": "nvidia",
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"path": "nvidia-container-runtime",
|
||||
"runtimeArgs": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -20,60 +20,220 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/tsaikd/KDGoLib/logrusutil"
|
||||
)
|
||||
|
||||
// Logger adds a way to manage output to a log file to a logrus.Logger
|
||||
type Logger struct {
|
||||
*logrus.Logger
|
||||
previousOutput io.Writer
|
||||
logFile *os.File
|
||||
previousLogger *logrus.Logger
|
||||
logFiles []*os.File
|
||||
}
|
||||
|
||||
// NewLogger constructs a Logger with a preddefined formatter
|
||||
// NewLogger creates an empty logger
|
||||
func NewLogger() *Logger {
|
||||
logrusLogger := logrus.New()
|
||||
|
||||
formatter := &logrusutil.ConsoleLogFormatter{
|
||||
TimestampFormat: "2006/01/02 15:04:07",
|
||||
Flag: logrusutil.Ltime,
|
||||
return &Logger{
|
||||
Logger: logrus.New(),
|
||||
}
|
||||
|
||||
logger := &Logger{
|
||||
Logger: logrusLogger,
|
||||
}
|
||||
logger.SetFormatter(formatter)
|
||||
|
||||
return logger
|
||||
}
|
||||
|
||||
// LogToFile opens the specified file for appending and sets the logger to
|
||||
// output to the opened file. A reference to the file pointer is stored to
|
||||
// allow this to be closed.
|
||||
func (l *Logger) LogToFile(filename string) error {
|
||||
logFile, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening debug log file: %v", err)
|
||||
// UpdateLogger constructs a Logger with a preddefined formatter
|
||||
func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, error) {
|
||||
configFromArgs := parseArgs(argv)
|
||||
|
||||
level, logLevelError := configFromArgs.getLevel(logLevel)
|
||||
|
||||
var logFiles []*os.File
|
||||
var argLogFileError error
|
||||
|
||||
// We don't create log files if the version argument is supplied
|
||||
if !configFromArgs.version {
|
||||
configLogFile, err := createLogFile(filename)
|
||||
if err != nil {
|
||||
return logger, fmt.Errorf("error opening debug log file: %v", err)
|
||||
}
|
||||
if configLogFile != nil {
|
||||
logFiles = append(logFiles, configLogFile)
|
||||
}
|
||||
|
||||
argLogFile, err := createLogFile(configFromArgs.file)
|
||||
if argLogFile != nil {
|
||||
logFiles = append(logFiles, argLogFile)
|
||||
}
|
||||
argLogFileError = err
|
||||
}
|
||||
|
||||
l.logFile = logFile
|
||||
l.previousOutput = l.Out
|
||||
l.SetOutput(logFile)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CloseFile closes the log file (if any) and resets the logger output to what it
|
||||
// was before LogToFile was called.
|
||||
func (l *Logger) CloseFile() error {
|
||||
if l.logFile == nil {
|
||||
return nil
|
||||
l := &Logger{
|
||||
Logger: logrus.New(),
|
||||
previousLogger: logger.Logger,
|
||||
logFiles: logFiles,
|
||||
}
|
||||
logFile := l.logFile
|
||||
l.SetOutput(l.previousOutput)
|
||||
l.logFile = nil
|
||||
|
||||
return logFile.Close()
|
||||
l.SetLevel(level)
|
||||
if level == logrus.DebugLevel {
|
||||
logrus.SetReportCaller(true)
|
||||
// Shorten function and file names reported by the logger, by
|
||||
// trimming common "github.com/opencontainers/runc" prefix.
|
||||
// This is only done for text formatter.
|
||||
_, file, _, _ := runtime.Caller(0)
|
||||
prefix := filepath.Dir(file) + "/"
|
||||
logrus.SetFormatter(&logrus.TextFormatter{
|
||||
CallerPrettyfier: func(f *runtime.Frame) (string, string) {
|
||||
function := strings.TrimPrefix(f.Function, prefix) + "()"
|
||||
fileLine := strings.TrimPrefix(f.File, prefix) + ":" + strconv.Itoa(f.Line)
|
||||
return function, fileLine
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
if configFromArgs.format == "json" {
|
||||
l.SetFormatter(new(logrus.JSONFormatter))
|
||||
}
|
||||
|
||||
if len(logFiles) == 0 {
|
||||
l.SetOutput(io.Discard)
|
||||
} else if len(logFiles) == 1 {
|
||||
l.SetOutput(logFiles[0])
|
||||
} else if len(logFiles) > 1 {
|
||||
var writers []io.Writer
|
||||
for _, f := range logFiles {
|
||||
writers = append(writers, f)
|
||||
}
|
||||
l.SetOutput(io.MultiWriter(writers...))
|
||||
}
|
||||
|
||||
if logLevelError != nil {
|
||||
l.Warn(logLevelError)
|
||||
}
|
||||
|
||||
if argLogFileError != nil {
|
||||
l.Warnf("Failed to open log file: %v", argLogFileError)
|
||||
}
|
||||
|
||||
return l, nil
|
||||
}
|
||||
|
||||
// Reset closes the log file (if any) and resets the logger output to what it
|
||||
// was before UpdateLogger was called.
|
||||
func (l *Logger) Reset() error {
|
||||
defer func() {
|
||||
previous := l.previousLogger
|
||||
if previous == nil {
|
||||
previous = logrus.New()
|
||||
}
|
||||
logger = &Logger{Logger: previous}
|
||||
}()
|
||||
|
||||
var errs []error
|
||||
for _, f := range l.logFiles {
|
||||
err := f.Close()
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
|
||||
var err error
|
||||
for _, e := range errs {
|
||||
if err == nil {
|
||||
err = e
|
||||
continue
|
||||
}
|
||||
return fmt.Errorf("%v; %w", e, err)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func createLogFile(filename string) (*os.File, error) {
|
||||
if filename != "" && filename != os.DevNull {
|
||||
return os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
type loggerConfig struct {
|
||||
file string
|
||||
format string
|
||||
debug bool
|
||||
version bool
|
||||
}
|
||||
|
||||
func (c loggerConfig) getLevel(logLevel string) (logrus.Level, error) {
|
||||
if c.debug {
|
||||
return logrus.DebugLevel, nil
|
||||
}
|
||||
|
||||
if logLevel, err := logrus.ParseLevel(logLevel); err == nil {
|
||||
return logLevel, nil
|
||||
}
|
||||
|
||||
return logrus.InfoLevel, fmt.Errorf("invalid log-level '%v'", logLevel)
|
||||
}
|
||||
|
||||
// Informed by Taken from https://github.com/opencontainers/runc/blob/7fd8b57001f5bfa102e89cb434d96bf71f7c1d35/main.go#L182
|
||||
func parseArgs(args []string) loggerConfig {
|
||||
c := loggerConfig{}
|
||||
|
||||
expected := map[string]*string{
|
||||
"log-format": &c.format,
|
||||
"log": &c.file,
|
||||
}
|
||||
|
||||
found := make(map[string]bool)
|
||||
|
||||
for i := 0; i < len(args); i++ {
|
||||
if len(found) == 4 {
|
||||
break
|
||||
}
|
||||
|
||||
param := args[i]
|
||||
|
||||
parts := strings.SplitN(param, "=", 2)
|
||||
trimmed := strings.TrimLeft(parts[0], "-")
|
||||
// If this is not a flag we continue
|
||||
if parts[0] == trimmed {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check the version flag
|
||||
if trimmed == "version" {
|
||||
c.version = true
|
||||
found["version"] = true
|
||||
// For the version flag we don't process any other flags
|
||||
continue
|
||||
}
|
||||
|
||||
// Check the debug flag
|
||||
if trimmed == "debug" {
|
||||
c.debug = true
|
||||
found["debug"] = true
|
||||
continue
|
||||
}
|
||||
|
||||
destination, exists := expected[trimmed]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
var value string
|
||||
if len(parts) == 2 {
|
||||
value = parts[2]
|
||||
} else if i+1 < len(args) {
|
||||
value = args[i+1]
|
||||
i++
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
|
||||
*destination = value
|
||||
found[trimmed] = true
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
@@ -3,11 +3,20 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
// version must be set by go build's -X main.version= option in the Makefile.
|
||||
var version = "unknown"
|
||||
|
||||
// gitCommit will be the hash that the binary was built from
|
||||
// and will be populated by the Makefile
|
||||
var gitCommit = ""
|
||||
|
||||
var logger = NewLogger()
|
||||
|
||||
func main() {
|
||||
@@ -21,34 +30,60 @@ func main() {
|
||||
// run is an entry point that allows for idiomatic handling of errors
|
||||
// when calling from the main function.
|
||||
func run(argv []string) (rerr error) {
|
||||
logger.Debugf("Running %v", argv)
|
||||
printVersion := hasVersionFlag(argv)
|
||||
if printVersion {
|
||||
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version)))
|
||||
}
|
||||
|
||||
cfg, err := config.GetConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading config: %v", err)
|
||||
}
|
||||
|
||||
err = logger.LogToFile(cfg.NVIDIAContainerRuntimeConfig.DebugFilePath)
|
||||
logger, err = UpdateLogger(
|
||||
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath,
|
||||
cfg.NVIDIAContainerRuntimeConfig.LogLevel,
|
||||
argv,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening debug log file: %v", err)
|
||||
return fmt.Errorf("failed to set up logger: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
// We capture and log a returning error before closing the log file.
|
||||
if rerr != nil {
|
||||
logger.Errorf("%v", rerr)
|
||||
}
|
||||
logger.CloseFile()
|
||||
logger.Reset()
|
||||
}()
|
||||
|
||||
if logLevel, err := logrus.ParseLevel(cfg.NVIDIAContainerRuntimeConfig.LogLevel); err == nil {
|
||||
logger.SetLevel(logLevel)
|
||||
} else {
|
||||
logger.Warnf("Invalid log-level '%v'; using '%v'", cfg.NVIDIAContainerRuntimeConfig.LogLevel, logger.Level.String())
|
||||
}
|
||||
|
||||
logger.Debugf("Command line arguments: %v", argv)
|
||||
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
|
||||
}
|
||||
|
||||
if printVersion {
|
||||
fmt.Print("\n")
|
||||
}
|
||||
return runtime.Exec(argv)
|
||||
}
|
||||
|
||||
// TODO: This should be refactored / combined with parseArgs in logger.
|
||||
func hasVersionFlag(args []string) bool {
|
||||
for i := 0; i < len(args); i++ {
|
||||
param := args[i]
|
||||
|
||||
parts := strings.SplitN(param, "=", 2)
|
||||
trimmed := strings.TrimLeft(parts[0], "-")
|
||||
// If this is not a flag we continue
|
||||
if parts[0] == trimmed {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check the version flag
|
||||
if trimmed == "version" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -24,6 +24,10 @@ const (
|
||||
unmodifiedSpecFileSuffix = "test/input/test_spec.json"
|
||||
)
|
||||
|
||||
const (
|
||||
runcExecutableName = "runc"
|
||||
)
|
||||
|
||||
type testConfig struct {
|
||||
root string
|
||||
binPath string
|
||||
@@ -80,11 +84,6 @@ func TestBadInput(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
|
||||
output, err := cmdRun.CombinedOutput()
|
||||
require.Errorf(t, err, "runtime should return an error", "output=%v", string(output))
|
||||
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
err = cmdCreate.Run()
|
||||
|
||||
@@ -1,178 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// experiemental represents the modifications required by the experimental runtime
|
||||
type experimental struct {
|
||||
logger *logrus.Logger
|
||||
discoverer discover.Discover
|
||||
}
|
||||
|
||||
const (
|
||||
visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES"
|
||||
visibleDevicesVoid = "void"
|
||||
|
||||
nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK"
|
||||
)
|
||||
|
||||
// NewExperimentalModifier creates a modifier that applies the experimental
|
||||
// modications to an OCI spec if required by the runtime wrapper.
|
||||
func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||
if err := ociSpec.Load(); err != nil {
|
||||
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||
}
|
||||
|
||||
// In experimental mode, we check whether a modification is required at all and return the lowlevelRuntime directly
|
||||
// if no modification is required.
|
||||
visibleDevices, exists := ociSpec.LookupEnv(visibleDevicesEnvvar)
|
||||
if !exists || visibleDevices == "" || visibleDevices == visibleDevicesVoid {
|
||||
logger.Infof("No modification required: %v=%v (exists=%v)", visibleDevicesEnvvar, visibleDevices, exists)
|
||||
return nil, nil
|
||||
}
|
||||
logger.Infof("Constructing modifier from config: %+v", cfg)
|
||||
|
||||
config := &discover.Config{
|
||||
Root: cfg.NVIDIAContainerCLIConfig.Root,
|
||||
NVIDIAContainerToolkitCLIExecutablePath: cfg.NVIDIACTKConfig.Path,
|
||||
}
|
||||
|
||||
var d discover.Discover
|
||||
|
||||
switch resolveAutoDiscoverMode(logger, cfg.NVIDIAContainerRuntimeConfig.DiscoverMode) {
|
||||
case "legacy":
|
||||
legacyDiscoverer, err := discover.NewLegacyDiscoverer(logger, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create legacy discoverer: %v", err)
|
||||
}
|
||||
d = legacyDiscoverer
|
||||
case "csv":
|
||||
csvFiles, err := csv.GetFileList(csv.DefaultMountSpecPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
||||
}
|
||||
|
||||
nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar)
|
||||
if nvidiaRequireJetpack != "csv-mounts=all" {
|
||||
csvFiles = csv.BaseFilesOnly(csvFiles)
|
||||
}
|
||||
|
||||
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
|
||||
}
|
||||
|
||||
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
d = discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook)
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid discover mode: %v", cfg.NVIDIAContainerRuntimeConfig.DiscoverMode)
|
||||
}
|
||||
|
||||
return newExperimentalModifierFromDiscoverer(logger, d)
|
||||
}
|
||||
|
||||
// newExperimentalModifierFromDiscoverer created a modifier that aplies the discovered
|
||||
// modifications to an OCI spec if require by the runtime wrapper.
|
||||
func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
|
||||
m := experimental{
|
||||
logger: logger,
|
||||
discoverer: d,
|
||||
}
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
// Modify applies the required modifications to the incomming OCI spec. These modifications
|
||||
// are applied in-place.
|
||||
func (m experimental) Modify(spec *specs.Spec) error {
|
||||
err := nvidiaContainerRuntimeHookRemover{m.logger}.Modify(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to remove existing hooks: %v", err)
|
||||
}
|
||||
|
||||
specEdits, err := edits.NewSpecEdits(m.logger, m.discoverer)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get required container edits: %v", err)
|
||||
}
|
||||
|
||||
return specEdits.Modify(spec)
|
||||
}
|
||||
|
||||
// resolveAutoDiscoverMode determines the correct discover mode for the specified platform if set to "auto"
|
||||
func resolveAutoDiscoverMode(logger *logrus.Logger, mode string) (rmode string) {
|
||||
if mode != "auto" {
|
||||
return mode
|
||||
}
|
||||
defer func() {
|
||||
logger.Infof("Auto-detected discover mode as '%v'", rmode)
|
||||
}()
|
||||
|
||||
isTegra, reason := isTegraSystem()
|
||||
logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
|
||||
|
||||
if isTegra {
|
||||
return "csv"
|
||||
}
|
||||
|
||||
return "legacy"
|
||||
}
|
||||
|
||||
// isTegraSystem returns true if the system is detected as a Tegra-based system
|
||||
func isTegraSystem() (bool, string) {
|
||||
const tegraReleaseFile = "/etc/nv_tegra_release"
|
||||
const tegraFamilyFile = "/sys/devices/soc0/family"
|
||||
|
||||
if info, err := os.Stat(tegraReleaseFile); err == nil && !info.IsDir() {
|
||||
return true, fmt.Sprintf("%v found", tegraReleaseFile)
|
||||
}
|
||||
|
||||
if info, err := os.Stat(tegraFamilyFile); err != nil || !info.IsDir() {
|
||||
return false, fmt.Sprintf("%v not found", tegraFamilyFile)
|
||||
}
|
||||
|
||||
contents, err := os.ReadFile(tegraFamilyFile)
|
||||
if err != nil {
|
||||
return false, fmt.Sprintf("could not read %v", tegraFamilyFile)
|
||||
}
|
||||
|
||||
if strings.HasPrefix(strings.ToLower(string(contents)), "tegra") {
|
||||
return true, fmt.Sprintf("%v has 'tegra' prefix", tegraFamilyFile)
|
||||
}
|
||||
|
||||
return false, fmt.Sprintf("%v has no 'tegra' prefix", tegraFamilyFile)
|
||||
}
|
||||
@@ -1,349 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewExperimentalModifier(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
cfg *config.Config
|
||||
spec oci.Spec
|
||||
visibleDevices string
|
||||
expectedError error
|
||||
expectedNil bool
|
||||
}{
|
||||
{
|
||||
description: "spec load error returns error",
|
||||
spec: &oci.SpecMock{
|
||||
LoadFunc: func() error {
|
||||
return fmt.Errorf("load failed")
|
||||
},
|
||||
},
|
||||
expectedError: fmt.Errorf("load failed"),
|
||||
},
|
||||
{
|
||||
description: "visible devices not set returns nil",
|
||||
visibleDevices: "NOT_SET",
|
||||
expectedNil: true,
|
||||
},
|
||||
{
|
||||
description: "visible devices empty returns nil",
|
||||
visibleDevices: "",
|
||||
expectedNil: true,
|
||||
},
|
||||
{
|
||||
description: "visible devices 'void' returns nil",
|
||||
visibleDevices: "void",
|
||||
expectedNil: true,
|
||||
},
|
||||
{
|
||||
description: "empty config raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
expectedError: fmt.Errorf("invalid discover mode"),
|
||||
},
|
||||
{
|
||||
description: "non-legacy discover mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
DiscoverMode: "non-legacy",
|
||||
},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
expectedError: fmt.Errorf("invalid discover mode"),
|
||||
},
|
||||
{
|
||||
description: "legacy discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
DiscoverMode: "legacy",
|
||||
},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
},
|
||||
{
|
||||
description: "csv discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
DiscoverMode: "csv",
|
||||
},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
spec := tc.spec
|
||||
if spec == nil {
|
||||
spec = &oci.SpecMock{
|
||||
LookupEnvFunc: func(s string) (string, bool) {
|
||||
if tc.visibleDevices != "NOT_SET" && s == visibleDevicesEnvvar {
|
||||
return tc.visibleDevices, true
|
||||
}
|
||||
return "", false
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
m, err := NewExperimentalModifier(logger, tc.cfg, spec)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
if tc.expectedNil || tc.expectedError != nil {
|
||||
require.Nil(t, m)
|
||||
} else {
|
||||
require.NotNil(t, m)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExperimentalModifier(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
discover *discover.DiscoverMock
|
||||
spec *specs.Spec
|
||||
expectedError error
|
||||
expectedSpec *specs.Spec
|
||||
}{
|
||||
{
|
||||
description: "empty discoverer does not modify spec",
|
||||
discover: &discover.DiscoverMock{},
|
||||
},
|
||||
{
|
||||
description: "failed hooks discoverer returns error",
|
||||
discover: &discover.DiscoverMock{
|
||||
HooksFunc: func() ([]discover.Hook, error) {
|
||||
return nil, fmt.Errorf("discover.Hooks error")
|
||||
},
|
||||
},
|
||||
expectedError: fmt.Errorf("discover.Hooks error"),
|
||||
},
|
||||
{
|
||||
description: "discovered hooks are injected into spec",
|
||||
spec: &specs.Spec{},
|
||||
discover: &discover.DiscoverMock{
|
||||
HooksFunc: func() ([]discover.Hook, error) {
|
||||
hooks := []discover.Hook{
|
||||
{
|
||||
Lifecycle: "prestart",
|
||||
Path: "/hook/a",
|
||||
Args: []string{"/hook/a", "arga"},
|
||||
},
|
||||
{
|
||||
Lifecycle: "createContainer",
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
}
|
||||
return hooks, nil
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/a",
|
||||
Args: []string{"/hook/a", "arga"},
|
||||
},
|
||||
},
|
||||
CreateContainer: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "existing hooks are maintained",
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/a",
|
||||
Args: []string{"/hook/a", "arga"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
discover: &discover.DiscoverMock{
|
||||
HooksFunc: func() ([]discover.Hook, error) {
|
||||
hooks := []discover.Hook{
|
||||
{
|
||||
Lifecycle: "prestart",
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
}
|
||||
return hooks, nil
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/a",
|
||||
Args: []string{"/hook/a", "arga"},
|
||||
},
|
||||
{
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "modification removes existing nvidia-container-runtime-hook",
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-runtime-hook",
|
||||
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
discover: &discover.DiscoverMock{
|
||||
HooksFunc: func() ([]discover.Hook, error) {
|
||||
hooks := []discover.Hook{
|
||||
{
|
||||
Lifecycle: "prestart",
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
}
|
||||
return hooks, nil
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "modification removes existing nvidia-container-toolkit",
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-toolkit",
|
||||
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
discover: &discover.DiscoverMock{
|
||||
HooksFunc: func() ([]discover.Hook, error) {
|
||||
hooks := []discover.Hook{
|
||||
{
|
||||
Lifecycle: "prestart",
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
}
|
||||
return hooks, nil
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
m, err := newExperimentalModifierFromDiscoverer(logger, tc.discover)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = m.Modify(tc.spec)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.EqualValues(t, tc.expectedSpec, tc.spec)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveDiscoverMode(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
mode string
|
||||
expectedMode string
|
||||
}{
|
||||
{
|
||||
description: "non-auto resolves to input",
|
||||
mode: "not-auto",
|
||||
expectedMode: "not-auto",
|
||||
},
|
||||
// TODO: The following test is brittle in that it will break on Tegra-based systems.
|
||||
// {
|
||||
// description: "auto resolves to legacy",
|
||||
// mode: "auto",
|
||||
// expectedMode: "legacy",
|
||||
// },
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
mode := resolveAutoDiscoverMode(logger, tc.mode)
|
||||
require.EqualValues(t, tc.expectedMode, mode)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -19,32 +19,32 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
dockerRuncExecutableName = "docker-runc"
|
||||
runcExecutableName = "runc"
|
||||
)
|
||||
|
||||
// newNVIDIAContainerRuntime is a factory method that constructs a runtime based on the selected configuration and specified logger
|
||||
func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv []string) (oci.Runtime, error) {
|
||||
lowLevelRuntime, err := oci.NewLowLevelRuntime(logger, cfg.NVIDIAContainerRuntimeConfig.Runtimes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing low-level runtime: %v", err)
|
||||
}
|
||||
|
||||
if !oci.HasCreateSubcommand(argv) {
|
||||
logger.Debugf("Skipping modifier for non-create subcommand")
|
||||
return lowLevelRuntime, nil
|
||||
}
|
||||
|
||||
ociSpec, err := oci.NewSpec(logger, argv)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
|
||||
}
|
||||
|
||||
lowLevelRuntimeCandidates := []string{dockerRuncExecutableName, runcExecutableName}
|
||||
lowLevelRuntime, err := oci.NewLowLevelRuntime(logger, lowLevelRuntimeCandidates)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing low-level runtime: %v", err)
|
||||
}
|
||||
|
||||
specModifier, err := newSpecModifier(logger, cfg, ociSpec)
|
||||
specModifier, err := newSpecModifier(logger, cfg, ociSpec, argv)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err)
|
||||
}
|
||||
@@ -61,10 +61,51 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [
|
||||
}
|
||||
|
||||
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
|
||||
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||
if !cfg.NVIDIAContainerRuntimeConfig.Experimental {
|
||||
return modifier.NewStableRuntimeModifier(logger), nil
|
||||
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
|
||||
modeModifier, err := newModeModifier(logger, cfg, ociSpec, argv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return modifier.NewExperimentalModifier(logger, cfg, ociSpec)
|
||||
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
gdsModifier, err := modifier.NewGDSModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mofedModifier, err := modifier.NewMOFEDModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tegraModifier, err := modifier.NewTegraPlatformFiles(logger)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
modifiers := modifier.Merge(
|
||||
modeModifier,
|
||||
graphicsModifier,
|
||||
gdsModifier,
|
||||
mofedModifier,
|
||||
tegraModifier,
|
||||
)
|
||||
return modifiers, nil
|
||||
}
|
||||
|
||||
func newModeModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
|
||||
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
|
||||
case "legacy":
|
||||
return modifier.NewStableRuntimeModifier(logger), nil
|
||||
case "csv":
|
||||
return modifier.NewCSVModifier(logger, cfg, ociSpec)
|
||||
case "cdi":
|
||||
return modifier.NewCDIModifier(logger, cfg, ociSpec)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
|
||||
}
|
||||
|
||||
@@ -38,17 +38,27 @@ func TestFactoryMethod(t *testing.T) {
|
||||
expectedError bool
|
||||
}{
|
||||
{
|
||||
description: "empty config no error",
|
||||
description: "empty config raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
{
|
||||
description: "experimental flag supported",
|
||||
description: "config with runtime raises no error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Experimental: true,
|
||||
DiscoverMode: "legacy",
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "csv mode is supported",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
@@ -59,6 +69,43 @@ func TestFactoryMethod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "non-legacy discover mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "non-legacy",
|
||||
},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
{
|
||||
description: "legacy discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "csv discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "empty mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
@@ -69,7 +116,7 @@ func TestFactoryMethod(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec))
|
||||
|
||||
argv := []string{"--bundle", bundleDir}
|
||||
argv := []string{"--bundle", bundleDir, "create"}
|
||||
|
||||
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
|
||||
if tc.expectedError {
|
||||
|
||||
@@ -1,3 +1,48 @@
|
||||
# NVIDIA Container Toolkit CLI
|
||||
|
||||
The NVIDIA Container Toolkit CLI `nvidia-ctk` provides a number of utilities that are useful for working with the NVIDIA Container Toolkit.
|
||||
|
||||
## Functionality
|
||||
|
||||
### Configure runtimes
|
||||
|
||||
The `runtime` command of the `nvidia-ctk` CLI provides a set of utilities to related to the configuration
|
||||
and management of supported container engines.
|
||||
|
||||
For example, running the following command:
|
||||
```bash
|
||||
nvidia-ctk runtime configure --set-as-default
|
||||
```
|
||||
will ensure that the NVIDIA Container Runtime is added as the default runtime to the default container
|
||||
engine.
|
||||
|
||||
### Generate CDI specifications
|
||||
|
||||
The [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface) provides
|
||||
a vendor-agnostic mechanism to make arbitrary devices accessible in containerized environments. To allow NVIDIA devices to be
|
||||
used in these environments, the NVIDIA Container Toolkit CLI includes functionality to generate a CDI specification for the
|
||||
available NVIDIA GPUs in a system.
|
||||
|
||||
In order to generate the CDI specification for the available devices, run the following command:\
|
||||
```bash
|
||||
nvidia-ctk cdi generate
|
||||
```
|
||||
|
||||
The default is to print the specification to STDOUT and a filename can be specified using the `--output` flag.
|
||||
|
||||
The specification will contain a device entries as follows (where applicable):
|
||||
* An `nvidia.com/gpu=gpu{INDEX}` device for each non-MIG-enabled full GPU in the system
|
||||
* An `nvidia.com/gpu=mig{GPU_INDEX}:{MIG_INDEX}` device for each MIG-device in the system
|
||||
* A special device called `nvidia.com/gpu=all` which represents all available devices.
|
||||
|
||||
For example, to generate the CDI specification in the default location where CDI-enabled tools such as `podman`, `containerd`, `cri-o`, or the NVIDIA Container Runtime can be configured to load it, the following command can be run:
|
||||
|
||||
```bash
|
||||
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
||||
```
|
||||
(Note that `sudo` is used to ensure the correct permissions to write to the `/etc/cdi` folder)
|
||||
|
||||
With the specification generated, a GPU can be requested by specifying the fully-qualified CDI device name. With `podman` as an exmaple:
|
||||
```bash
|
||||
podman run --rm -ti --device=nvidia.com/gpu=gpu0 ubuntu nvidia-smi -L
|
||||
```
|
||||
|
||||
50
cmd/nvidia-ctk/cdi/cdi.go
Normal file
50
cmd/nvidia-ctk/cdi/cdi.go
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cdi
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs an info command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
// Create the 'hook' command
|
||||
hook := cli.Command{
|
||||
Name: "cdi",
|
||||
Usage: "Provide tools for interacting with Container Device Interface specifications",
|
||||
}
|
||||
|
||||
hook.Subcommands = []*cli.Command{
|
||||
generate.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
}
|
||||
559
cmd/nvidia-ctk/cdi/generate/generate.go
Normal file
559
cmd/nvidia-ctk/cdi/generate/generate.go
Normal file
@@ -0,0 +1,559 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaCTKExecutable = "nvidia-ctk"
|
||||
nvidiaCTKDefaultFilePath = "/usr/bin/" + nvidiaCTKExecutable
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
output string
|
||||
jsonMode bool
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build creates the CLI command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'generate-cdi' command
|
||||
c := cli.Command{
|
||||
Name: "generate",
|
||||
Usage: "Generate CDI specifications for use with CDI-enabled runtimes",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "output",
|
||||
Usage: "Specify the file to output the generated CDI specification to. If this is '-' or '' the specification is output to STDOUT",
|
||||
Destination: &cfg.output,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "json",
|
||||
Usage: "Output the generated CDI spec in JSON mode instead of YAML",
|
||||
Destination: &cfg.jsonMode,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
spec, err := m.generateSpec()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec: %v", err)
|
||||
}
|
||||
|
||||
var outputTo io.Writer
|
||||
if cfg.output == "" || cfg.output == "-" {
|
||||
outputTo = os.Stdout
|
||||
} else {
|
||||
outputFile, err := os.Create(cfg.output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create output file: %v", err)
|
||||
}
|
||||
defer outputFile.Close()
|
||||
outputTo = outputFile
|
||||
}
|
||||
|
||||
if filepath.Ext(cfg.output) == ".json" {
|
||||
cfg.jsonMode = true
|
||||
} else if filepath.Ext(cfg.output) == ".yaml" || filepath.Ext(cfg.output) == ".yml" {
|
||||
cfg.jsonMode = false
|
||||
}
|
||||
|
||||
data, err := yaml.Marshal(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal CDI spec: %v", err)
|
||||
}
|
||||
|
||||
if cfg.jsonMode {
|
||||
data, err = yaml.YAMLToJSONStrict(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to convert CDI spec from YAML to JSON: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
err = writeToOutput(cfg.jsonMode, data, outputTo)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write output: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func writeToOutput(jsonMode bool, data []byte, output io.Writer) error {
|
||||
if !jsonMode {
|
||||
_, err := output.Write([]byte("---\n"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write YAML separator: %v", err)
|
||||
}
|
||||
|
||||
}
|
||||
_, err := output.Write(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write data: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) generateSpec() (*specs.Spec, error) {
|
||||
nvmllib := nvml.New()
|
||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, r
|
||||
}
|
||||
defer nvmllib.Shutdown()
|
||||
|
||||
devicelib := device.New(device.WithNvml(nvmllib))
|
||||
|
||||
spec := specs.Spec{
|
||||
Version: "0.4.0",
|
||||
Kind: "nvidia.com/gpu",
|
||||
ContainerEdits: specs.ContainerEdits{},
|
||||
}
|
||||
err := devicelib.VisitDevices(func(i int, d device.Device) error {
|
||||
isMig, err := d.IsMigEnabled()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check whether device is MIG device: %v", err)
|
||||
}
|
||||
if isMig {
|
||||
return nil
|
||||
}
|
||||
device, err := generateEditsForDevice(newGPUDevice(i, d))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec for device %v: %v", i, err)
|
||||
}
|
||||
|
||||
graphicsEdits, err := m.editsForGraphicsDevice(d)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec for DRM devices associated with device %v: %v", i, err)
|
||||
}
|
||||
|
||||
// We add the device nodes and hooks edits for the DRM devices; Mounts are added globally
|
||||
for _, dn := range graphicsEdits.DeviceNodes {
|
||||
device.ContainerEdits.DeviceNodes = append(device.ContainerEdits.DeviceNodes, dn)
|
||||
}
|
||||
for _, h := range graphicsEdits.Hooks {
|
||||
device.ContainerEdits.Hooks = append(device.ContainerEdits.Hooks, h)
|
||||
}
|
||||
|
||||
spec.Devices = append(spec.Devices, device)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate CDI spec for GPU devices: %v", err)
|
||||
}
|
||||
|
||||
err = devicelib.VisitMigDevices(func(i int, d device.Device, j int, m device.MigDevice) error {
|
||||
device, err := generateEditsForDevice(newMigDevice(i, j, m))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec for device %v: %v", i, err)
|
||||
}
|
||||
|
||||
spec.Devices = append(spec.Devices, device)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
|
||||
}
|
||||
|
||||
// We create an "all" device with all the discovered device nodes
|
||||
var allDeviceNodes []*specs.DeviceNode
|
||||
for _, d := range spec.Devices {
|
||||
for _, dn := range d.ContainerEdits.DeviceNodes {
|
||||
allDeviceNodes = append(allDeviceNodes, dn)
|
||||
}
|
||||
}
|
||||
all := specs.Device{
|
||||
Name: "all",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: allDeviceNodes,
|
||||
},
|
||||
}
|
||||
|
||||
spec.Devices = append(spec.Devices, all)
|
||||
spec.ContainerEdits.DeviceNodes = m.getExistingMetaDeviceNodes()
|
||||
|
||||
libraries, err := m.findLibs(nvmllib)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate driver libraries: %v", err)
|
||||
}
|
||||
|
||||
binaries, err := m.findBinaries()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate driver binaries: %v", err)
|
||||
}
|
||||
|
||||
ipcs, err := m.findIPC()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate driver IPC sockets: %v", err)
|
||||
}
|
||||
|
||||
graphicsEdits, err := m.editsForGraphicsDevice(nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generated edits for graphics libraries")
|
||||
}
|
||||
|
||||
libOptions := []string{
|
||||
"ro",
|
||||
"nosuid",
|
||||
"nodev",
|
||||
"bind",
|
||||
}
|
||||
ipcOptions := append(libOptions, "noexec")
|
||||
|
||||
spec.ContainerEdits.Mounts = append(
|
||||
generateMountsForPaths(libOptions, libraries, binaries),
|
||||
generateMountsForPaths(ipcOptions, ipcs)...,
|
||||
)
|
||||
|
||||
spec.ContainerEdits.Mounts = append(spec.ContainerEdits.Mounts, graphicsEdits.Mounts...)
|
||||
|
||||
ldcacheUpdateHook := m.generateUpdateLdCacheHook(libraries)
|
||||
|
||||
deviceFolderPermissionHooks, err := m.generateDeviceFolderPermissionHooks(ldcacheUpdateHook.Path, allDeviceNodes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err)
|
||||
}
|
||||
|
||||
spec.ContainerEdits.Hooks = append([]*specs.Hook{ldcacheUpdateHook}, deviceFolderPermissionHooks...)
|
||||
|
||||
return &spec, nil
|
||||
}
|
||||
|
||||
func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) {
|
||||
deviceNodePaths, err := d.GetDeviceNodes()
|
||||
if err != nil {
|
||||
return specs.Device{}, fmt.Errorf("failed to get paths for device: %v", err)
|
||||
}
|
||||
|
||||
deviceNodes := getDeviceNodesFromPaths(deviceNodePaths)
|
||||
|
||||
device := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: deviceNodes,
|
||||
},
|
||||
}
|
||||
|
||||
return device, nil
|
||||
}
|
||||
|
||||
func (m command) editsForGraphicsDevice(device device.Device) (*specs.ContainerEdits, error) {
|
||||
selectedDevice := image.NewVisibleDevices("none")
|
||||
if device != nil {
|
||||
uuid, ret := device.GetUUID()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting device UUID: %v", ret)
|
||||
}
|
||||
selectedDevice = image.NewVisibleDevices(uuid)
|
||||
}
|
||||
cfg := discover.Config{
|
||||
Root: "",
|
||||
NVIDIAContainerToolkitCLIExecutablePath: "nvidia-ctk",
|
||||
}
|
||||
// Create a discoverer for the single device:
|
||||
d, err := discover.NewGraphicsDiscoverer(m.logger, selectedDevice, &cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing discoverer: %v", err)
|
||||
}
|
||||
|
||||
devices, err := d.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting DRM devices: %v", err)
|
||||
}
|
||||
|
||||
var deviceNodes []*specs.DeviceNode
|
||||
for _, d := range devices {
|
||||
dn := specs.DeviceNode{
|
||||
Path: d.Path,
|
||||
HostPath: d.HostPath,
|
||||
}
|
||||
deviceNodes = append(deviceNodes, &dn)
|
||||
}
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting hooks: %v", err)
|
||||
}
|
||||
|
||||
var cdiHooks []*specs.Hook
|
||||
for _, h := range hooks {
|
||||
cdiHook := specs.Hook{
|
||||
HookName: h.Lifecycle,
|
||||
Path: h.Path,
|
||||
Args: h.Args,
|
||||
}
|
||||
cdiHooks = append(cdiHooks, &cdiHook)
|
||||
}
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting mounts: %v", err)
|
||||
}
|
||||
|
||||
var cdiMounts []*specs.Mount
|
||||
for _, m := range mounts {
|
||||
cdiMount := specs.Mount{
|
||||
ContainerPath: m.Path,
|
||||
HostPath: m.HostPath,
|
||||
Options: []string{
|
||||
"ro",
|
||||
"nosuid",
|
||||
"nodev",
|
||||
"bind",
|
||||
},
|
||||
Type: "bind",
|
||||
}
|
||||
cdiMounts = append(cdiMounts, &cdiMount)
|
||||
}
|
||||
|
||||
edits := specs.ContainerEdits{
|
||||
DeviceNodes: deviceNodes,
|
||||
Hooks: cdiHooks,
|
||||
Mounts: cdiMounts,
|
||||
}
|
||||
|
||||
return &edits, nil
|
||||
}
|
||||
|
||||
func (m command) getExistingMetaDeviceNodes() []*specs.DeviceNode {
|
||||
metaDeviceNodePaths := []string{
|
||||
"/dev/nvidia-modeset",
|
||||
"/dev/nvidia-uvm-tools",
|
||||
"/dev/nvidia-uvm",
|
||||
"/dev/nvidiactl",
|
||||
}
|
||||
|
||||
var existingDeviceNodePaths []string
|
||||
for _, p := range metaDeviceNodePaths {
|
||||
if _, err := os.Stat(p); err != nil {
|
||||
m.logger.Infof("Ignoring missing meta device %v", p)
|
||||
continue
|
||||
}
|
||||
existingDeviceNodePaths = append(existingDeviceNodePaths, p)
|
||||
}
|
||||
|
||||
return getDeviceNodesFromPaths(existingDeviceNodePaths)
|
||||
}
|
||||
|
||||
func getDeviceNodesFromPaths(deviceNodePaths []string) []*specs.DeviceNode {
|
||||
var deviceNodes []*specs.DeviceNode
|
||||
for _, p := range deviceNodePaths {
|
||||
deviceNode := specs.DeviceNode{
|
||||
Path: p,
|
||||
}
|
||||
deviceNodes = append(deviceNodes, &deviceNode)
|
||||
}
|
||||
|
||||
return deviceNodes
|
||||
}
|
||||
|
||||
func (m command) findLibs(nvmllib nvml.Interface) ([]string, error) {
|
||||
version, r := nvmllib.SystemGetDriverVersion()
|
||||
if r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
||||
}
|
||||
m.logger.Infof("Using driver version %v", version)
|
||||
|
||||
cache, err := ldcache.New(m.logger, "")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load ldcache: %v", err)
|
||||
}
|
||||
|
||||
libs32, libs64 := cache.List()
|
||||
|
||||
var libs []string
|
||||
for _, l := range libs64 {
|
||||
if strings.HasSuffix(l, version) {
|
||||
m.logger.Infof("found 64-bit driver lib: %v", l)
|
||||
libs = append(libs, l)
|
||||
}
|
||||
}
|
||||
|
||||
for _, l := range libs32 {
|
||||
if strings.HasSuffix(l, version) {
|
||||
m.logger.Infof("found 32-bit driver lib: %v", l)
|
||||
libs = append(libs, l)
|
||||
}
|
||||
}
|
||||
|
||||
return libs, nil
|
||||
}
|
||||
|
||||
func (m command) findBinaries() ([]string, error) {
|
||||
candidates := []string{
|
||||
"nvidia-smi", /* System management interface */
|
||||
"nvidia-debugdump", /* GPU coredump utility */
|
||||
"nvidia-persistenced", /* Persistence mode utility */
|
||||
"nvidia-cuda-mps-control", /* Multi process service CLI */
|
||||
"nvidia-cuda-mps-server", /* Multi process service server */
|
||||
}
|
||||
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
|
||||
var binaries []string
|
||||
for _, c := range candidates {
|
||||
targets, err := locator.Locate(c)
|
||||
if err != nil {
|
||||
m.logger.Warningf("skipping %v: %v", c, err)
|
||||
continue
|
||||
}
|
||||
|
||||
binaries = append(binaries, targets[0])
|
||||
}
|
||||
return binaries, nil
|
||||
}
|
||||
|
||||
func (m command) findIPC() ([]string, error) {
|
||||
candidates := []string{
|
||||
"/var/run/nvidia-persistenced/socket",
|
||||
"/var/run/nvidia-fabricmanager/socket",
|
||||
// TODO: This can be controlled by the NV_MPS_PIPE_DIR envvar
|
||||
"/tmp/nvidia-mps",
|
||||
}
|
||||
|
||||
locator := lookup.NewFileLocator(m.logger, "")
|
||||
|
||||
var ipcs []string
|
||||
for _, c := range candidates {
|
||||
targets, err := locator.Locate(c)
|
||||
if err != nil {
|
||||
m.logger.Warningf("skipping %v: %v", c, err)
|
||||
continue
|
||||
}
|
||||
|
||||
ipcs = append(ipcs, targets[0])
|
||||
}
|
||||
return ipcs, nil
|
||||
}
|
||||
|
||||
func generateMountsForPaths(options []string, pathSets ...[]string) []*specs.Mount {
|
||||
var mounts []*specs.Mount
|
||||
for _, paths := range pathSets {
|
||||
for _, p := range paths {
|
||||
mount := specs.Mount{
|
||||
HostPath: p,
|
||||
// We may want to adjust the container path
|
||||
ContainerPath: p,
|
||||
Type: "bind",
|
||||
Options: options,
|
||||
}
|
||||
mounts = append(mounts, &mount)
|
||||
}
|
||||
}
|
||||
return mounts
|
||||
}
|
||||
|
||||
func (m command) generateUpdateLdCacheHook(libraries []string) *specs.Hook {
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
|
||||
hook := discover.CreateLDCacheUpdateHook(
|
||||
m.logger,
|
||||
locator,
|
||||
nvidiaCTKExecutable,
|
||||
nvidiaCTKDefaultFilePath,
|
||||
libraries,
|
||||
)
|
||||
return &specs.Hook{
|
||||
HookName: hook.Lifecycle,
|
||||
Path: hook.Path,
|
||||
Args: hook.Args,
|
||||
}
|
||||
}
|
||||
|
||||
func (m command) generateDeviceFolderPermissionHooks(nvidiaCTKPath string, deviceNodes []*specs.DeviceNode) ([]*specs.Hook, error) {
|
||||
var deviceFolders []string
|
||||
seen := make(map[string]bool)
|
||||
|
||||
for _, dn := range deviceNodes {
|
||||
if !strings.HasPrefix(dn.Path, "/dev") {
|
||||
m.logger.Warningf("Skipping unexpected device folder path for device %v", dn.Path)
|
||||
continue
|
||||
}
|
||||
for df := filepath.Dir(dn.Path); df != "/dev"; df = filepath.Dir(df) {
|
||||
if seen[df] {
|
||||
continue
|
||||
}
|
||||
deviceFolders = append(deviceFolders, df)
|
||||
seen[df] = true
|
||||
}
|
||||
}
|
||||
|
||||
foldersByMode := make(map[string][]string)
|
||||
for _, p := range deviceFolders {
|
||||
info, err := os.Stat(p)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get info for path %v: %v", p, err)
|
||||
}
|
||||
mode := fmt.Sprintf("%o", info.Mode().Perm())
|
||||
foldersByMode[mode] = append(foldersByMode[mode], p)
|
||||
}
|
||||
|
||||
var hooks []*specs.Hook
|
||||
for mode, folders := range foldersByMode {
|
||||
args := []string{filepath.Base(nvidiaCTKPath), "hook", "chmod", "--mode", mode}
|
||||
for _, folder := range folders {
|
||||
args = append(args, "--path", folder)
|
||||
}
|
||||
hook := specs.Hook{
|
||||
HookName: cdi.CreateContainerHook,
|
||||
Path: nvidiaCTKPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
hooks = append(hooks, &hook)
|
||||
}
|
||||
|
||||
return hooks, nil
|
||||
}
|
||||
123
cmd/nvidia-ctk/cdi/generate/nvml_devices.go
Normal file
123
cmd/nvidia-ctk/cdi/generate/nvml_devices.go
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY Type, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// nvmlDevice wraps an nvml.Device with more functions.
|
||||
type nvmlDevice struct {
|
||||
nvml.Device
|
||||
}
|
||||
|
||||
// nvmlMigDevice allows for specific functions of nvmlDevice to be overridden.
|
||||
type nvmlMigDevice nvmlDevice
|
||||
|
||||
// deviceInfo defines the information the required to construct a Device
|
||||
type deviceInfo interface {
|
||||
GetUUID() (string, error)
|
||||
GetDeviceNodes() ([]string, error)
|
||||
}
|
||||
|
||||
var _ deviceInfo = (*nvmlDevice)(nil)
|
||||
var _ deviceInfo = (*nvmlMigDevice)(nil)
|
||||
|
||||
func newGPUDevice(i int, gpu device.Device) (string, nvmlDevice) {
|
||||
return fmt.Sprintf("gpu%v", i), nvmlDevice{gpu}
|
||||
}
|
||||
|
||||
func newMigDevice(i int, j int, mig device.MigDevice) (string, nvmlMigDevice) {
|
||||
return fmt.Sprintf("mig%v:%v", i, j), nvmlMigDevice{mig}
|
||||
}
|
||||
|
||||
// GetUUID returns the UUID of the device
|
||||
func (d nvmlDevice) GetUUID() (string, error) {
|
||||
uuid, ret := d.Device.GetUUID()
|
||||
if ret != nvml.SUCCESS {
|
||||
return "", ret
|
||||
}
|
||||
return uuid, nil
|
||||
}
|
||||
|
||||
// GetUUID returns the UUID of the device
|
||||
func (d nvmlMigDevice) GetUUID() (string, error) {
|
||||
return nvmlDevice(d).GetUUID()
|
||||
}
|
||||
|
||||
// GetDeviceNodes returns the device node paths for a GPU device
|
||||
func (d nvmlDevice) GetDeviceNodes() ([]string, error) {
|
||||
minor, ret := d.GetMinorNumber()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
||||
}
|
||||
path := fmt.Sprintf("/dev/nvidia%d", minor)
|
||||
|
||||
return []string{path}, nil
|
||||
}
|
||||
|
||||
// GetDeviceNodes returns the device node paths for a MIG device
|
||||
func (d nvmlMigDevice) GetDeviceNodes() ([]string, error) {
|
||||
parent, ret := d.GetDeviceHandleFromMigDeviceHandle()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting parent device: %v", ret)
|
||||
}
|
||||
minor, ret := parent.GetMinorNumber()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
||||
}
|
||||
parentPath := fmt.Sprintf("/dev/nvidia%d", minor)
|
||||
|
||||
migCaps, err := nvcaps.NewMigCaps()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
|
||||
}
|
||||
|
||||
gi, ret := d.GetGpuInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
ci, ret := d.GetComputeInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
giCap := nvcaps.NewGPUInstanceCap(minor, gi)
|
||||
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
|
||||
}
|
||||
|
||||
ciCap := nvcaps.NewComputeInstanceCap(minor, gi, ci)
|
||||
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
|
||||
}
|
||||
|
||||
devicePaths := []string{
|
||||
parentPath,
|
||||
giCapDevicePath,
|
||||
ciCapDevicePath,
|
||||
}
|
||||
|
||||
return devicePaths, nil
|
||||
}
|
||||
140
cmd/nvidia-ctk/hook/chmod/chmod.go
Normal file
140
cmd/nvidia-ctk/hook/chmod/chmod.go
Normal file
@@ -0,0 +1,140 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package chmod
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
paths cli.StringSlice
|
||||
mode string
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs a chmod command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build the chmod command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'chmod' command
|
||||
c := cli.Command{
|
||||
Name: "chmod",
|
||||
Usage: "Set the permissions of folders in the container by running chmod. The container root is prefixed to the specified paths.",
|
||||
Before: func(c *cli.Context) error {
|
||||
return validateFlags(c, &cfg)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "path",
|
||||
Usage: "Specifiy a path to apply the specified mode to",
|
||||
Destination: &cfg.paths,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "mode",
|
||||
Usage: "Specify the file mode",
|
||||
Destination: &cfg.mode,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func validateFlags(c *cli.Context, cfg *config) error {
|
||||
if strings.TrimSpace(cfg.mode) == "" {
|
||||
return fmt.Errorf("a non-empty mode must be specified")
|
||||
}
|
||||
|
||||
for _, p := range cfg.paths.Value() {
|
||||
if strings.TrimSpace(p) == "" {
|
||||
return fmt.Errorf("paths must not be empty")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
if containerRoot == "" {
|
||||
return fmt.Errorf("empty container root detected")
|
||||
}
|
||||
|
||||
paths := m.getPaths(containerRoot, cfg.paths.Value())
|
||||
if len(paths) == 0 {
|
||||
m.logger.Debugf("No paths specified; exiting")
|
||||
return nil
|
||||
}
|
||||
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
targets, err := locator.Locate("chmod")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to locate chmod: %v", err)
|
||||
}
|
||||
chmodPath := targets[0]
|
||||
|
||||
args := append([]string{filepath.Base(chmodPath), cfg.mode}, paths...)
|
||||
|
||||
return syscall.Exec(chmodPath, args, nil)
|
||||
}
|
||||
|
||||
// getPaths updates the specified paths relative to the root.
|
||||
func (m command) getPaths(root string, paths []string) []string {
|
||||
var pathsInRoot []string
|
||||
for _, f := range paths {
|
||||
pathsInRoot = append(pathsInRoot, filepath.Join(root, f))
|
||||
}
|
||||
|
||||
return pathsInRoot
|
||||
}
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
@@ -35,6 +36,7 @@ type command struct {
|
||||
type config struct {
|
||||
hostRoot string
|
||||
filenames cli.StringSlice
|
||||
links cli.StringSlice
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
@@ -66,11 +68,15 @@ func (m command) build() *cli.Command {
|
||||
Destination: &cfg.hostRoot,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "csv-filenames",
|
||||
Aliases: []string{"f"},
|
||||
Usage: "Specify the (CSV) filenames to process",
|
||||
Name: "csv-filename",
|
||||
Usage: "Specify a (CSV) filename to process",
|
||||
Destination: &cfg.filenames,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "link",
|
||||
Usage: "Specify a specific link to create. The link is specified as target::link",
|
||||
Destination: &cfg.links,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
@@ -87,14 +93,9 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
spec, err := s.LoadSpec()
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load OCI spec: %v", err)
|
||||
}
|
||||
|
||||
var containerRoot string
|
||||
if spec.Root != nil {
|
||||
containerRoot = spec.Root.Path
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
csvFiles := cfg.filenames.Value()
|
||||
@@ -135,40 +136,59 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
||||
m.logger.Debugf("%v is not a symlink", candidate)
|
||||
continue
|
||||
}
|
||||
target, err := changeRoot(cfg.hostRoot, "/", targets[0])
|
||||
|
||||
err = m.createLink(created, cfg.hostRoot, containerRoot, targets[0], candidate)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to resolve path for target %v relative to %v: %v", target, cfg.hostRoot, err)
|
||||
m.logger.Warnf("Failed to create link %v: %v", []string{targets[0], candidate}, err)
|
||||
}
|
||||
}
|
||||
|
||||
links := cfg.links.Value()
|
||||
for _, l := range links {
|
||||
parts := strings.Split(l, "::")
|
||||
if len(parts) != 2 {
|
||||
m.logger.Warnf("Invalid link specification %v", l)
|
||||
continue
|
||||
}
|
||||
|
||||
linkPath, err := changeRoot(cfg.hostRoot, containerRoot, candidate)
|
||||
err := m.createLink(created, cfg.hostRoot, containerRoot, parts[0], parts[1])
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to resolve path for link %v relative to %v: %v", candidate, cfg.hostRoot, err)
|
||||
continue
|
||||
m.logger.Warnf("Failed to create link %v: %v", parts, err)
|
||||
}
|
||||
|
||||
if created[linkPath] {
|
||||
m.logger.Debugf("Link %v already created", linkPath)
|
||||
continue
|
||||
}
|
||||
m.logger.Infof("Symlinking %v to %v", linkPath, target)
|
||||
err = os.MkdirAll(filepath.Dir(linkPath), 0755)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Faild to create directory: %v", err)
|
||||
continue
|
||||
}
|
||||
err = os.Symlink(target, linkPath)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to create symlink: %v", err)
|
||||
continue
|
||||
}
|
||||
created[linkPath] = true
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func (m command) createLink(created map[string]bool, hostRoot string, containerRoot string, target string, link string) error {
|
||||
linkPath, err := changeRoot(hostRoot, containerRoot, link)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
|
||||
}
|
||||
if created[linkPath] {
|
||||
m.logger.Debugf("Link %v already created", linkPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
targetPath, err := changeRoot(hostRoot, "/", target)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Symlinking %v to %v", linkPath, targetPath)
|
||||
err = os.MkdirAll(filepath.Dir(linkPath), 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create directory: %v", err)
|
||||
}
|
||||
err = os.Symlink(target, linkPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create symlink: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func changeRoot(current string, new string, path string) (string, error) {
|
||||
if !filepath.IsAbs(path) {
|
||||
return path, nil
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
package hook
|
||||
|
||||
import (
|
||||
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
|
||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
|
||||
"github.com/sirupsen/logrus"
|
||||
@@ -46,6 +47,7 @@ func (m hookCommand) build() *cli.Command {
|
||||
hook.Subcommands = []*cli.Command{
|
||||
ldcache.NewCommand(m.logger),
|
||||
symlinks.NewCommand(m.logger),
|
||||
chmod.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
|
||||
@@ -59,8 +59,8 @@ func (m command) build() *cli.Command {
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "folders",
|
||||
Usage: "Specifiy the additional folders to add to /etc/ld.so.conf before updating the ld cache",
|
||||
Name: "folder",
|
||||
Usage: "Specifiy a folder to add to /etc/ld.so.conf before updating the ld cache",
|
||||
Destination: &cfg.folders,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
@@ -79,14 +79,9 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
spec, err := s.LoadSpec()
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load OCI spec: %v", err)
|
||||
}
|
||||
|
||||
var containerRoot string
|
||||
if spec.Root != nil {
|
||||
containerRoot = spec.Root.Path
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
err = m.createConfig(containerRoot, cfg.folders.Value())
|
||||
|
||||
47
cmd/nvidia-ctk/info/info.go
Normal file
47
cmd/nvidia-ctk/info/info.go
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
import (
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs an info command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
// Create the 'hook' command
|
||||
hook := cli.Command{
|
||||
Name: "info",
|
||||
Usage: "Provide information about the system",
|
||||
}
|
||||
|
||||
hook.Subcommands = []*cli.Command{}
|
||||
|
||||
return &hook
|
||||
}
|
||||
@@ -19,13 +19,16 @@ package main
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
|
||||
infoCLI "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
cli "github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
var version string
|
||||
|
||||
var logger = log.New()
|
||||
|
||||
// config defines the options that can be set for the CLI through config files,
|
||||
@@ -41,10 +44,11 @@ func main() {
|
||||
|
||||
// Create the top-level CLI
|
||||
c := cli.NewApp()
|
||||
c.Name = "NVIDIA Container Toolkit CLI"
|
||||
c.UseShortOptionHandling = true
|
||||
c.EnableBashCompletion = true
|
||||
c.Usage = "Tools to configure the NVIDIA Container Toolkit"
|
||||
c.Version = version
|
||||
c.Version = info.GetVersionString()
|
||||
|
||||
// Setup the flags for this command
|
||||
c.Flags = []cli.Flag{
|
||||
@@ -70,6 +74,9 @@ func main() {
|
||||
// Define the subcommands
|
||||
c.Commands = []*cli.Command{
|
||||
hook.NewCommand(logger),
|
||||
runtime.NewCommand(logger),
|
||||
infoCLI.NewCommand(logger),
|
||||
cdi.NewCommand(logger),
|
||||
}
|
||||
|
||||
// Run the CLI
|
||||
|
||||
203
cmd/nvidia-ctk/runtime/configure/configure.go
Normal file
203
cmd/nvidia-ctk/runtime/configure/configure.go
Normal file
@@ -0,0 +1,203 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package configure
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/nvidia"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/crio"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/docker"
|
||||
"github.com/pelletier/go-toml"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultRuntime = "docker"
|
||||
|
||||
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
|
||||
defaultCrioConfigFilePath = "/etc/crio/crio.conf"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs an configure command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// config defines the options that can be set for the CLI through config files,
|
||||
// environment variables, or command line config
|
||||
type config struct {
|
||||
dryRun bool
|
||||
runtime string
|
||||
configFilePath string
|
||||
nvidiaOptions nvidia.Options
|
||||
}
|
||||
|
||||
func (m command) build() *cli.Command {
|
||||
// Create a config struct to hold the parsed environment variables or command line flags
|
||||
config := config{}
|
||||
|
||||
// Create the 'configure' command
|
||||
configure := cli.Command{
|
||||
Name: "configure",
|
||||
Usage: "Add a runtime to the specified container engine",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.configureWrapper(c, &config)
|
||||
},
|
||||
}
|
||||
|
||||
configure.Flags = []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "dry-run",
|
||||
Usage: "update the runtime configuration as required but don't write changes to disk",
|
||||
Destination: &config.dryRun,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime",
|
||||
Usage: "the target runtime engine. One of [crio, docker]",
|
||||
Value: defaultRuntime,
|
||||
Destination: &config.runtime,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config",
|
||||
Usage: "path to the config file for the target runtime",
|
||||
Destination: &config.configFilePath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-runtime-name",
|
||||
Usage: "specify the name of the NVIDIA runtime that will be added",
|
||||
Value: nvidia.RuntimeName,
|
||||
Destination: &config.nvidiaOptions.RuntimeName,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime-path",
|
||||
Usage: "specify the path to the NVIDIA runtime executable",
|
||||
Value: nvidia.RuntimeExecutable,
|
||||
Destination: &config.nvidiaOptions.RuntimePath,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "set-as-default",
|
||||
Usage: "set the specified runtime as the default runtime",
|
||||
Destination: &config.nvidiaOptions.SetAsDefault,
|
||||
},
|
||||
}
|
||||
|
||||
return &configure
|
||||
}
|
||||
|
||||
func (m command) configureWrapper(c *cli.Context, config *config) error {
|
||||
switch config.runtime {
|
||||
case "crio":
|
||||
return m.configureCrio(c, config)
|
||||
case "docker":
|
||||
return m.configureDocker(c, config)
|
||||
}
|
||||
|
||||
return fmt.Errorf("unrecognized runtime '%v'", config.runtime)
|
||||
}
|
||||
|
||||
// configureDocker updates the docker config to enable the NVIDIA Container Runtime
|
||||
func (m command) configureDocker(c *cli.Context, config *config) error {
|
||||
configFilePath := config.configFilePath
|
||||
if configFilePath == "" {
|
||||
configFilePath = defaultDockerConfigFilePath
|
||||
}
|
||||
|
||||
cfg, err := docker.LoadConfig(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = docker.UpdateConfig(
|
||||
cfg,
|
||||
config.nvidiaOptions.RuntimeName,
|
||||
config.nvidiaOptions.RuntimePath,
|
||||
config.nvidiaOptions.SetAsDefault,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
if config.dryRun {
|
||||
output, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to JSON: %v", err)
|
||||
}
|
||||
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
|
||||
return nil
|
||||
}
|
||||
err = docker.FlushConfig(cfg, configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to flush config: %v", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Wrote updated config to %v", configFilePath)
|
||||
m.logger.Infof("It is recommended that the docker daemon be restarted.")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// configureCrio updates the crio config to enable the NVIDIA Container Runtime
|
||||
func (m command) configureCrio(c *cli.Context, config *config) error {
|
||||
configFilePath := config.configFilePath
|
||||
if configFilePath == "" {
|
||||
configFilePath = defaultCrioConfigFilePath
|
||||
}
|
||||
|
||||
cfg, err := crio.LoadConfig(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = crio.UpdateConfig(
|
||||
cfg,
|
||||
config.nvidiaOptions.RuntimeName,
|
||||
config.nvidiaOptions.RuntimePath,
|
||||
config.nvidiaOptions.SetAsDefault,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
if config.dryRun {
|
||||
output, err := toml.Marshal(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to TOML: %v", err)
|
||||
}
|
||||
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
|
||||
return nil
|
||||
}
|
||||
err = crio.FlushConfig(configFilePath, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to flush config: %v", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Wrote updated config to %v", configFilePath)
|
||||
m.logger.Infof("It is recommended that the cri-o daemon be restarted.")
|
||||
|
||||
return nil
|
||||
}
|
||||
75
cmd/nvidia-ctk/runtime/nvidia/nvidia.go
Normal file
75
cmd/nvidia-ctk/runtime/nvidia/nvidia.go
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package nvidia
|
||||
|
||||
const (
|
||||
// RuntimeName is the default name to use in configs for the NVIDIA Container Runtime
|
||||
RuntimeName = "nvidia"
|
||||
// RuntimeExecutable is the default NVIDIA Container Runtime executable file name
|
||||
RuntimeExecutable = "nvidia-container-runtime"
|
||||
)
|
||||
|
||||
// Options specifies the options for the NVIDIA Container Runtime w.r.t a container engine such as docker.
|
||||
type Options struct {
|
||||
SetAsDefault bool
|
||||
RuntimeName string
|
||||
RuntimePath string
|
||||
}
|
||||
|
||||
// Runtime defines an NVIDIA runtime with a name and a executable
|
||||
type Runtime struct {
|
||||
Name string
|
||||
Path string
|
||||
}
|
||||
|
||||
// DefaultRuntime returns the default runtime for the configured options.
|
||||
// If the configuration is invalid or the default runtimes should not be set
|
||||
// the empty string is returned.
|
||||
func (o Options) DefaultRuntime() string {
|
||||
if !o.SetAsDefault {
|
||||
return ""
|
||||
}
|
||||
|
||||
return o.RuntimeName
|
||||
}
|
||||
|
||||
// Runtime creates a runtime struct based on the options.
|
||||
func (o Options) Runtime() Runtime {
|
||||
path := o.RuntimePath
|
||||
|
||||
if o.RuntimePath == "" {
|
||||
path = RuntimeExecutable
|
||||
}
|
||||
|
||||
r := Runtime{
|
||||
Name: o.RuntimeName,
|
||||
Path: path,
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// DockerRuntimesConfig generatest the expected docker config for the specified runtime
|
||||
func (r Runtime) DockerRuntimesConfig() map[string]interface{} {
|
||||
runtimes := make(map[string]interface{})
|
||||
runtimes[r.Name] = map[string]interface{}{
|
||||
"path": r.Path,
|
||||
"args": []string{},
|
||||
}
|
||||
|
||||
return runtimes
|
||||
}
|
||||
49
cmd/nvidia-ctk/runtime/runtime.go
Normal file
49
cmd/nvidia-ctk/runtime/runtime.go
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type runtimeCommand struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs a runtime command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := runtimeCommand{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
func (m runtimeCommand) build() *cli.Command {
|
||||
// Create the 'runtime' command
|
||||
runtime := cli.Command{
|
||||
Name: "runtime",
|
||||
Usage: "A collection of runtime-related utilities for the NVIDIA Container Toolkit",
|
||||
}
|
||||
|
||||
runtime.Subcommands = []*cli.Command{
|
||||
configure.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &runtime
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
disable-require = false
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
#path = "/usr/bin/nvidia-container-cli"
|
||||
environment = []
|
||||
#debug = "/var/log/nvidia-container-toolkit.log"
|
||||
#ldcache = "/etc/ld.so.cache"
|
||||
load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
@@ -16,4 +16,17 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
@@ -16,4 +16,17 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
@@ -16,4 +16,17 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
@@ -16,4 +16,17 @@ ldconfig = "@/sbin/ldconfig.real"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
ARG BASEIMAGE
|
||||
FROM ${BASEIMAGE}
|
||||
|
||||
RUN yum install -y \
|
||||
ca-certificates \
|
||||
wget \
|
||||
git \
|
||||
rpm-build \
|
||||
make && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
ARG GOLANG_VERSION=0.0.0
|
||||
RUN set -eux; \
|
||||
\
|
||||
arch="$(uname -m)"; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture"; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
| tar -C /usr/local -xz
|
||||
|
||||
ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
RUN mkdir -p $DIST_DIR /dist
|
||||
|
||||
# nvidia-container-toolkit
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
|
||||
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
|
||||
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
|
||||
# This might not be useful on Amazon Linux, but it's simpler to keep the RHEL
|
||||
# and Amazon Linux packages identical.
|
||||
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
|
||||
|
||||
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
|
||||
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
|
||||
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
CMD arch=$(uname -m) && \
|
||||
rpmbuild --clean --target=$arch -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "version $VERSION" \
|
||||
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
|
||||
-D "release $RELEASE" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
@@ -32,6 +32,7 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
@@ -48,6 +49,8 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
@@ -62,12 +65,17 @@ RUN if [ "$(lsb_release -cs)" = "jessie" ]; then \
|
||||
WORKDIR $DIST_DIR
|
||||
COPY packaging/debian ./debian
|
||||
|
||||
RUN sed -i "s;@VERSION@;${REVISION};" debian/changelog && \
|
||||
dch --changelog debian/changelog --append "Bump libnvidia-container dependency to ${REVISION}}" && \
|
||||
dch --changelog debian/changelog -r "" && \
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
RUN dch --create --package="${PKG_NAME}" \
|
||||
--newversion "${REVISION}" \
|
||||
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
|
||||
dch --append "Bump libnvidia-container dependency to ${LIBNVIDIA_CONTAINER1_VERSION}" && \
|
||||
dch -r "" && \
|
||||
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
||||
|
||||
CMD export DISTRIB="$(lsb_release -cs)" && \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_VERSION="${REVISION}" \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
|
||||
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/nvidia-container-toolkit_*.deb /dist
|
||||
mv /tmp/*.deb /dist
|
||||
|
||||
@@ -14,7 +14,8 @@
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
FROM golang:${GOLANG_VERSION}
|
||||
|
||||
RUN go get -u golang.org/x/lint/golint
|
||||
RUN go get -u github.com/matryer/moq
|
||||
RUN go get -u github.com/gordonklaus/ineffassign
|
||||
RUN go get -u github.com/client9/misspell/cmd/misspell
|
||||
RUN go install golang.org/x/lint/golint@latest
|
||||
RUN go install github.com/matryer/moq@latest
|
||||
RUN go install github.com/gordonklaus/ineffassign@latest
|
||||
RUN go install github.com/client9/misspell/cmd/misspell@latest
|
||||
RUN go install github.com/google/go-licenses@latest
|
||||
|
||||
@@ -25,11 +25,12 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
ENV PKG_NAME ${PKG_NAME}
|
||||
ENV PKG_VERS ${PKG_VERS}
|
||||
ENV PKG_REV ${PKG_REV}
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
@@ -39,6 +40,8 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
|
||||
@@ -54,11 +57,16 @@ COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
CMD arch=$(uname -m) && \
|
||||
rpmbuild --clean --target=$arch -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "version $VERSION" \
|
||||
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
|
||||
-D "release $RELEASE" \
|
||||
-D "release_date $(date +'%a %b %d %Y')" \
|
||||
-D "git_commit ${GIT_COMMIT}" \
|
||||
-D "version ${PKG_VERS}" \
|
||||
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||
-D "release ${PKG_REV}" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
|
||||
@@ -1,3 +1,19 @@
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This is the dockerfile for building packages on yum-based RPM systems.
|
||||
|
||||
ARG BASEIMAGE
|
||||
FROM ${BASEIMAGE}
|
||||
|
||||
@@ -27,11 +43,12 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
ENV PKG_NAME ${PKG_NAME}
|
||||
ENV PKG_VERS ${PKG_VERS}
|
||||
ENV PKG_REV ${PKG_REV}
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
@@ -41,6 +58,8 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
@@ -56,11 +75,16 @@ COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
CMD arch=$(uname -m) && \
|
||||
rpmbuild --clean --target=$arch -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "version $VERSION" \
|
||||
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
|
||||
-D "release $RELEASE" \
|
||||
-D "release_date $(date +'%a %b %d %Y')" \
|
||||
-D "git_commit ${GIT_COMMIT}" \
|
||||
-D "version ${PKG_VERS}" \
|
||||
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||
-D "release ${PKG_REV}" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
@@ -30,6 +30,7 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
@@ -46,6 +47,8 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
@@ -55,12 +58,17 @@ COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
WORKDIR $DIST_DIR
|
||||
COPY packaging/debian ./debian
|
||||
|
||||
RUN sed -i "s;@VERSION@;${REVISION};" debian/changelog && \
|
||||
dch --changelog debian/changelog --append "Bump libnvidia-container dependency to ${REVISION}}" && \
|
||||
dch --changelog debian/changelog -r "" && \
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
RUN dch --create --package="${PKG_NAME}" \
|
||||
--newversion "${REVISION}" \
|
||||
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
|
||||
dch --append "Bump libnvidia-container dependency to ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" && \
|
||||
dch -r "" && \
|
||||
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
||||
|
||||
CMD export DISTRIB="$(lsb_release -cs)" && \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_VERSION="${REVISION}" \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
|
||||
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/*.deb /dist
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
|
||||
# Supported OSs by architecture
|
||||
AMD64_TARGETS := ubuntu20.04 ubuntu18.04 ubuntu16.04 debian10 debian9
|
||||
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
X86_64_TARGETS := fedora35 centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
PPC64LE_TARGETS := ubuntu18.04 ubuntu16.04 centos7 centos8 rhel7 rhel8
|
||||
ARM64_TARGETS := ubuntu20.04 ubuntu18.04
|
||||
AARCH64_TARGETS := centos8 rhel8 amazonlinux2
|
||||
AARCH64_TARGETS := fedora35 centos8 rhel8 amazonlinux2
|
||||
|
||||
# Define top-level build targets
|
||||
docker%: SHELL:=/bin/bash
|
||||
@@ -85,37 +85,63 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
|
||||
--%: docker-build-%
|
||||
@
|
||||
|
||||
LIBNVIDIA_CONTAINER_VERSION ?= $(LIB_VERSION)
|
||||
LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
|
||||
|
||||
# private ubuntu target
|
||||
--ubuntu%: OS := ubuntu
|
||||
--ubuntu%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
--ubuntu%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
|
||||
--ubuntu%: PKG_REV := 1
|
||||
|
||||
# private debian target
|
||||
--debian%: OS := debian
|
||||
--debian%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
--debian%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
|
||||
--debian%: PKG_REV := 1
|
||||
|
||||
# private centos target
|
||||
--centos%: OS := centos
|
||||
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--centos%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--centos%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--centos%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private fedora target
|
||||
--fedora%: OS := fedora
|
||||
--fedora%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--fedora%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--fedora%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--fedora%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
# The fedora(35) base image has very slow performance when building aarch64 packages.
|
||||
# Since our primary concern here is glibc versions, we use the older glibc version available in centos8.
|
||||
--fedora35%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private amazonlinux target
|
||||
--amazonlinux%: OS := amazonlinux
|
||||
--amazonlinux%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--amazonlinux%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--amazonlinux%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--amazonlinux%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
|
||||
# private opensuse-leap target
|
||||
--opensuse-leap%: OS = opensuse-leap
|
||||
--opensuse-leap%: BASEIMAGE = opensuse/leap:$(VERSION)
|
||||
--opensuse-leap%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--opensuse-leap%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
# private rhel target (actually built on centos)
|
||||
--rhel%: OS := centos
|
||||
--rhel%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
|
||||
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
|
||||
--rhel%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--rhel%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
--rhel8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
|
||||
# We allow the CONFIG_TOML_SUFFIX to be overridden.
|
||||
CONFIG_TOML_SUFFIX ?= $(OS)
|
||||
|
||||
@@ -128,9 +154,12 @@ docker-build-%:
|
||||
--progress=plain \
|
||||
--build-arg BASEIMAGE="$(BASEIMAGE)" \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg PKG_NAME="$(LIB_NAME)" \
|
||||
--build-arg PKG_VERS="$(LIB_VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
|
||||
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
|
||||
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
||||
--tag $(BUILDIMAGE) \
|
||||
--file $(DOCKERFILE) .
|
||||
$(DOCKER) run \
|
||||
|
||||
36
go.mod
36
go.mod
@@ -1,17 +1,41 @@
|
||||
module github.com/NVIDIA/nvidia-container-toolkit
|
||||
|
||||
go 1.14
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.0.0
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.3.1-0.20220224133719-e5457123010b
|
||||
github.com/containers/podman/v4 v4.0.1
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.5.2
|
||||
github.com/opencontainers/runc v1.1.4
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab
|
||||
github.com/pelletier/go-toml v1.9.4
|
||||
github.com/sirupsen/logrus v1.8.1
|
||||
github.com/sirupsen/logrus v1.9.0
|
||||
github.com/stretchr/testify v1.7.0
|
||||
github.com/tsaikd/KDGoLib v0.0.0-20191001134900-7f3cf518e07d
|
||||
github.com/urfave/cli/v2 v2.3.0
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220922133427-1049a7fa76a9
|
||||
golang.org/x/mod v0.5.0
|
||||
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
|
||||
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6
|
||||
sigs.k8s.io/yaml v1.3.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/blang/semver v3.5.1+incompatible // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/fsnotify/fsnotify v1.5.4 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20220110225228-7e2d60f1e41f // indirect
|
||||
github.com/opencontainers/selinux v1.10.1 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
|
||||
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
|
||||
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
|
||||
)
|
||||
|
||||
@@ -61,7 +61,7 @@ func GetConfig() (*Config, error) {
|
||||
|
||||
tomlFile, err := os.Open(configFilePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open config file %v: %v", configFilePath, err)
|
||||
return getDefaultConfig(), nil
|
||||
}
|
||||
defer tomlFile.Close()
|
||||
|
||||
@@ -80,22 +80,26 @@ func loadConfigFrom(reader io.Reader) (*Config, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return getConfigFrom(toml), nil
|
||||
return getConfigFrom(toml)
|
||||
}
|
||||
|
||||
// getConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getConfigFrom(toml *toml.Tree) *Config {
|
||||
func getConfigFrom(toml *toml.Tree) (*Config, error) {
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
|
||||
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
|
||||
cfg.NVIDIAContainerRuntimeConfig = *getRuntimeConfigFrom(toml)
|
||||
runtimeConfig, err := getRuntimeConfigFrom(toml)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load nvidia-container-runtime config: %v", err)
|
||||
}
|
||||
cfg.NVIDIAContainerRuntimeConfig = *runtimeConfig
|
||||
|
||||
return cfg
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// getDefaultConfig defines the default values for the config
|
||||
|
||||
@@ -62,9 +62,14 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
Experimental: false,
|
||||
DiscoverMode: "auto",
|
||||
LogLevel: "info",
|
||||
Runtimes: []string{"docker-runc", "runc"},
|
||||
Mode: "auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "nvidia-ctk",
|
||||
@@ -79,6 +84,9 @@ func TestGetConfig(t *testing.T) {
|
||||
"nvidia-container-runtime.experimental = true",
|
||||
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
|
||||
"nvidia-container-runtime.log-level = \"debug\"",
|
||||
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
|
||||
"nvidia-container-runtime.mode = \"not-auto\"",
|
||||
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
expectedConfig: &Config{
|
||||
@@ -87,9 +95,14 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
Experimental: true,
|
||||
DiscoverMode: "not-legacy",
|
||||
LogLevel: "debug",
|
||||
Runtimes: []string{"/some/runtime"},
|
||||
Mode: "not-auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "/foo/bar/nvidia-ctk",
|
||||
@@ -106,6 +119,10 @@ func TestGetConfig(t *testing.T) {
|
||||
"experimental = true",
|
||||
"discover-mode = \"not-legacy\"",
|
||||
"log-level = \"debug\"",
|
||||
"runtimes = [\"/some/runtime\",]",
|
||||
"mode = \"not-auto\"",
|
||||
"[nvidia-container-runtime.modes.csv]",
|
||||
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"[nvidia-ctk]",
|
||||
"path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
@@ -115,9 +132,14 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
Experimental: true,
|
||||
DiscoverMode: "not-legacy",
|
||||
LogLevel: "debug",
|
||||
Runtimes: []string{"/some/runtime"},
|
||||
Mode: "not-auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "/foo/bar/nvidia-ctk",
|
||||
|
||||
125
internal/config/crio/crio.go
Normal file
125
internal/config/crio/crio.go
Normal file
@@ -0,0 +1,125 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package crio
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// LoadConfig loads the cri-o config from disk
|
||||
func LoadConfig(config string) (*toml.Tree, error) {
|
||||
log.Infof("Loading config: %v", config)
|
||||
|
||||
info, err := os.Stat(config)
|
||||
if os.IsExist(err) && info.IsDir() {
|
||||
return nil, fmt.Errorf("config file is a directory")
|
||||
}
|
||||
|
||||
configFile := config
|
||||
if os.IsNotExist(err) {
|
||||
configFile = "/dev/null"
|
||||
log.Infof("Config file does not exist, creating new one")
|
||||
}
|
||||
|
||||
cfg, err := toml.LoadFile(configFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Infof("Successfully loaded config")
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// UpdateConfig updates the cri-o config to include the NVIDIA Container Runtime
|
||||
func UpdateConfig(config *toml.Tree, runtimeClass string, runtimePath string, setAsDefault bool) error {
|
||||
switch runc := config.Get("crio.runtime.runtimes.runc").(type) {
|
||||
case *toml.Tree:
|
||||
runc, _ = toml.Load(runc.String())
|
||||
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass}, runc)
|
||||
}
|
||||
|
||||
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_path"}, runtimePath)
|
||||
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_type"}, "oci")
|
||||
|
||||
if setAsDefault {
|
||||
config.SetPath([]string{"crio", "runtime", "default_runtime"}, runtimeClass)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RevertConfig reverts the cri-o config to remove the NVIDIA Container Runtime
|
||||
func RevertConfig(config *toml.Tree, runtimeClass string) error {
|
||||
if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok {
|
||||
if runtimeClass == runtime {
|
||||
config.DeletePath([]string{"crio", "runtime", "default_runtime"})
|
||||
}
|
||||
}
|
||||
|
||||
runtimeClassPath := []string{"crio", "runtime", "runtimes", runtimeClass}
|
||||
config.DeletePath(runtimeClassPath)
|
||||
for i := 0; i < len(runtimeClassPath); i++ {
|
||||
remainingPath := runtimeClassPath[:len(runtimeClassPath)-i]
|
||||
if entry, ok := config.GetPath(remainingPath).(*toml.Tree); ok {
|
||||
if len(entry.Keys()) != 0 {
|
||||
break
|
||||
}
|
||||
config.DeletePath(remainingPath)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FlushConfig flushes the updated/reverted config out to disk
|
||||
func FlushConfig(config string, cfg *toml.Tree) error {
|
||||
log.Infof("Flushing config")
|
||||
|
||||
output, err := cfg.ToTomlString()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to TOML: %v", err)
|
||||
}
|
||||
|
||||
switch len(output) {
|
||||
case 0:
|
||||
err := os.Remove(config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to remove empty file: %v", err)
|
||||
}
|
||||
log.Infof("Config empty, removing file")
|
||||
default:
|
||||
f, err := os.Create(config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open '%v' for writing: %v", config, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.WriteString(output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to write output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Successfully flushed config")
|
||||
|
||||
return nil
|
||||
}
|
||||
117
internal/config/docker/docker.go
Normal file
117
internal/config/docker/docker.go
Normal file
@@ -0,0 +1,117 @@
|
||||
/**
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package docker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// LoadConfig loads the docker config from disk
|
||||
func LoadConfig(configFilePath string) (map[string]interface{}, error) {
|
||||
log.Infof("Loading docker config from %v", configFilePath)
|
||||
|
||||
info, err := os.Stat(configFilePath)
|
||||
if os.IsExist(err) && info.IsDir() {
|
||||
return nil, fmt.Errorf("config file is a directory")
|
||||
}
|
||||
|
||||
cfg := make(map[string]interface{})
|
||||
|
||||
if os.IsNotExist(err) {
|
||||
log.Infof("Config file does not exist, creating new one")
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
readBytes, err := ioutil.ReadFile(configFilePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to read config: %v", err)
|
||||
}
|
||||
|
||||
reader := bytes.NewReader(readBytes)
|
||||
if err := json.NewDecoder(reader).Decode(&cfg); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Infof("Successfully loaded config")
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// UpdateConfig updates the docker config to include the nvidia runtimes
|
||||
func UpdateConfig(config map[string]interface{}, runtimeName string, runtimePath string, setAsDefault bool) error {
|
||||
// Read the existing runtimes
|
||||
runtimes := make(map[string]interface{})
|
||||
if _, exists := config["runtimes"]; exists {
|
||||
runtimes = config["runtimes"].(map[string]interface{})
|
||||
}
|
||||
|
||||
// Add / update the runtime definitions
|
||||
runtimes[runtimeName] = map[string]interface{}{
|
||||
"path": runtimePath,
|
||||
"args": []string{},
|
||||
}
|
||||
|
||||
// Update the runtimes definition
|
||||
if len(runtimes) > 0 {
|
||||
config["runtimes"] = runtimes
|
||||
}
|
||||
|
||||
if setAsDefault {
|
||||
config["default-runtime"] = runtimeName
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FlushConfig flushes the updated/reverted config out to disk
|
||||
func FlushConfig(cfg map[string]interface{}, configFilePath string) error {
|
||||
log.Infof("Flushing docker config to %v", configFilePath)
|
||||
|
||||
output, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to JSON: %v", err)
|
||||
}
|
||||
|
||||
switch len(output) {
|
||||
case 0:
|
||||
err := os.Remove(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to remove empty file: %v", err)
|
||||
}
|
||||
log.Infof("Config empty, removing file")
|
||||
default:
|
||||
f, err := os.Create(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open %v for writing: %v", configFilePath, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.WriteString(string(output))
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to write output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Successfully flushed config")
|
||||
|
||||
return nil
|
||||
}
|
||||
215
internal/config/docker/docker_test.go
Normal file
215
internal/config/docker/docker_test.go
Normal file
@@ -0,0 +1,215 @@
|
||||
/**
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package docker
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestUpdateConfigDefaultRuntime(t *testing.T) {
|
||||
testCases := []struct {
|
||||
config map[string]interface{}
|
||||
runtimeName string
|
||||
setAsDefault bool
|
||||
expectedDefaultRuntimeName interface{}
|
||||
}{
|
||||
{
|
||||
setAsDefault: false,
|
||||
expectedDefaultRuntimeName: nil,
|
||||
},
|
||||
{
|
||||
runtimeName: "NAME",
|
||||
setAsDefault: true,
|
||||
expectedDefaultRuntimeName: "NAME",
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"default-runtime": "ALREADY_SET",
|
||||
},
|
||||
runtimeName: "NAME",
|
||||
setAsDefault: false,
|
||||
expectedDefaultRuntimeName: "ALREADY_SET",
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"default-runtime": "ALREADY_SET",
|
||||
},
|
||||
runtimeName: "NAME",
|
||||
setAsDefault: true,
|
||||
expectedDefaultRuntimeName: "NAME",
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
if tc.config == nil {
|
||||
tc.config = make(map[string]interface{})
|
||||
}
|
||||
err := UpdateConfig(tc.config, tc.runtimeName, "", tc.setAsDefault)
|
||||
require.NoError(t, err)
|
||||
|
||||
defaultRuntimeName := tc.config["default-runtime"]
|
||||
require.EqualValues(t, tc.expectedDefaultRuntimeName, defaultRuntimeName)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateConfigRuntimes(t *testing.T) {
|
||||
testCases := []struct {
|
||||
config map[string]interface{}
|
||||
runtimes map[string]string
|
||||
expectedConfig map[string]interface{}
|
||||
}{
|
||||
{
|
||||
config: map[string]interface{}{},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
"runtime2": "/test/runtime/dir/runtime2",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime2": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime2",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
"runtime2": "/test/runtime/dir/runtime2",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime2": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime2",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"not-nvidia": map[string]interface{}{
|
||||
"path": "some-other-path",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"not-nvidia": map[string]interface{}{
|
||||
"path": "some-other-path",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
for runtimeName, runtimePath := range tc.runtimes {
|
||||
err := UpdateConfig(tc.config, runtimeName, runtimePath, false)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
configContent, err := json.MarshalIndent(tc.config, "", " ")
|
||||
require.NoError(t, err)
|
||||
|
||||
expectedContent, err := json.MarshalIndent(tc.expectedConfig, "", " ")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.EqualValues(t, string(expectedContent), string(configContent))
|
||||
})
|
||||
|
||||
}
|
||||
}
|
||||
54
internal/config/image/capabilities.go
Normal file
54
internal/config/image/capabilities.go
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
// DriverCapability represents the possible values of NVIDIA_DRIVER_CAPABILITIES
|
||||
type DriverCapability string
|
||||
|
||||
// Constants for the supported driver capabilities
|
||||
const (
|
||||
DriverCapabilityAll DriverCapability = "all"
|
||||
DriverCapabilityCompat32 DriverCapability = "compat32"
|
||||
DriverCapabilityCompute DriverCapability = "compute"
|
||||
DriverCapabilityDisplay DriverCapability = "display"
|
||||
DriverCapabilityGraphics DriverCapability = "graphics"
|
||||
DriverCapabilityNgx DriverCapability = "ngx"
|
||||
DriverCapabilityUtility DriverCapability = "utility"
|
||||
DriverCapabilityVideo DriverCapability = "video"
|
||||
)
|
||||
|
||||
// DriverCapabilities represents the NVIDIA_DRIVER_CAPABILITIES set for the specified image.
|
||||
type DriverCapabilities map[DriverCapability]bool
|
||||
|
||||
// Has check whether the specified capability is selected.
|
||||
func (c DriverCapabilities) Has(capability DriverCapability) bool {
|
||||
if c[DriverCapabilityAll] {
|
||||
return true
|
||||
}
|
||||
return c[capability]
|
||||
}
|
||||
|
||||
// Any checks whether any of the specified capabilites are set
|
||||
func (c DriverCapabilities) Any(capabilities ...DriverCapability) bool {
|
||||
for _, cap := range capabilities {
|
||||
if c.Has(cap) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
190
internal/config/image/cuda_image.go
Normal file
190
internal/config/image/cuda_image.go
Normal file
@@ -0,0 +1,190 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/mod/semver"
|
||||
)
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
||||
envNVRequireJetpack = envNVRequirePrefix + "JETPACK"
|
||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
||||
)
|
||||
|
||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||
// a map of environment variable to values that can be used to perform lookups
|
||||
// such as requirements.
|
||||
type CUDA map[string]string
|
||||
|
||||
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
||||
// The process environment is read (if present) to construc the CUDA Image.
|
||||
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
|
||||
if spec == nil || spec.Process == nil {
|
||||
return NewCUDAImageFromEnv(nil)
|
||||
}
|
||||
|
||||
return NewCUDAImageFromEnv(spec.Process.Env)
|
||||
}
|
||||
|
||||
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
||||
// is a list of strings of the form ENVAR=VALUE.
|
||||
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
|
||||
c := make(CUDA)
|
||||
|
||||
for _, e := range env {
|
||||
parts := strings.SplitN(e, "=", 2)
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("invalid environment variable: %v", e)
|
||||
}
|
||||
c[parts[0]] = parts[1]
|
||||
}
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
|
||||
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
||||
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
||||
func (i CUDA) IsLegacy() bool {
|
||||
legacyCudaVersion := i[envCUDAVersion]
|
||||
cudaRequire := i[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
|
||||
// variables.
|
||||
func (i CUDA) GetRequirements() ([]string, error) {
|
||||
// TODO: We need not process this if disable require is set, but this will be done
|
||||
// in a single follow-up to ensure that the behavioural change is accurately captured.
|
||||
// if i.HasDisableRequire() {
|
||||
// return nil, nil
|
||||
// }
|
||||
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range i {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
}
|
||||
if i.IsLegacy() {
|
||||
v, err := i.legacyVersion()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get version: %v", err)
|
||||
}
|
||||
cudaRequire := fmt.Sprintf("cuda>=%s", v)
|
||||
requirements = append(requirements, cudaRequire)
|
||||
}
|
||||
return requirements, nil
|
||||
}
|
||||
|
||||
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
||||
// to a valid (true) boolean value this can be used to disable the requirement checks
|
||||
func (i CUDA) HasDisableRequire() bool {
|
||||
if disable, exists := i[envNVDisableRequire]; exists {
|
||||
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
||||
d, _ := strconv.ParseBool(disable)
|
||||
return d
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
// We concantenate all the devices from the specified envvars.
|
||||
var isSet bool
|
||||
var devices []string
|
||||
requested := make(map[string]bool)
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := i[envVar]; ok {
|
||||
isSet = true
|
||||
for _, d := range strings.Split(devs, ",") {
|
||||
trimmed := strings.TrimSpace(d)
|
||||
if len(trimmed) == 0 {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, trimmed)
|
||||
requested[trimmed] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Environment variable unset with legacy image: default to "all".
|
||||
if !isSet && len(devices) == 0 && i.IsLegacy() {
|
||||
return NewVisibleDevices("all")
|
||||
}
|
||||
|
||||
// Environment variable unset or empty or "void": return nil
|
||||
if len(devices) == 0 || requested["void"] {
|
||||
return NewVisibleDevices("void")
|
||||
}
|
||||
|
||||
return NewVisibleDevices(devices...)
|
||||
}
|
||||
|
||||
// GetDriverCapabilities returns the requested driver capabilities.
|
||||
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||
env := i[envNVDriverCapabilities]
|
||||
|
||||
capabilites := make(DriverCapabilities)
|
||||
for _, c := range strings.Split(env, ",") {
|
||||
capabilites[DriverCapability(c)] = true
|
||||
}
|
||||
|
||||
return capabilites
|
||||
}
|
||||
|
||||
func (i CUDA) legacyVersion() (string, error) {
|
||||
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid CUDA version: %v", err)
|
||||
}
|
||||
|
||||
return majorMinor, nil
|
||||
}
|
||||
|
||||
func parseMajorMinorVersion(version string) (string, error) {
|
||||
vVersion := "v" + strings.TrimPrefix(version, "v")
|
||||
|
||||
if !semver.IsValid(vVersion) {
|
||||
return "", fmt.Errorf("invalid version string")
|
||||
}
|
||||
|
||||
majorMinor := strings.TrimPrefix(semver.MajorMinor(vVersion), "v")
|
||||
parts := strings.Split(majorMinor, ".")
|
||||
|
||||
var err error
|
||||
_, err = strconv.ParseUint(parts[0], 10, 32)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid major version")
|
||||
}
|
||||
_, err = strconv.ParseUint(parts[1], 10, 32)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid minor version")
|
||||
}
|
||||
return majorMinor, nil
|
||||
}
|
||||
123
internal/config/image/cuda_image_test.go
Normal file
123
internal/config/image/cuda_image_test.go
Normal file
@@ -0,0 +1,123 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseMajorMinorVersionValid(t *testing.T) {
|
||||
var tests = []struct {
|
||||
version string
|
||||
expected string
|
||||
}{
|
||||
{"0", "0.0"},
|
||||
{"8", "8.0"},
|
||||
{"7.5", "7.5"},
|
||||
{"9.0.116", "9.0"},
|
||||
{"4294967295.4294967295.4294967295", "4294967295.4294967295"},
|
||||
{"v11.6", "11.6"},
|
||||
}
|
||||
for _, c := range tests {
|
||||
t.Run(c.version, func(t *testing.T) {
|
||||
version, err := parseMajorMinorVersion(c.version)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, c.expected, version)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseMajorMinorVersionInvalid(t *testing.T) {
|
||||
var tests = []string{
|
||||
"foo",
|
||||
"foo.5.10",
|
||||
"9.0.116.50",
|
||||
"9.0.116foo",
|
||||
"7.foo",
|
||||
"9.0.bar",
|
||||
"9.4294967296",
|
||||
"9.0.116.",
|
||||
"9..0",
|
||||
"9.",
|
||||
".5.10",
|
||||
"-9",
|
||||
"+9",
|
||||
"-9.1.116",
|
||||
"-9.-1.-116",
|
||||
}
|
||||
for _, c := range tests {
|
||||
t.Run(c, func(t *testing.T) {
|
||||
_, err := parseMajorMinorVersion(c)
|
||||
require.Error(t, err)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRequirements(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
env []string
|
||||
requirements []string
|
||||
}{
|
||||
{
|
||||
description: "NVIDIA_REQUIRE_JETPACK is ignored",
|
||||
env: []string{"NVIDIA_REQUIRE_JETPACK=csv-mounts=all"},
|
||||
requirements: nil,
|
||||
},
|
||||
{
|
||||
description: "NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS is ignored",
|
||||
env: []string{"NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS=base-only"},
|
||||
requirements: nil,
|
||||
},
|
||||
{
|
||||
description: "single requirement set",
|
||||
env: []string{"NVIDIA_REQUIRE_CUDA=cuda>=11.6"},
|
||||
requirements: []string{"cuda>=11.6"},
|
||||
},
|
||||
{
|
||||
description: "requirements are concatenated requirement set",
|
||||
env: []string{"NVIDIA_REQUIRE_CUDA=cuda>=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla"},
|
||||
requirements: []string{"cuda>=11.6", "brand=tesla"},
|
||||
},
|
||||
{
|
||||
description: "legacy image",
|
||||
env: []string{"CUDA_VERSION=11.6"},
|
||||
requirements: []string{"cuda>=11.6"},
|
||||
},
|
||||
{
|
||||
description: "legacy image with additional requirement",
|
||||
env: []string{"CUDA_VERSION=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla"},
|
||||
requirements: []string{"cuda>=11.6", "brand=tesla"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
image, err := NewCUDAImageFromEnv(tc.env)
|
||||
require.NoError(t, err)
|
||||
|
||||
requirements, err := image.GetRequirements()
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, tc.requirements, requirements)
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
}
|
||||
127
internal/config/image/devices.go
Normal file
127
internal/config/image/devices.go
Normal file
@@ -0,0 +1,127 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// VisibleDevices represents the devices selected in a container image
|
||||
// through the NVIDIA_VISIBLE_DEVICES or other environment variables
|
||||
type VisibleDevices interface {
|
||||
List() []string
|
||||
Has(string) bool
|
||||
}
|
||||
|
||||
var _ VisibleDevices = (*all)(nil)
|
||||
var _ VisibleDevices = (*none)(nil)
|
||||
var _ VisibleDevices = (*void)(nil)
|
||||
var _ VisibleDevices = (*devices)(nil)
|
||||
|
||||
// NewVisibleDevices creates a VisibleDevices based on the value of the specified envvar.
|
||||
func NewVisibleDevices(envvars ...string) VisibleDevices {
|
||||
for _, envvar := range envvars {
|
||||
if envvar == "all" {
|
||||
return all{}
|
||||
}
|
||||
if envvar == "none" {
|
||||
return none{}
|
||||
}
|
||||
if envvar == "" || envvar == "void" {
|
||||
return void{}
|
||||
}
|
||||
}
|
||||
|
||||
return newDevices(envvars...)
|
||||
}
|
||||
|
||||
type all struct{}
|
||||
|
||||
// List returns ["all"] for all devices
|
||||
func (a all) List() []string {
|
||||
return []string{"all"}
|
||||
}
|
||||
|
||||
// Has for all devices is true for any id except the empty ID
|
||||
func (a all) Has(id string) bool {
|
||||
return id != ""
|
||||
}
|
||||
|
||||
type none struct{}
|
||||
|
||||
// List returns [""] for the none devices
|
||||
func (n none) List() []string {
|
||||
return []string{""}
|
||||
}
|
||||
|
||||
// Has for none devices is false for any id
|
||||
func (n none) Has(id string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
type void struct {
|
||||
none
|
||||
}
|
||||
|
||||
// List returns nil for the void devices
|
||||
func (v void) List() []string {
|
||||
return nil
|
||||
}
|
||||
|
||||
type devices struct {
|
||||
len int
|
||||
lookup map[string]int
|
||||
}
|
||||
|
||||
func newDevices(idOrCommaSeparated ...string) devices {
|
||||
lookup := make(map[string]int)
|
||||
|
||||
i := 0
|
||||
for _, commaSeparated := range idOrCommaSeparated {
|
||||
for _, id := range strings.Split(commaSeparated, ",") {
|
||||
lookup[id] = i
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
d := devices{
|
||||
len: i,
|
||||
lookup: lookup,
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// List returns the list of requested devices
|
||||
func (d devices) List() []string {
|
||||
list := make([]string, d.len)
|
||||
|
||||
for id, i := range d.lookup {
|
||||
list[i] = id
|
||||
}
|
||||
|
||||
return list
|
||||
}
|
||||
|
||||
// Has checks whether the specified ID is in the set of requested devices
|
||||
func (d devices) Has(id string) bool {
|
||||
if id == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
_, exist := d.lookup[id]
|
||||
return exist
|
||||
}
|
||||
@@ -17,42 +17,84 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
dockerRuncExecutableName = "docker-runc"
|
||||
runcExecutableName = "runc"
|
||||
|
||||
auto = "auto"
|
||||
)
|
||||
|
||||
// RuntimeConfig stores the config options for the NVIDIA Container Runtime
|
||||
type RuntimeConfig struct {
|
||||
DebugFilePath string
|
||||
Experimental bool
|
||||
DiscoverMode string
|
||||
DebugFilePath string `toml:"debug"`
|
||||
// LogLevel defines the logging level for the application
|
||||
LogLevel string
|
||||
LogLevel string `toml:"log-level"`
|
||||
// Runtimes defines the candidates for the low-level runtime
|
||||
Runtimes []string `toml:"runtimes"`
|
||||
Mode string `toml:"mode"`
|
||||
Modes modesConfig `toml:"modes"`
|
||||
}
|
||||
|
||||
// modesConfig defines (optional) per-mode configs
|
||||
type modesConfig struct {
|
||||
CSV csvModeConfig `toml:"csv"`
|
||||
CDI cdiModeConfig `toml:"cdi"`
|
||||
}
|
||||
|
||||
type cdiModeConfig struct {
|
||||
// SpecDirs allows for the default spec dirs for CDI to be overridden
|
||||
SpecDirs []string `toml:"spec-dirs"`
|
||||
}
|
||||
|
||||
type csvModeConfig struct {
|
||||
MountSpecPath string `toml:"mount-spec-path"`
|
||||
}
|
||||
|
||||
// dummy allows us to unmarshal only a RuntimeConfig from a *toml.Tree
|
||||
type dummy struct {
|
||||
Runtime RuntimeConfig `toml:"nvidia-container-runtime"`
|
||||
}
|
||||
|
||||
// getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getRuntimeConfigFrom(toml *toml.Tree) *RuntimeConfig {
|
||||
func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) {
|
||||
cfg := GetDefaultRuntimeConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
cfg.DebugFilePath = toml.GetDefault("nvidia-container-runtime.debug", cfg.DebugFilePath).(string)
|
||||
cfg.Experimental = toml.GetDefault("nvidia-container-runtime.experimental", cfg.Experimental).(bool)
|
||||
cfg.DiscoverMode = toml.GetDefault("nvidia-container-runtime.discover-mode", cfg.DiscoverMode).(string)
|
||||
cfg.LogLevel = toml.GetDefault("nvidia-container-runtime.log-level", cfg.LogLevel).(string)
|
||||
d := dummy{
|
||||
Runtime: *cfg,
|
||||
}
|
||||
|
||||
return cfg
|
||||
if err := toml.Unmarshal(&d); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal runtime config: %v", err)
|
||||
}
|
||||
|
||||
return &d.Runtime, nil
|
||||
}
|
||||
|
||||
// GetDefaultRuntimeConfig defines the default values for the config
|
||||
func GetDefaultRuntimeConfig() *RuntimeConfig {
|
||||
c := RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
Experimental: false,
|
||||
DiscoverMode: "auto",
|
||||
LogLevel: logrus.InfoLevel.String(),
|
||||
Runtimes: []string{
|
||||
dockerRuncExecutableName,
|
||||
runcExecutableName,
|
||||
},
|
||||
Mode: auto,
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
|
||||
137
internal/cuda/cuda.go
Normal file
137
internal/cuda/cuda.go
Normal file
@@ -0,0 +1,137 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cuda
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||
)
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
|
||||
#ifdef _WIN32
|
||||
#define CUDAAPI __stdcall
|
||||
#else
|
||||
#define CUDAAPI
|
||||
#endif
|
||||
|
||||
typedef int CUdevice;
|
||||
|
||||
typedef enum CUdevice_attribute_enum {
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
|
||||
} CUdevice_attribute;
|
||||
|
||||
typedef enum cudaError_enum {
|
||||
CUDA_SUCCESS = 0
|
||||
} CUresult;
|
||||
|
||||
CUresult CUDAAPI cuInit(unsigned int Flags);
|
||||
CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);
|
||||
CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
|
||||
CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
|
||||
*/
|
||||
import "C"
|
||||
|
||||
const (
|
||||
libraryName = "libcuda.so.1"
|
||||
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
|
||||
)
|
||||
|
||||
// cuda stores a reference the cuda dynamic library
|
||||
var lib *dl.DynamicLibrary
|
||||
|
||||
// Version returns the CUDA version of the driver as a string or an error if this
|
||||
// cannot be determined.
|
||||
func Version() (string, error) {
|
||||
lib, err := load()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer lib.Close()
|
||||
|
||||
if err := lib.Lookup("cuDriverGetVersion"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
|
||||
var version C.int
|
||||
if result := C.cuDriverGetVersion(&version); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA version: result=%v", result)
|
||||
}
|
||||
|
||||
major := version / 1000
|
||||
minor := version % 100 / 10
|
||||
|
||||
return fmt.Sprintf("%d.%d", major, minor), nil
|
||||
}
|
||||
|
||||
// ComputeCapability returns the CUDA compute capability of a device with the specified index as a string
|
||||
// or an error if this cannot be determined.
|
||||
func ComputeCapability(index int) (string, error) {
|
||||
lib, err := load()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer lib.Close()
|
||||
|
||||
if err := lib.Lookup("cuInit"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
if err := lib.Lookup("cuDeviceGet"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
if err := lib.Lookup("cuDeviceGetAttribute"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
|
||||
if result := C.cuInit(C.uint(0)); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to initialize CUDA: result=%v", result)
|
||||
}
|
||||
|
||||
var device C.CUdevice
|
||||
// NOTE: We only query the first device
|
||||
if result := C.cuDeviceGet(&device, C.int(index)); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA device %v: result=%v", 0, result)
|
||||
}
|
||||
|
||||
var major C.int
|
||||
if result := C.cuDeviceGetAttribute(&major, C.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA compute capability major for device %v : result=%v", 0, result)
|
||||
}
|
||||
|
||||
var minor C.int
|
||||
if result := C.cuDeviceGetAttribute(&minor, C.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA compute capability minor for device %v: result=%v", 0, result)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%d.%d", major, minor), nil
|
||||
}
|
||||
|
||||
func load() (*dl.DynamicLibrary, error) {
|
||||
lib := dl.New(libraryName, libraryLoadFlags)
|
||||
if lib == nil {
|
||||
return nil, fmt.Errorf("error instantiating DynamicLibrary for CUDA")
|
||||
}
|
||||
err := lib.Open()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening DynamicLibrary for CUDA: %v", err)
|
||||
}
|
||||
|
||||
return lib, nil
|
||||
}
|
||||
62
internal/discover/char_devices.go
Normal file
62
internal/discover/char_devices.go
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// charDevices is a discover for a list of character devices
|
||||
type charDevices mounts
|
||||
|
||||
var _ Discover = (*charDevices)(nil)
|
||||
|
||||
// NewCharDeviceDiscoverer creates a discoverer which locates the specified set of device nodes.
|
||||
func NewCharDeviceDiscoverer(logger *logrus.Logger, devices []string, root string) Discover {
|
||||
locator := lookup.NewCharDeviceLocator(logger, root)
|
||||
|
||||
return NewDeviceDiscoverer(logger, locator, root, devices)
|
||||
}
|
||||
|
||||
// NewDeviceDiscoverer creates a discoverer which locates the specified set of device nodes using the specified locator.
|
||||
func NewDeviceDiscoverer(logger *logrus.Logger, locator lookup.Locator, root string, devices []string) Discover {
|
||||
m := NewMounts(logger, locator, root, devices).(*mounts)
|
||||
|
||||
return (*charDevices)(m)
|
||||
}
|
||||
|
||||
// Mounts returns the discovered mounts for the charDevices.
|
||||
// Since this explicitly specifies a device list, the mounts are nil.
|
||||
func (d *charDevices) Mounts() ([]Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Devices returns the discovered devices for the charDevices.
|
||||
// Here the device nodes are first discovered as mounts and these are converted to devices.
|
||||
func (d *charDevices) Devices() ([]Device, error) {
|
||||
devicesAsMounts, err := (*mounts)(d).Mounts()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var devices []Device
|
||||
for _, mount := range devicesAsMounts {
|
||||
devices = append(devices, Device(mount))
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
83
internal/discover/char_devices_test.go
Normal file
83
internal/discover/char_devices_test.go
Normal file
@@ -0,0 +1,83 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCharDevices(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
input *charDevices
|
||||
expectedMounts []Mount
|
||||
expectedMountsError error
|
||||
expectedDevicesError error
|
||||
expectedDevices []Device
|
||||
}{
|
||||
{
|
||||
description: "dev mounts are empty",
|
||||
input: (*charDevices)(
|
||||
&mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(string) ([]string, error) {
|
||||
return []string{"located"}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required"},
|
||||
},
|
||||
),
|
||||
expectedDevices: []Device{{Path: "located", HostPath: "located"}},
|
||||
},
|
||||
{
|
||||
description: "dev devices returns error for nil lookup",
|
||||
input: &charDevices{},
|
||||
expectedDevicesError: fmt.Errorf("no lookup defined"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
logHook.Reset()
|
||||
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
tc.input.logger = logger
|
||||
|
||||
mounts, err := tc.input.Mounts()
|
||||
if tc.expectedMountsError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedMounts, mounts)
|
||||
|
||||
devices, err := tc.input.Devices()
|
||||
if tc.expectedDevicesError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedDevices, devices)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -24,11 +24,6 @@ import (
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// charDevices is a discover for a list of character devices
|
||||
type charDevices mounts
|
||||
|
||||
var _ Discover = (*charDevices)(nil)
|
||||
|
||||
// NewFromCSVFiles creates a discoverer for the specified CSV files. A logger is also supplied.
|
||||
// The constructed discoverer is comprised of a list, with each element in the list being associated with a
|
||||
// single CSV files.
|
||||
@@ -47,23 +42,21 @@ func NewFromCSVFiles(logger *logrus.Logger, files []string, root string) (Discov
|
||||
csv.MountSpecSym: symlinkLocator,
|
||||
}
|
||||
|
||||
var discoverers []Discover
|
||||
var mountSpecs []*csv.MountSpec
|
||||
for _, filename := range files {
|
||||
d, err := NewFromCSVFile(logger, locators, filename)
|
||||
targets, err := loadCSVFile(logger, filename)
|
||||
if err != nil {
|
||||
logger.Warnf("Skipping CSV file %v: %v", filename, err)
|
||||
continue
|
||||
}
|
||||
discoverers = append(discoverers, d)
|
||||
mountSpecs = append(mountSpecs, targets...)
|
||||
}
|
||||
|
||||
return &list{discoverers: discoverers}, nil
|
||||
return newFromMountSpecs(logger, locators, root, mountSpecs)
|
||||
}
|
||||
|
||||
// NewFromCSVFile creates a discoverer for the specified CSV file. A logger is also supplied.
|
||||
// The constructed discoverer is comprised of a list, with each element in the list being associated with a particular
|
||||
// MountSpecType.
|
||||
func NewFromCSVFile(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, filename string) (Discover, error) {
|
||||
// loadCSVFile loads the specified CSV file and returns the list of mount specs
|
||||
func loadCSVFile(logger *logrus.Logger, filename string) ([]*csv.MountSpec, error) {
|
||||
// Create a discoverer for each file-kind combination
|
||||
targets, err := csv.NewCSVFileParser(logger, filename).Parse()
|
||||
if err != nil {
|
||||
@@ -73,12 +66,12 @@ func NewFromCSVFile(logger *logrus.Logger, locators map[csv.MountSpecType]lookup
|
||||
return nil, fmt.Errorf("CSV file is empty")
|
||||
}
|
||||
|
||||
return newFromMountSpecs(logger, locators, targets)
|
||||
return targets, nil
|
||||
}
|
||||
|
||||
// newFromMountSpecs creates a discoverer for the CSV file. A logger is also supplied.
|
||||
// A list of csvDiscoverers is returned, with each being associated with a single MountSpecType.
|
||||
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, targets []*csv.MountSpec) (Discover, error) {
|
||||
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, root string, targets []*csv.MountSpec) (Discover, error) {
|
||||
if len(targets) == 0 {
|
||||
return &None{}, nil
|
||||
}
|
||||
@@ -99,41 +92,16 @@ func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]loo
|
||||
return nil, fmt.Errorf("no locator defined for '%v'", t)
|
||||
}
|
||||
|
||||
m := &mounts{
|
||||
logger: logger,
|
||||
lookup: locator,
|
||||
required: candidatesByType[t],
|
||||
}
|
||||
|
||||
var m Discover
|
||||
switch t {
|
||||
case csv.MountSpecDev:
|
||||
// For device mount specs, we insert a charDevices into the list of discoverers.
|
||||
discoverers = append(discoverers, (*charDevices)(m))
|
||||
m = NewDeviceDiscoverer(logger, locator, root, candidatesByType[t])
|
||||
default:
|
||||
discoverers = append(discoverers, m)
|
||||
m = NewMounts(logger, locator, root, candidatesByType[t])
|
||||
}
|
||||
discoverers = append(discoverers, m)
|
||||
|
||||
}
|
||||
|
||||
return &list{discoverers: discoverers}, nil
|
||||
}
|
||||
|
||||
// Mounts returns the discovered mounts for the charDevices. Since this explicitly specifies a
|
||||
// device list, the mounts are nil.
|
||||
func (d *charDevices) Mounts() ([]Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Devices returns the discovered devices for the charDevices. Here the device nodes are first
|
||||
// discovered as mounts and these are converted to devices.
|
||||
func (d *charDevices) Devices() ([]Device, error) {
|
||||
devicesAsMounts, err := (*mounts)(d).Mounts()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var devices []Device
|
||||
for _, mount := range devicesAsMounts {
|
||||
devices = append(devices, Device(mount))
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
@@ -26,63 +26,6 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCharDevices(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
input *charDevices
|
||||
expectedMounts []Mount
|
||||
expectedMountsError error
|
||||
expectedDevicesError error
|
||||
expectedDevices []Device
|
||||
}{
|
||||
{
|
||||
description: "dev mounts are empty",
|
||||
input: (*charDevices)(
|
||||
&mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(string) ([]string, error) {
|
||||
return []string{"located"}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required"},
|
||||
},
|
||||
),
|
||||
expectedDevices: []Device{{Path: "located"}},
|
||||
},
|
||||
{
|
||||
description: "dev devices returns error for nil lookup",
|
||||
input: &charDevices{},
|
||||
expectedDevicesError: fmt.Errorf("no lookup defined"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
logHook.Reset()
|
||||
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
tc.input.logger = logger
|
||||
|
||||
mounts, err := tc.input.Mounts()
|
||||
if tc.expectedMountsError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedMounts, mounts)
|
||||
|
||||
devices, err := tc.input.Devices()
|
||||
if tc.expectedDevicesError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedDevices, devices)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewFromMountSpec(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
@@ -93,6 +36,7 @@ func TestNewFromMountSpec(t *testing.T) {
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
root string
|
||||
targets []*csv.MountSpec
|
||||
expectedError error
|
||||
expectedDiscoverer Discover
|
||||
@@ -133,12 +77,50 @@ func TestNewFromMountSpec(t *testing.T) {
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["dev"],
|
||||
root: "/",
|
||||
required: []string{"dev0", "dev1"},
|
||||
},
|
||||
),
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["lib"],
|
||||
root: "/",
|
||||
required: []string{"lib0"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "sets root",
|
||||
targets: []*csv.MountSpec{
|
||||
{
|
||||
Type: "dev",
|
||||
Path: "dev0",
|
||||
},
|
||||
{
|
||||
Type: "lib",
|
||||
Path: "lib0",
|
||||
},
|
||||
{
|
||||
Type: "dev",
|
||||
Path: "dev1",
|
||||
},
|
||||
},
|
||||
root: "/some/root",
|
||||
expectedDiscoverer: &list{
|
||||
discoverers: []Discover{
|
||||
(*charDevices)(
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["dev"],
|
||||
root: "/some/root",
|
||||
required: []string{"dev0", "dev1"},
|
||||
},
|
||||
),
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["lib"],
|
||||
root: "/some/root",
|
||||
required: []string{"lib0"},
|
||||
},
|
||||
},
|
||||
@@ -148,7 +130,7 @@ func TestNewFromMountSpec(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
discoverer, err := newFromMountSpecs(logger, locators, tc.targets)
|
||||
discoverer, err := newFromMountSpecs(logger, locators, tc.root, tc.targets)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
|
||||
@@ -24,12 +24,14 @@ type Config struct {
|
||||
|
||||
// Device represents a discovered character device.
|
||||
type Device struct {
|
||||
Path string
|
||||
HostPath string
|
||||
Path string
|
||||
}
|
||||
|
||||
// Mount represents a discovered mount.
|
||||
type Mount struct {
|
||||
Path string
|
||||
HostPath string
|
||||
Path string
|
||||
}
|
||||
|
||||
// Hook represents a discovered hook.
|
||||
|
||||
62
internal/discover/filter.go
Normal file
62
internal/discover/filter.go
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import "github.com/sirupsen/logrus"
|
||||
|
||||
// Filter defines an interface for filtering discovered entities
|
||||
type Filter interface {
|
||||
DeviceIsSelected(device Device) bool
|
||||
}
|
||||
|
||||
// filtered represents a filtered discoverer
|
||||
type filtered struct {
|
||||
Discover
|
||||
logger *logrus.Logger
|
||||
filter Filter
|
||||
}
|
||||
|
||||
// newFilteredDisoverer creates a discoverer that applies the specified filter to the returned entities of the discoverer
|
||||
func newFilteredDisoverer(logger *logrus.Logger, applyTo Discover, filter Filter) Discover {
|
||||
return filtered{
|
||||
Discover: applyTo,
|
||||
logger: logger,
|
||||
filter: filter,
|
||||
}
|
||||
}
|
||||
|
||||
// Devices returns a filtered list of devices based on the specified filter.
|
||||
func (d filtered) Devices() ([]Device, error) {
|
||||
devices, err := d.Discover.Devices()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if d.filter == nil {
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
var selected []Device
|
||||
for _, device := range devices {
|
||||
if d.filter.DeviceIsSelected(device) {
|
||||
selected = append(selected, device)
|
||||
}
|
||||
d.logger.Debugf("skipping device %v", device)
|
||||
}
|
||||
|
||||
return selected, nil
|
||||
}
|
||||
77
internal/discover/gds.go
Normal file
77
internal/discover/gds.go
Normal file
@@ -0,0 +1,77 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type gdsDeviceDiscoverer struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
devices Discover
|
||||
mounts Discover
|
||||
}
|
||||
|
||||
// NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
|
||||
func NewGDSDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
|
||||
devices := NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
[]string{"/dev/nvidia-fs*"},
|
||||
root,
|
||||
)
|
||||
|
||||
udev := NewMounts(
|
||||
logger,
|
||||
lookup.NewDirectoryLocator(logger, root),
|
||||
root,
|
||||
[]string{"/run/udev"},
|
||||
)
|
||||
|
||||
cufile := NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(logger, root),
|
||||
root,
|
||||
[]string{"/etc/cufile.json"},
|
||||
)
|
||||
|
||||
d := gdsDeviceDiscoverer{
|
||||
logger: logger,
|
||||
devices: devices,
|
||||
mounts: Merge(udev, cufile),
|
||||
}
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
// Devices discovers the nvidia-fs device nodes for use with GPUDirect Storage
|
||||
func (d *gdsDeviceDiscoverer) Devices() ([]Device, error) {
|
||||
return d.devices.Devices()
|
||||
}
|
||||
|
||||
// Mounts discovers the required mounts for GPUDirect Storage.
|
||||
// If no devices are discovered the discovered mounts are empty
|
||||
func (d *gdsDeviceDiscoverer) Mounts() ([]Mount, error) {
|
||||
devices, err := d.Devices()
|
||||
if err != nil || len(devices) == 0 {
|
||||
d.logger.Debugf("No nvidia-fs devices detected; skipping detection of mounts")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return d.mounts.Mounts()
|
||||
}
|
||||
254
internal/discover/graphics.go
Normal file
254
internal/discover/graphics.go
Normal file
@@ -0,0 +1,254 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
|
||||
func NewGraphicsDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, cfg *Config) (Discover, error) {
|
||||
root := cfg.Root
|
||||
|
||||
locator, err := lookup.NewLibraryLocator(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct library locator: %v", err)
|
||||
}
|
||||
libraries := NewMounts(
|
||||
logger,
|
||||
locator,
|
||||
root,
|
||||
[]string{
|
||||
"libnvidia-egl-gbm.so",
|
||||
},
|
||||
)
|
||||
|
||||
jsonMounts := NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(logger, root),
|
||||
root,
|
||||
[]string{
|
||||
// TODO: We should handle this more cleanly
|
||||
"/etc/glvnd/egl_vendor.d/10_nvidia.json",
|
||||
"/etc/vulkan/icd.d/nvidia_icd.json",
|
||||
"/etc/vulkan/implicit_layer.d/nvidia_layers.json",
|
||||
"/usr/share/glvnd/egl_vendor.d/10_nvidia.json",
|
||||
"/usr/share/vulkan/icd.d/nvidia_icd.json",
|
||||
"/usr/share/vulkan/implicit_layer.d/nvidia_layers.json",
|
||||
"/usr/share/egl/egl_external_platform.d/15_nvidia_gbm.json",
|
||||
},
|
||||
)
|
||||
|
||||
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
|
||||
}
|
||||
|
||||
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, cfg)
|
||||
|
||||
discover := Merge(
|
||||
Merge(drmDeviceNodes, drmByPathSymlinks),
|
||||
libraries,
|
||||
jsonMounts,
|
||||
)
|
||||
|
||||
return discover, nil
|
||||
}
|
||||
|
||||
type drmDevicesByPath struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
lookup lookup.Locator
|
||||
nvidiaCTKExecutablePath string
|
||||
root string
|
||||
devicesFrom Discover
|
||||
}
|
||||
|
||||
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
|
||||
func newCreateDRMByPathSymlinks(logger *logrus.Logger, devices Discover, cfg *Config) Discover {
|
||||
d := drmDevicesByPath{
|
||||
logger: logger,
|
||||
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
|
||||
nvidiaCTKExecutablePath: cfg.NVIDIAContainerToolkitCLIExecutablePath,
|
||||
root: cfg.Root,
|
||||
devicesFrom: devices,
|
||||
}
|
||||
|
||||
return &d
|
||||
}
|
||||
|
||||
// Hooks returns a hook to create the symlinks from the required CSV files
|
||||
func (d drmDevicesByPath) Hooks() ([]Hook, error) {
|
||||
devices, err := d.devicesFrom.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to discover devices for by-path symlinks: %v", err)
|
||||
}
|
||||
if len(devices) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
hookPath := nvidiaCTKDefaultFilePath
|
||||
targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err)
|
||||
} else if len(targets) == 0 {
|
||||
d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath)
|
||||
} else {
|
||||
d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets)
|
||||
hookPath = targets[0]
|
||||
}
|
||||
d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
|
||||
|
||||
args := []string{hookPath, "hook", "create-symlinks"}
|
||||
links, err := d.getSpecificLinkArgs(devices)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to determine specific links: %v", err)
|
||||
}
|
||||
for _, l := range links {
|
||||
args = append(args, "--link", l)
|
||||
}
|
||||
|
||||
h := Hook{
|
||||
Lifecycle: cdi.CreateContainerHook,
|
||||
Path: hookPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// getSpecificLinkArgs returns the required specic links that need to be created
|
||||
func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error) {
|
||||
selectedDevices := make(map[string]bool)
|
||||
for _, d := range devices {
|
||||
selectedDevices[filepath.Base(d.HostPath)] = true
|
||||
}
|
||||
|
||||
linkLocator := lookup.NewFileLocator(d.logger, d.root)
|
||||
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate devices by path: %v", err)
|
||||
}
|
||||
|
||||
var links []string
|
||||
for _, c := range candidates {
|
||||
device, err := os.Readlink(c)
|
||||
if err != nil {
|
||||
d.logger.Warningf("Failed to evaluate symlink %v; ignoring", c)
|
||||
continue
|
||||
}
|
||||
|
||||
if selectedDevices[filepath.Base(device)] {
|
||||
d.logger.Debugf("adding device symlink %v -> %v", c, device)
|
||||
links = append(links, fmt.Sprintf("%v::%v", device, c))
|
||||
}
|
||||
}
|
||||
|
||||
return links, nil
|
||||
}
|
||||
|
||||
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
|
||||
func newDRMDeviceDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, root string) (Discover, error) {
|
||||
allDevices := NewDeviceDiscoverer(
|
||||
logger,
|
||||
lookup.NewCharDeviceLocator(logger, root),
|
||||
root,
|
||||
[]string{
|
||||
"/dev/dri/card*",
|
||||
"/dev/dri/renderD*",
|
||||
},
|
||||
)
|
||||
|
||||
filter, err := newDRMDeviceFilter(logger, devices, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
|
||||
}
|
||||
|
||||
// We return a discoverer that applies the DRM device filter created above to all discovered DRM device nodes.
|
||||
d := newFilteredDisoverer(
|
||||
logger,
|
||||
allDevices,
|
||||
filter,
|
||||
)
|
||||
|
||||
return d, err
|
||||
}
|
||||
|
||||
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
|
||||
func newDRMDeviceFilter(logger *logrus.Logger, devices image.VisibleDevices, root string) (Filter, error) {
|
||||
gpuInformationPaths, err := proc.GetInformationFilePaths(root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read GPU information: %v", err)
|
||||
}
|
||||
|
||||
var selectedBusIds []string
|
||||
for _, f := range gpuInformationPaths {
|
||||
info, err := proc.ParseGPUInformationFile(f)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %v: %v", f, err)
|
||||
}
|
||||
uuid := info[proc.GPUInfoGPUUUID]
|
||||
busID := info[proc.GPUInfoBusLocation]
|
||||
minor := info[proc.GPUInfoDeviceMinor]
|
||||
|
||||
if devices.Has(minor) || devices.Has(uuid) || devices.Has(busID) {
|
||||
selectedBusIds = append(selectedBusIds, busID)
|
||||
}
|
||||
}
|
||||
|
||||
filter := make(selectDeviceByPath)
|
||||
for _, busID := range selectedBusIds {
|
||||
drmDeviceNodes, err := drm.GetDeviceNodesByBusID(busID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to determine DRM devices for %v: %v", busID, err)
|
||||
}
|
||||
for _, drmDeviceNode := range drmDeviceNodes {
|
||||
filter[filepath.Join(drmDeviceNode)] = true
|
||||
}
|
||||
}
|
||||
|
||||
return filter, nil
|
||||
}
|
||||
|
||||
// selectDeviceByPath is a filter that allows devices to be selected by the path
|
||||
type selectDeviceByPath map[string]bool
|
||||
|
||||
var _ Filter = (*selectDeviceByPath)(nil)
|
||||
|
||||
// DeviceIsSelected determines whether the device's path has been selected
|
||||
func (s selectDeviceByPath) DeviceIsSelected(device Device) bool {
|
||||
return s[device.Path]
|
||||
}
|
||||
|
||||
// MountIsSelected is always true
|
||||
func (s selectDeviceByPath) MountIsSelected(Mount) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// HookIsSelected is always true
|
||||
func (s selectDeviceByPath) HookIsSelected(Hook) bool {
|
||||
return true
|
||||
}
|
||||
@@ -19,7 +19,6 @@ package discover
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
@@ -57,70 +56,86 @@ func (d ldconfig) Hooks() ([]Hook, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
|
||||
}
|
||||
h := CreateLDCacheUpdateHook(
|
||||
d.logger,
|
||||
d.lookup,
|
||||
d.nvidiaCTKExecutablePath,
|
||||
nvidiaCTKDefaultFilePath,
|
||||
getLibraryPaths(mounts),
|
||||
)
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
libDirs := getLibDirs(mounts)
|
||||
|
||||
hookPath := nvidiaCTKDefaultFilePath
|
||||
targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath)
|
||||
// CreateLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache
|
||||
func CreateLDCacheUpdateHook(logger *logrus.Logger, lookup lookup.Locator, execuable string, defaultPath string, libraries []string) Hook {
|
||||
hookPath := defaultPath
|
||||
targets, err := lookup.Locate(execuable)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err)
|
||||
logger.Warnf("Failed to locate %v: %v", execuable, err)
|
||||
} else if len(targets) == 0 {
|
||||
d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath)
|
||||
logger.Warnf("%v not found", execuable)
|
||||
} else {
|
||||
d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets)
|
||||
logger.Debugf("Found %v candidates: %v", execuable, targets)
|
||||
hookPath = targets[0]
|
||||
}
|
||||
d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
|
||||
logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
|
||||
|
||||
args := []string{hookPath, "hook", "update-ldcache"}
|
||||
for _, f := range libDirs {
|
||||
args = append(args, "--folders", f)
|
||||
args := []string{filepath.Base(hookPath), "hook", "update-ldcache"}
|
||||
for _, f := range uniqueFolders(libraries) {
|
||||
args = append(args, "--folder", f)
|
||||
}
|
||||
h := Hook{
|
||||
return Hook{
|
||||
Lifecycle: cdi.CreateContainerHook,
|
||||
Path: hookPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// getLibDirs extracts the library dirs from the specified mounts
|
||||
func getLibDirs(mounts []Mount) []string {
|
||||
// getLibraryPaths extracts the library dirs from the specified mounts
|
||||
func getLibraryPaths(mounts []Mount) []string {
|
||||
var paths []string
|
||||
checked := make(map[string]bool)
|
||||
|
||||
for _, m := range mounts {
|
||||
dir := filepath.Dir(m.Path)
|
||||
if dir == "" {
|
||||
if !isLibName(m.Path) {
|
||||
continue
|
||||
}
|
||||
|
||||
_, exists := checked[dir]
|
||||
if exists {
|
||||
continue
|
||||
}
|
||||
checked[dir] = isLibName(filepath.Base(m.Path))
|
||||
|
||||
if checked[dir] {
|
||||
paths = append(paths, dir)
|
||||
}
|
||||
paths = append(paths, m.Path)
|
||||
}
|
||||
|
||||
sort.Strings(paths)
|
||||
|
||||
return paths
|
||||
}
|
||||
|
||||
// isLibName checks if the specified filename is a library (i.e. ends in `.so*`)
|
||||
func isLibName(filename string) bool {
|
||||
parts := strings.Split(filename, ".")
|
||||
|
||||
for _, p := range parts {
|
||||
if p == "so" {
|
||||
return true
|
||||
}
|
||||
base := filepath.Base(filename)
|
||||
|
||||
isLib, err := filepath.Match("lib?*.so*", base)
|
||||
if !isLib || err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return false
|
||||
parts := strings.Split(base, ".so")
|
||||
if len(parts) == 1 {
|
||||
return true
|
||||
}
|
||||
|
||||
return parts[len(parts)-1] == "" || strings.HasPrefix(parts[len(parts)-1], ".")
|
||||
}
|
||||
|
||||
// uniqueFolders returns the unique set of folders for the specified files
|
||||
func uniqueFolders(libraries []string) []string {
|
||||
var paths []string
|
||||
checked := make(map[string]bool)
|
||||
|
||||
for _, l := range libraries {
|
||||
dir := filepath.Dir(l)
|
||||
if dir == "" {
|
||||
continue
|
||||
}
|
||||
if checked[dir] {
|
||||
continue
|
||||
}
|
||||
checked[dir] = true
|
||||
paths = append(paths, dir)
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
164
internal/discover/ldconfig_test.go
Normal file
164
internal/discover/ldconfig_test.go
Normal file
@@ -0,0 +1,164 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestLDCacheUpdateHook(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
cfg := Config{
|
||||
Root: "/",
|
||||
NVIDIAContainerToolkitCLIExecutablePath: "/foo/bar/nvidia-ctk",
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
mounts []Mount
|
||||
mountError error
|
||||
expectedError error
|
||||
expectedArgs []string
|
||||
}{
|
||||
{
|
||||
description: "empty mounts",
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache"},
|
||||
},
|
||||
{
|
||||
description: "mount error",
|
||||
mountError: fmt.Errorf("mountError"),
|
||||
expectedError: fmt.Errorf("mountError"),
|
||||
},
|
||||
{
|
||||
description: "library folders are added to args",
|
||||
mounts: []Mount{
|
||||
{
|
||||
Path: "/usr/local/lib/libfoo.so",
|
||||
},
|
||||
{
|
||||
Path: "/usr/bin/notlib",
|
||||
},
|
||||
{
|
||||
Path: "/usr/local/libother/libfoo.so",
|
||||
},
|
||||
{
|
||||
Path: "/usr/local/lib/libbar.so",
|
||||
},
|
||||
},
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
|
||||
},
|
||||
{
|
||||
description: "host paths are ignored",
|
||||
mounts: []Mount{
|
||||
{
|
||||
HostPath: "/usr/local/other/libfoo.so",
|
||||
Path: "/usr/local/lib/libfoo.so",
|
||||
},
|
||||
},
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
mountMock := &DiscoverMock{
|
||||
MountsFunc: func() ([]Mount, error) {
|
||||
return tc.mounts, tc.mountError
|
||||
},
|
||||
}
|
||||
expectedHook := Hook{
|
||||
Path: "/usr/bin/nvidia-ctk",
|
||||
Args: tc.expectedArgs,
|
||||
Lifecycle: "createContainer",
|
||||
}
|
||||
|
||||
d, err := NewLDCacheUpdateHook(logger, mountMock, &cfg)
|
||||
require.NoError(t, err)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.Len(t, mountMock.MountsCalls(), 1)
|
||||
require.Len(t, mountMock.DevicesCalls(), 0)
|
||||
require.Len(t, mountMock.HooksCalls(), 0)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, hooks, 1)
|
||||
|
||||
require.EqualValues(t, hooks[0], expectedHook)
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, mounts)
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestIsLibName(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
isLib bool
|
||||
}{
|
||||
{
|
||||
name: "",
|
||||
isLib: false,
|
||||
},
|
||||
{
|
||||
name: "lib/not/.so",
|
||||
isLib: false,
|
||||
},
|
||||
{
|
||||
name: "lib.so",
|
||||
isLib: false,
|
||||
},
|
||||
{
|
||||
name: "notlibcuda.so",
|
||||
isLib: false,
|
||||
},
|
||||
{
|
||||
name: "libcuda.so",
|
||||
isLib: true,
|
||||
},
|
||||
{
|
||||
name: "libcuda.so.1",
|
||||
isLib: true,
|
||||
},
|
||||
{
|
||||
name: "libcuda.soNOT",
|
||||
isLib: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
require.Equal(t, tc.isLib, isLibName(tc.name))
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,70 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewLegacyDiscoverer creates a discoverer for the experimental runtime
|
||||
func NewLegacyDiscoverer(logger *logrus.Logger, cfg *Config) (Discover, error) {
|
||||
d := legacy{
|
||||
logger: logger,
|
||||
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
|
||||
}
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
type legacy struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
lookup lookup.Locator
|
||||
}
|
||||
|
||||
var _ Discover = (*legacy)(nil)
|
||||
|
||||
// Hooks returns the "legacy" NVIDIA Container Runtime hook. This hook calls out
|
||||
// to the nvidia-container-cli to make modifications to the container as defined
|
||||
// in libnvidia-container.
|
||||
func (d legacy) Hooks() ([]Hook, error) {
|
||||
hookPath := filepath.Join(config.DefaultExecutableDir, config.NVIDIAContainerRuntimeHookExecutable)
|
||||
targets, err := d.lookup.Locate(config.NVIDIAContainerRuntimeHookExecutable)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Failed to locate %v: %v", config.NVIDIAContainerRuntimeHookExecutable, err)
|
||||
} else if len(targets) == 0 {
|
||||
d.logger.Warnf("%v not found", config.NVIDIAContainerRuntimeHookExecutable)
|
||||
} else {
|
||||
d.logger.Debugf("Found %v candidates: %v", config.NVIDIAContainerRuntimeHookExecutable, targets)
|
||||
hookPath = targets[0]
|
||||
}
|
||||
d.logger.Debugf("Using NVIDIA Container Runtime Hook path %v", hookPath)
|
||||
|
||||
args := []string{hookPath, "--force", "prestart"}
|
||||
h := Hook{
|
||||
Lifecycle: cdi.PrestartHook,
|
||||
Path: hookPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
@@ -27,8 +27,8 @@ type list struct {
|
||||
|
||||
var _ Discover = (*list)(nil)
|
||||
|
||||
// NewList creates a discoverer that is the composite of a list of discoveres.
|
||||
func NewList(d ...Discover) Discover {
|
||||
// Merge creates a discoverer that is the composite of a list of discoveres.
|
||||
func Merge(d ...Discover) Discover {
|
||||
l := list{
|
||||
discoverers: d,
|
||||
}
|
||||
|
||||
35
internal/discover/mofed.go
Normal file
35
internal/discover/mofed.go
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewMOFEDDiscoverer creates a discoverer for MOFED devices.
|
||||
func NewMOFEDDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
|
||||
devices := NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
[]string{
|
||||
"/dev/infiniband/uverbs*",
|
||||
"/dev/infiniband/rdma_cm",
|
||||
},
|
||||
root,
|
||||
)
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
@@ -18,6 +18,8 @@ package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
@@ -31,6 +33,7 @@ type mounts struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
lookup lookup.Locator
|
||||
root string
|
||||
required []string
|
||||
sync.Mutex
|
||||
cache []Mount
|
||||
@@ -38,6 +41,16 @@ type mounts struct {
|
||||
|
||||
var _ Discover = (*mounts)(nil)
|
||||
|
||||
// NewMounts creates a discoverer for the required mounts using the specified locator.
|
||||
func NewMounts(logger *logrus.Logger, lookup lookup.Locator, root string, required []string) Discover {
|
||||
return &mounts{
|
||||
logger: logger,
|
||||
lookup: lookup,
|
||||
root: filepath.Join("/", root),
|
||||
required: required,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *mounts) Mounts() ([]Mount, error) {
|
||||
if d.lookup == nil {
|
||||
return nil, fmt.Errorf("no lookup defined")
|
||||
@@ -51,7 +64,7 @@ func (d *mounts) Mounts() ([]Mount, error) {
|
||||
d.Lock()
|
||||
defer d.Unlock()
|
||||
|
||||
paths := make(map[string]bool)
|
||||
uniqueMounts := make(map[string]Mount)
|
||||
|
||||
for _, candidate := range d.required {
|
||||
d.logger.Debugf("Locating %v", candidate)
|
||||
@@ -66,20 +79,39 @@ func (d *mounts) Mounts() ([]Mount, error) {
|
||||
}
|
||||
d.logger.Debugf("Located %v as %v", candidate, located)
|
||||
for _, p := range located {
|
||||
paths[p] = true
|
||||
if _, ok := uniqueMounts[p]; ok {
|
||||
d.logger.Debugf("Skipping duplicate mount %v", p)
|
||||
continue
|
||||
}
|
||||
|
||||
r := d.relativeTo(p)
|
||||
if r == "" {
|
||||
r = p
|
||||
}
|
||||
|
||||
d.logger.Infof("Selecting %v as %v", p, r)
|
||||
uniqueMounts[p] = Mount{
|
||||
HostPath: p,
|
||||
Path: r,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var mounts []Mount
|
||||
for path := range paths {
|
||||
d.logger.Infof("Selecting %v", path)
|
||||
mount := Mount{
|
||||
Path: path,
|
||||
}
|
||||
mounts = append(mounts, mount)
|
||||
for _, m := range uniqueMounts {
|
||||
mounts = append(mounts, m)
|
||||
}
|
||||
|
||||
d.cache = mounts
|
||||
|
||||
return mounts, nil
|
||||
return d.cache, nil
|
||||
}
|
||||
|
||||
// relativeTo returns the path relative to the root for the file locator
|
||||
func (d *mounts) relativeTo(path string) string {
|
||||
if d.root == "/" {
|
||||
return path
|
||||
}
|
||||
|
||||
return strings.TrimPrefix(path, d.root)
|
||||
}
|
||||
|
||||
@@ -70,7 +70,7 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "located"}},
|
||||
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
|
||||
},
|
||||
{
|
||||
description: "mounts removes located duplicates",
|
||||
@@ -83,7 +83,7 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required0", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "located"}},
|
||||
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips located errors",
|
||||
@@ -98,7 +98,7 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required0", "error", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "required0"}, {Path: "required1"}},
|
||||
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips unlocated",
|
||||
@@ -113,10 +113,10 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required0", "empty", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "required0"}, {Path: "required1"}},
|
||||
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips unlocated",
|
||||
description: "mounts adds multiple",
|
||||
input: &mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(s string) ([]string, error) {
|
||||
@@ -129,10 +129,25 @@ func TestMounts(t *testing.T) {
|
||||
required: []string{"required0", "multiple", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{
|
||||
{Path: "required0"},
|
||||
{Path: "multiple0"},
|
||||
{Path: "multiple1"},
|
||||
{Path: "required1"},
|
||||
{Path: "required0", HostPath: "required0"},
|
||||
{Path: "multiple0", HostPath: "multiple0"},
|
||||
{Path: "multiple1", HostPath: "multiple1"},
|
||||
{Path: "required1", HostPath: "required1"},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "mounts uses relative path",
|
||||
input: &mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(s string) ([]string, error) {
|
||||
return []string{"/some/root/located"}, nil
|
||||
},
|
||||
},
|
||||
root: "/some/root",
|
||||
required: []string{"required0", "multiple", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{
|
||||
{Path: "/located", HostPath: "/some/root/located"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -17,6 +17,10 @@
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/sirupsen/logrus"
|
||||
@@ -28,15 +32,17 @@ type symlinks struct {
|
||||
lookup lookup.Locator
|
||||
nvidiaCTKExecutablePath string
|
||||
csvFiles []string
|
||||
mountsFrom Discover
|
||||
}
|
||||
|
||||
// NewCreateSymlinksHook creates a discoverer for a hook that creates required symlinks in the container
|
||||
func NewCreateSymlinksHook(logger *logrus.Logger, csvFiles []string, cfg *Config) (Discover, error) {
|
||||
func NewCreateSymlinksHook(logger *logrus.Logger, csvFiles []string, mounts Discover, cfg *Config) (Discover, error) {
|
||||
d := symlinks{
|
||||
logger: logger,
|
||||
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
|
||||
nvidiaCTKExecutablePath: cfg.NVIDIAContainerToolkitCLIExecutablePath,
|
||||
csvFiles: csvFiles,
|
||||
mountsFrom: mounts,
|
||||
}
|
||||
|
||||
return &d, nil
|
||||
@@ -58,9 +64,15 @@ func (d symlinks) Hooks() ([]Hook, error) {
|
||||
|
||||
args := []string{hookPath, "hook", "create-symlinks"}
|
||||
for _, f := range d.csvFiles {
|
||||
args = append(args, "--csv-filenames", f)
|
||||
args = append(args, "--csv-filename", f)
|
||||
}
|
||||
|
||||
links, err := d.getSpecificLinkArgs()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to determine specific links: %v", err)
|
||||
}
|
||||
args = append(args, links...)
|
||||
|
||||
h := Hook{
|
||||
Lifecycle: cdi.CreateContainerHook,
|
||||
Path: hookPath,
|
||||
@@ -69,3 +81,45 @@ func (d symlinks) Hooks() ([]Hook, error) {
|
||||
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// getSpecificLinkArgs returns the required specic links that need to be created
|
||||
func (d symlinks) getSpecificLinkArgs() ([]string, error) {
|
||||
mounts, err := d.mountsFrom.Mounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
|
||||
}
|
||||
|
||||
linkProcessed := make(map[string]bool)
|
||||
var links []string
|
||||
for _, m := range mounts {
|
||||
var target string
|
||||
var link string
|
||||
|
||||
lib := filepath.Base(m.Path)
|
||||
|
||||
if strings.HasPrefix(lib, "libcuda.so") {
|
||||
// XXX Many applications wrongly assume that libcuda.so exists (e.g. with dlopen).
|
||||
target = "libcuda.so.1"
|
||||
link = "libcuda.so"
|
||||
} else if strings.HasPrefix(lib, "libGLX_nvidia.so") {
|
||||
// XXX GLVND requires this symlink for indirect GLX support.
|
||||
target = lib
|
||||
link = "libGLX_indirect.so.0"
|
||||
} else if strings.HasPrefix(lib, "libnvidia-opticalflow.so") {
|
||||
// XXX Fix missing symlink for libnvidia-opticalflow.so.
|
||||
target = "libnvidia-opticalflow.so.1"
|
||||
link = "libnvidia-opticalflow.so"
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
if linkProcessed[link] {
|
||||
continue
|
||||
}
|
||||
|
||||
linkPath := filepath.Join(filepath.Dir(m.Path), link)
|
||||
links = append(links, "--link", fmt.Sprintf("%v::%v", target, linkPath))
|
||||
linkProcessed[link] = true
|
||||
}
|
||||
|
||||
return links, nil
|
||||
}
|
||||
|
||||
@@ -17,29 +17,51 @@
|
||||
package edits
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
)
|
||||
|
||||
type device discover.Device
|
||||
|
||||
// toEdits converts a discovered device to CDI Container Edits.
|
||||
func (d device) toEdits() *cdi.ContainerEdits {
|
||||
func (d device) toEdits() (*cdi.ContainerEdits, error) {
|
||||
deviceNode, err := d.toSpec()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
e := cdi.ContainerEdits{
|
||||
ContainerEdits: &specs.ContainerEdits{
|
||||
DeviceNodes: []*specs.DeviceNode{d.toSpec()},
|
||||
DeviceNodes: []*specs.DeviceNode{deviceNode},
|
||||
},
|
||||
}
|
||||
return &e
|
||||
return &e, nil
|
||||
}
|
||||
|
||||
// toSpec converts a discovered Device to a CDI Spec Device. Note
|
||||
// that missing info is filled in when edits are applied by querying the Device node.
|
||||
func (d device) toSpec() *specs.DeviceNode {
|
||||
func (d device) toSpec() (*specs.DeviceNode, error) {
|
||||
// NOTE: This mirrors what cri-o does.
|
||||
// https://github.com/cri-o/cri-o/blob/ca3bb80a3dda0440659fcf8da8ed6f23211de94e/internal/config/device/device.go#L93
|
||||
// This can be removed once https://github.com/container-orchestrated-devices/container-device-interface/issues/72 is addressed
|
||||
dev, err := devices.DeviceFromPath(d.HostPath, "rwm")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query device node %v: %v", d.HostPath, err)
|
||||
}
|
||||
s := specs.DeviceNode{
|
||||
Path: d.Path,
|
||||
Path: d.Path,
|
||||
Type: string(dev.Type),
|
||||
Major: dev.Major,
|
||||
Minor: dev.Minor,
|
||||
FileMode: &dev.FileMode,
|
||||
UID: &dev.Uid,
|
||||
GID: &dev.Gid,
|
||||
}
|
||||
|
||||
return &s
|
||||
return &s, nil
|
||||
}
|
||||
|
||||
@@ -51,7 +51,11 @@ func NewSpecEdits(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier,
|
||||
|
||||
c := cdi.ContainerEdits{}
|
||||
for _, d := range devices {
|
||||
c.Append(device(d).toEdits())
|
||||
edits, err := device(d).toEdits()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to created container edits for device: %v", err)
|
||||
}
|
||||
c.Append(edits)
|
||||
}
|
||||
|
||||
for _, m := range mounts {
|
||||
|
||||
@@ -38,8 +38,7 @@ func (d mount) toEdits() *cdi.ContainerEdits {
|
||||
// that missing info is filled in when edits are applied by querying the Mount node.
|
||||
func (d mount) toSpec() *specs.Mount {
|
||||
s := specs.Mount{
|
||||
HostPath: d.Path,
|
||||
// TODO: We need to allow the container path to be customised
|
||||
HostPath: d.HostPath,
|
||||
ContainerPath: d.Path,
|
||||
Options: []string{
|
||||
"ro",
|
||||
|
||||
50
internal/info/auto.go
Normal file
50
internal/info/auto.go
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
import "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||
|
||||
// Logger is a basic interface for logging to allow these functions to be called
|
||||
// from code where logrus is not used.
|
||||
type Logger interface {
|
||||
Infof(string, ...interface{})
|
||||
Debugf(string, ...interface{})
|
||||
}
|
||||
|
||||
// ResolveAutoMode determines the correct mode for the platform if set to "auto"
|
||||
func ResolveAutoMode(logger Logger, mode string) (rmode string) {
|
||||
if mode != "auto" {
|
||||
return mode
|
||||
}
|
||||
defer func() {
|
||||
logger.Infof("Auto-detected mode as '%v'", rmode)
|
||||
}()
|
||||
|
||||
nvinfo := info.New()
|
||||
|
||||
isTegra, reason := nvinfo.IsTegraSystem()
|
||||
logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
|
||||
|
||||
hasNVML, reason := nvinfo.HasNvml()
|
||||
logger.Debugf("Has NVML? %v: %v", hasNVML, reason)
|
||||
|
||||
if isTegra && !hasNVML {
|
||||
return "csv"
|
||||
}
|
||||
|
||||
return "legacy"
|
||||
}
|
||||
53
internal/info/auto_test.go
Normal file
53
internal/info/auto_test.go
Normal file
@@ -0,0 +1,53 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestResolveAutoMode(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
mode string
|
||||
expectedMode string
|
||||
}{
|
||||
{
|
||||
description: "non-auto resolves to input",
|
||||
mode: "not-auto",
|
||||
expectedMode: "not-auto",
|
||||
},
|
||||
// TODO: The following test is brittle in that it will break on Tegra-based systems.
|
||||
// {
|
||||
// description: "auto resolves to legacy",
|
||||
// mode: "auto",
|
||||
// expectedMode: "legacy",
|
||||
// },
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
mode := ResolveAutoMode(logger, tc.mode)
|
||||
require.EqualValues(t, tc.expectedMode, mode)
|
||||
})
|
||||
}
|
||||
}
|
||||
39
internal/info/drm/drm_devices.go
Normal file
39
internal/info/drm/drm_devices.go
Normal file
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package drm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// GetDeviceNodesByBusID returns the DRM devices associated with the specified PCI bus ID
|
||||
func GetDeviceNodesByBusID(busID string) ([]string, error) {
|
||||
drmRoot := filepath.Join("/sys/bus/pci/devices", busID, "drm")
|
||||
matches, err := filepath.Glob(fmt.Sprintf("%s/*", drmRoot))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var drmDeviceNodes []string
|
||||
for _, m := range matches {
|
||||
drmDeviceNode := filepath.Join("/dev/dri", filepath.Base(m))
|
||||
drmDeviceNodes = append(drmDeviceNodes, drmDeviceNode)
|
||||
}
|
||||
|
||||
return drmDeviceNodes, nil
|
||||
}
|
||||
89
internal/info/proc/information_files.go
Normal file
89
internal/info/proc/information_files.go
Normal file
@@ -0,0 +1,89 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package proc
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GPUInfoField represents the field name for information specified in a GPU's information file
|
||||
type GPUInfoField string
|
||||
|
||||
// The following constants define the fields of interest from the GPU information file
|
||||
const (
|
||||
GPUInfoModel = GPUInfoField("Model")
|
||||
GPUInfoGPUUUID = GPUInfoField("GPU UUID")
|
||||
GPUInfoBusLocation = GPUInfoField("Bus Location")
|
||||
GPUInfoDeviceMinor = GPUInfoField("Device Minor")
|
||||
)
|
||||
|
||||
// GPUInfo stores the information for a GPU as determined from its associated information file
|
||||
type GPUInfo map[GPUInfoField]string
|
||||
|
||||
// GetInformationFilePaths returns the list of information files associated with NVIDIA GPUs.
|
||||
func GetInformationFilePaths(root string) ([]string, error) {
|
||||
return filepath.Glob(filepath.Join(root, "/proc/driver/nvidia/gpus/*/information"))
|
||||
}
|
||||
|
||||
// ParseGPUInformationFile parses the specified GPU information file and constructs a GPUInfo structure
|
||||
func ParseGPUInformationFile(path string) (GPUInfo, error) {
|
||||
infoFile, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open %v: %v", path, err)
|
||||
}
|
||||
defer infoFile.Close()
|
||||
|
||||
return gpuInfoFrom(infoFile), nil
|
||||
}
|
||||
|
||||
// gpuInfoFrom parses a GPUInfo struct from the specified reader
|
||||
// An information file has the following strucutre:
|
||||
// $ cat /proc/driver/nvidia/gpus/0000\:06\:00.0/information
|
||||
// Model: Tesla V100-SXM2-16GB
|
||||
// IRQ: 408
|
||||
// GPU UUID: GPU-edfee158-11c1-52b8-0517-92f30e7fac88
|
||||
// Video BIOS: 88.00.41.00.01
|
||||
// Bus Type: PCIe
|
||||
// DMA Size: 47 bits
|
||||
// DMA Mask: 0x7fffffffffff
|
||||
// Bus Location: 0000:06:00.0
|
||||
// Device Minor: 0
|
||||
// GPU Excluded: No
|
||||
func gpuInfoFrom(reader io.Reader) GPUInfo {
|
||||
info := make(GPUInfo)
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
field := GPUInfoField(parts[0])
|
||||
value := strings.TrimSpace(parts[1])
|
||||
|
||||
info[field] = value
|
||||
}
|
||||
|
||||
return info
|
||||
}
|
||||
43
internal/info/version.go
Normal file
43
internal/info/version.go
Normal file
@@ -0,0 +1,43 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
import "strings"
|
||||
|
||||
// version must be set by go build's -X main.version= option in the Makefile.
|
||||
var version = "unknown"
|
||||
|
||||
// gitCommit will be the hash that the binary was built from
|
||||
// and will be populated by the Makefile
|
||||
var gitCommit = ""
|
||||
|
||||
// GetVersionParts returns the different version components
|
||||
func GetVersionParts() []string {
|
||||
v := []string{version}
|
||||
|
||||
if gitCommit != "" {
|
||||
v = append(v, "commit: "+gitCommit)
|
||||
}
|
||||
|
||||
return v
|
||||
}
|
||||
|
||||
// GetVersionString returns the string representation of the version
|
||||
func GetVersionString(more ...string) string {
|
||||
v := append(GetVersionParts(), more...)
|
||||
return strings.Join(v, "\n")
|
||||
}
|
||||
292
internal/ldcache/ldcache.go
Normal file
292
internal/ldcache/ldcache.go
Normal file
@@ -0,0 +1,292 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
// Adapted from https://github.com/rai-project/ldcache
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const ldcachePath = "/etc/ld.so.cache"
|
||||
|
||||
const (
|
||||
magicString1 = "ld.so-1.7.0"
|
||||
magicString2 = "glibc-ld.so.cache"
|
||||
magicVersion = "1.1"
|
||||
)
|
||||
|
||||
const (
|
||||
flagTypeMask = 0x00ff
|
||||
flagTypeELF = 0x0001
|
||||
|
||||
flagArchMask = 0xff00
|
||||
flagArchI386 = 0x0000
|
||||
flagArchX8664 = 0x0300
|
||||
flagArchX32 = 0x0800
|
||||
flagArchPpc64le = 0x0500
|
||||
)
|
||||
|
||||
var errInvalidCache = errors.New("invalid ld.so.cache file")
|
||||
|
||||
type header1 struct {
|
||||
Magic [len(magicString1) + 1]byte // include null delimiter
|
||||
NLibs uint32
|
||||
}
|
||||
|
||||
type entry1 struct {
|
||||
Flags int32
|
||||
Key, Value uint32
|
||||
}
|
||||
|
||||
type header2 struct {
|
||||
Magic [len(magicString2)]byte
|
||||
Version [len(magicVersion)]byte
|
||||
NLibs uint32
|
||||
TableSize uint32
|
||||
_ [3]uint32 // unused
|
||||
_ uint64 // force 8 byte alignment
|
||||
}
|
||||
|
||||
type entry2 struct {
|
||||
Flags int32
|
||||
Key, Value uint32
|
||||
OSVersion uint32
|
||||
HWCap uint64
|
||||
}
|
||||
|
||||
// LDCache represents the interface for performing lookups into the LDCache
|
||||
type LDCache interface {
|
||||
List() ([]string, []string)
|
||||
Lookup(...string) ([]string, []string)
|
||||
}
|
||||
|
||||
type ldcache struct {
|
||||
*bytes.Reader
|
||||
|
||||
data, libs []byte
|
||||
header header2
|
||||
entries []entry2
|
||||
|
||||
root string
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// New creates a new LDCache with the specified logger and root.
|
||||
func New(logger *log.Logger, root string) (LDCache, error) {
|
||||
path := filepath.Join(root, ldcachePath)
|
||||
|
||||
logger.Debugf("Opening ld.conf at %v", path)
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d, err := syscall.Mmap(int(f.Fd()), 0, int(fi.Size()),
|
||||
syscall.PROT_READ, syscall.MAP_PRIVATE)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cache := &ldcache{
|
||||
data: d,
|
||||
Reader: bytes.NewReader(d),
|
||||
root: root,
|
||||
logger: logger,
|
||||
}
|
||||
return cache, cache.parse()
|
||||
}
|
||||
|
||||
func (c *ldcache) Close() error {
|
||||
return syscall.Munmap(c.data)
|
||||
}
|
||||
|
||||
func (c *ldcache) Magic() string {
|
||||
return string(c.header.Magic[:])
|
||||
}
|
||||
|
||||
func (c *ldcache) Version() string {
|
||||
return string(c.header.Version[:])
|
||||
}
|
||||
|
||||
func strn(b []byte, n int) string {
|
||||
return string(b[:n])
|
||||
}
|
||||
|
||||
func (c *ldcache) parse() error {
|
||||
var header header1
|
||||
|
||||
// Check for the old format (< glibc-2.2)
|
||||
if c.Len() <= int(unsafe.Sizeof(header)) {
|
||||
return errInvalidCache
|
||||
}
|
||||
if strn(c.data, len(magicString1)) == magicString1 {
|
||||
if err := binary.Read(c, binary.LittleEndian, &header); err != nil {
|
||||
return err
|
||||
}
|
||||
n := int64(header.NLibs) * int64(unsafe.Sizeof(entry1{}))
|
||||
offset, err := c.Seek(n, 1) // skip old entries
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n = (-offset) & int64(unsafe.Alignof(c.header)-1)
|
||||
_, err = c.Seek(n, 1) // skip padding
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
c.libs = c.data[c.Size()-int64(c.Len()):] // kv offsets start here
|
||||
if err := binary.Read(c, binary.LittleEndian, &c.header); err != nil {
|
||||
return err
|
||||
}
|
||||
if c.Magic() != magicString2 || c.Version() != magicVersion {
|
||||
return errInvalidCache
|
||||
}
|
||||
c.entries = make([]entry2, c.header.NLibs)
|
||||
if err := binary.Read(c, binary.LittleEndian, &c.entries); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// List creates a list of libraires in the ldcache.
|
||||
// The 32-bit and 64-bit libraries are returned separately.
|
||||
func (c *ldcache) List() ([]string, []string) {
|
||||
paths := make(map[int][]string)
|
||||
|
||||
processed := make(map[string]bool)
|
||||
|
||||
for _, e := range c.entries {
|
||||
bits := 0
|
||||
if ((e.Flags & flagTypeMask) & flagTypeELF) == 0 {
|
||||
continue
|
||||
}
|
||||
switch e.Flags & flagArchMask {
|
||||
case flagArchX8664:
|
||||
fallthrough
|
||||
case flagArchPpc64le:
|
||||
bits = 64
|
||||
case flagArchX32:
|
||||
fallthrough
|
||||
case flagArchI386:
|
||||
bits = 32
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if e.Key > uint32(len(c.libs)) || e.Value > uint32(len(c.libs)) {
|
||||
continue
|
||||
}
|
||||
value := c.libs[e.Value:]
|
||||
|
||||
n := bytes.IndexByte(value, 0)
|
||||
if n < 0 {
|
||||
break
|
||||
}
|
||||
|
||||
name := filepath.Join(c.root, strn(value, n))
|
||||
c.logger.Debugf("checking %v", string(name))
|
||||
|
||||
path, err := filepath.EvalSymlinks(name)
|
||||
if err != nil {
|
||||
c.logger.Debugf("could not resolve symlink for %v", name)
|
||||
break
|
||||
}
|
||||
if processed[path] {
|
||||
continue
|
||||
}
|
||||
paths[bits] = append(paths[bits], path)
|
||||
processed[path] = true
|
||||
}
|
||||
|
||||
return paths[32], paths[64]
|
||||
}
|
||||
|
||||
// Lookup searches the ldcache for the specified prefixes.
|
||||
// The 32-bit and 64-bit libraries matching the prefixes are returned.
|
||||
func (c *ldcache) Lookup(libs ...string) (paths32, paths64 []string) {
|
||||
c.logger.Debugf("Looking up %v in cache", libs)
|
||||
type void struct{}
|
||||
var paths *[]string
|
||||
|
||||
set := make(map[string]void)
|
||||
prefix := make([][]byte, len(libs))
|
||||
|
||||
for i := range libs {
|
||||
prefix[i] = []byte(libs[i])
|
||||
}
|
||||
for _, e := range c.entries {
|
||||
if ((e.Flags & flagTypeMask) & flagTypeELF) == 0 {
|
||||
continue
|
||||
}
|
||||
switch e.Flags & flagArchMask {
|
||||
case flagArchX8664:
|
||||
fallthrough
|
||||
case flagArchPpc64le:
|
||||
paths = &paths64
|
||||
case flagArchX32:
|
||||
fallthrough
|
||||
case flagArchI386:
|
||||
paths = &paths32
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if e.Key > uint32(len(c.libs)) || e.Value > uint32(len(c.libs)) {
|
||||
continue
|
||||
}
|
||||
lib := c.libs[e.Key:]
|
||||
value := c.libs[e.Value:]
|
||||
|
||||
for _, p := range prefix {
|
||||
if bytes.HasPrefix(lib, p) {
|
||||
n := bytes.IndexByte(value, 0)
|
||||
if n < 0 {
|
||||
break
|
||||
}
|
||||
|
||||
name := filepath.Join(c.root, strn(value, n))
|
||||
c.logger.Debugf("checking %v", string(name))
|
||||
|
||||
path, err := filepath.EvalSymlinks(name)
|
||||
if err != nil {
|
||||
c.logger.Debugf("could not resolve symlink for %v", name)
|
||||
break
|
||||
}
|
||||
if _, ok := set[path]; ok {
|
||||
break
|
||||
}
|
||||
set[path] = void{}
|
||||
*paths = append(*paths, path)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -42,11 +42,11 @@ func NewCharDeviceLocator(logger *logrus.Logger, root string) Locator {
|
||||
|
||||
// assertCharDevice checks whether the specified path is a char device and returns an error if this is not the case.
|
||||
func assertCharDevice(filename string) error {
|
||||
info, err := os.Stat(filename)
|
||||
info, err := os.Lstat(filename)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting info: %v", err)
|
||||
}
|
||||
if info.Mode()|os.ModeCharDevice == 0 {
|
||||
if info.Mode()&os.ModeCharDevice == 0 {
|
||||
return fmt.Errorf("%v is not a char device", filename)
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -25,24 +25,13 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
envPath = "PATH"
|
||||
)
|
||||
|
||||
var defaultPaths = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"}
|
||||
|
||||
type executable struct {
|
||||
file
|
||||
}
|
||||
|
||||
// NewExecutableLocator creates a locator to fine executable files in the path. A logger can also be specified.
|
||||
func NewExecutableLocator(logger *log.Logger, root string) Locator {
|
||||
pathEnv := os.Getenv(envPath)
|
||||
paths := filepath.SplitList(pathEnv)
|
||||
|
||||
if root != "" {
|
||||
paths = append(paths, defaultPaths...)
|
||||
}
|
||||
paths := GetPaths(root)
|
||||
|
||||
var prefixes []string
|
||||
for _, dir := range paths {
|
||||
@@ -60,18 +49,19 @@ func NewExecutableLocator(logger *log.Logger, root string) Locator {
|
||||
|
||||
var _ Locator = (*executable)(nil)
|
||||
|
||||
// Locate finds executable files in the path. If a relative or absolute path is specified, the prefix paths are not considered.
|
||||
func (p executable) Locate(filename string) ([]string, error) {
|
||||
// Locate finds executable files with the specified pattern in the path.
|
||||
// If a relative or absolute path is specified, the prefix paths are not considered.
|
||||
func (p executable) Locate(pattern string) ([]string, error) {
|
||||
// For absolute paths we ensure that it is executable
|
||||
if strings.Contains(filename, "/") {
|
||||
err := assertExecutable(filename)
|
||||
if strings.Contains(pattern, "/") {
|
||||
err := assertExecutable(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("absolute path %v is not an executable file: %v", filename, err)
|
||||
return nil, fmt.Errorf("absolute path %v is not an executable file: %v", pattern, err)
|
||||
}
|
||||
return []string{filename}, nil
|
||||
return []string{pattern}, nil
|
||||
}
|
||||
|
||||
return p.file.Locate(filename)
|
||||
return p.file.Locate(pattern)
|
||||
}
|
||||
|
||||
// assertExecutable checks whether the specified path is an execuable file.
|
||||
|
||||
@@ -50,22 +50,29 @@ func newFileLocator(logger *log.Logger, root string) file {
|
||||
|
||||
var _ Locator = (*file)(nil)
|
||||
|
||||
// Locate attempts to find the specified file. All prefixes are searched and any matching
|
||||
// candidates are returned. If no matches are found, an error is returned.
|
||||
func (p file) Locate(filename string) ([]string, error) {
|
||||
// Locate attempts to find files with names matching the specified pattern.
|
||||
// All prefixes are searched and any matching candidates are returned. If no matches are found, an error is returned.
|
||||
func (p file) Locate(pattern string) ([]string, error) {
|
||||
var filenames []string
|
||||
for _, prefix := range p.prefixes {
|
||||
candidate := filepath.Join(prefix, filename)
|
||||
p.logger.Debugf("Checking candidate '%v'", candidate)
|
||||
err := p.filter(candidate)
|
||||
pathPattern := filepath.Join(prefix, pattern)
|
||||
candidates, err := filepath.Glob(pathPattern)
|
||||
if err != nil {
|
||||
p.logger.Debugf("Candidate '%v' does not meet requirements: %v", candidate, err)
|
||||
continue
|
||||
p.logger.Debugf("Checking pattern '%v' failed: %v", pathPattern, err)
|
||||
}
|
||||
|
||||
for _, candidate := range candidates {
|
||||
p.logger.Debugf("Checking candidate '%v'", candidate)
|
||||
err := p.filter(candidate)
|
||||
if err != nil {
|
||||
p.logger.Debugf("Candidate '%v' does not meet requirements: %v", candidate, err)
|
||||
continue
|
||||
}
|
||||
filenames = append(filenames, candidate)
|
||||
}
|
||||
filenames = append(filenames, candidate)
|
||||
}
|
||||
if len(filename) == 0 {
|
||||
return nil, fmt.Errorf("file %v not found", filename)
|
||||
if len(filenames) == 0 {
|
||||
return nil, fmt.Errorf("pattern %v not found", pattern)
|
||||
}
|
||||
return filenames, nil
|
||||
}
|
||||
|
||||
68
internal/lookup/library.go
Normal file
68
internal/lookup/library.go
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type library struct {
|
||||
logger *log.Logger
|
||||
symlink Locator
|
||||
cache ldcache.LDCache
|
||||
}
|
||||
|
||||
var _ Locator = (*library)(nil)
|
||||
|
||||
// NewLibraryLocator creates a library locator using the specified logger.
|
||||
func NewLibraryLocator(logger *log.Logger, root string) (Locator, error) {
|
||||
cache, err := ldcache.New(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading ldcache: %v", err)
|
||||
}
|
||||
|
||||
l := library{
|
||||
symlink: NewSymlinkLocator(logger, root),
|
||||
cache: cache,
|
||||
}
|
||||
|
||||
return &l, nil
|
||||
}
|
||||
|
||||
// Locate finds the specified libraryname.
|
||||
// If the input is a library name, the ldcache is searched otherwise the
|
||||
// provided path is resolved as a symlink.
|
||||
func (l library) Locate(libname string) ([]string, error) {
|
||||
if strings.Contains(libname, "/") {
|
||||
return l.symlink.Locate(libname)
|
||||
}
|
||||
|
||||
paths32, paths64 := l.cache.Lookup(libname)
|
||||
if len(paths32) > 0 {
|
||||
l.logger.Warnf("Ignoring 32-bit libraries for %v: %v", libname, paths32)
|
||||
}
|
||||
|
||||
if len(paths64) == 0 {
|
||||
return nil, fmt.Errorf("64-bit library %v not found", libname)
|
||||
}
|
||||
|
||||
return paths64, nil
|
||||
}
|
||||
@@ -20,6 +20,9 @@ var _ Locator = &LocatorMock{}
|
||||
// LocateFunc: func(s string) ([]string, error) {
|
||||
// panic("mock out the Locate method")
|
||||
// },
|
||||
// RelativeFunc: func(s string) (string, error) {
|
||||
// panic("mock out the Relative method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedLocator in code that requires Locator
|
||||
@@ -30,6 +33,9 @@ type LocatorMock struct {
|
||||
// LocateFunc mocks the Locate method.
|
||||
LocateFunc func(s string) ([]string, error)
|
||||
|
||||
// RelativeFunc mocks the Relative method.
|
||||
RelativeFunc func(s string) (string, error)
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// Locate holds details about calls to the Locate method.
|
||||
@@ -37,8 +43,14 @@ type LocatorMock struct {
|
||||
// S is the s argument value.
|
||||
S string
|
||||
}
|
||||
// Relative holds details about calls to the Relative method.
|
||||
Relative []struct {
|
||||
// S is the s argument value.
|
||||
S string
|
||||
}
|
||||
}
|
||||
lockLocate sync.RWMutex
|
||||
lockLocate sync.RWMutex
|
||||
lockRelative sync.RWMutex
|
||||
}
|
||||
|
||||
// Locate calls LocateFunc.
|
||||
@@ -75,3 +87,38 @@ func (mock *LocatorMock) LocateCalls() []struct {
|
||||
mock.lockLocate.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// Relative calls RelativeFunc.
|
||||
func (mock *LocatorMock) Relative(s string) (string, error) {
|
||||
callInfo := struct {
|
||||
S string
|
||||
}{
|
||||
S: s,
|
||||
}
|
||||
mock.lockRelative.Lock()
|
||||
mock.calls.Relative = append(mock.calls.Relative, callInfo)
|
||||
mock.lockRelative.Unlock()
|
||||
if mock.RelativeFunc == nil {
|
||||
var (
|
||||
sOut string
|
||||
errOut error
|
||||
)
|
||||
return sOut, errOut
|
||||
}
|
||||
return mock.RelativeFunc(s)
|
||||
}
|
||||
|
||||
// RelativeCalls gets all the calls that were made to Relative.
|
||||
// Check the length with:
|
||||
// len(mockedLocator.RelativeCalls())
|
||||
func (mock *LocatorMock) RelativeCalls() []struct {
|
||||
S string
|
||||
} {
|
||||
var calls []struct {
|
||||
S string
|
||||
}
|
||||
mock.lockRelative.RLock()
|
||||
calls = mock.calls.Relative
|
||||
mock.lockRelative.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
69
internal/lookup/path.go
Normal file
69
internal/lookup/path.go
Normal file
@@ -0,0 +1,69 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
envPath = "PATH"
|
||||
)
|
||||
|
||||
var (
|
||||
defaultPATH = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"}
|
||||
)
|
||||
|
||||
// GetPaths returns a list of paths for a specified root. These are constructed from the
|
||||
// PATH environment variable, a default path list, and the supplied root.
|
||||
func GetPaths(root string) []string {
|
||||
dirs := filepath.SplitList(os.Getenv(envPath))
|
||||
|
||||
inDirs := make(map[string]bool)
|
||||
for _, d := range dirs {
|
||||
inDirs[d] = true
|
||||
}
|
||||
|
||||
// directories from the environment have higher precedence
|
||||
for _, d := range defaultPATH {
|
||||
if inDirs[d] {
|
||||
// We don't add paths that are already included
|
||||
continue
|
||||
}
|
||||
dirs = append(dirs, d)
|
||||
}
|
||||
|
||||
if root != "" && root != "/" {
|
||||
rootDirs := []string{}
|
||||
for _, dir := range dirs {
|
||||
rootDirs = append(rootDirs, path.Join(root, dir))
|
||||
}
|
||||
// directories with the root prefix have higher precedence
|
||||
dirs = append(rootDirs, dirs...)
|
||||
}
|
||||
|
||||
return dirs
|
||||
}
|
||||
|
||||
// GetPath returns a colon-separated path value that can be used to set the PATH
|
||||
// environment variable
|
||||
func GetPath(root string) string {
|
||||
return strings.Join(GetPaths(root), ":")
|
||||
}
|
||||
@@ -52,10 +52,10 @@ func NewSymlinkLocator(logger *logrus.Logger, root string) Locator {
|
||||
return &l
|
||||
}
|
||||
|
||||
// Locate finds the specified file at the specified root. If the file is a symlink, the link is followed and all candidates
|
||||
// to the final target are returned.
|
||||
func (p symlinkChain) Locate(filename string) ([]string, error) {
|
||||
candidates, err := p.file.Locate(filename)
|
||||
// Locate finds the specified pattern at the specified root.
|
||||
// If the file is a symlink, the link is followed and all candidates to the final target are returned.
|
||||
func (p symlinkChain) Locate(pattern string) ([]string, error) {
|
||||
candidates, err := p.file.Locate(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -104,14 +104,15 @@ func (p symlinkChain) Locate(filename string) ([]string, error) {
|
||||
return filenames, nil
|
||||
}
|
||||
|
||||
// Locate finds the specified file at the specified root. If the file is a symlink, the link is resolved and the target returned.
|
||||
func (p symlink) Locate(filename string) ([]string, error) {
|
||||
candidates, err := p.file.Locate(filename)
|
||||
// Locate finds the specified pattern at the specified root.
|
||||
// If the file is a symlink, the link is resolved and the target returned.
|
||||
func (p symlink) Locate(pattern string) ([]string, error) {
|
||||
candidates, err := p.file.Locate(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(candidates) != 1 {
|
||||
return nil, fmt.Errorf("failed to uniquely resolve symlink %v: %v", filename, candidates)
|
||||
return nil, fmt.Errorf("failed to uniquely resolve symlink %v: %v", pattern, candidates)
|
||||
}
|
||||
|
||||
target, err := filepath.EvalSymlinks(candidates[0])
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user