mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
319 Commits
v1.10.0-rc
...
v1.12.0-rc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
14e587d55f | ||
|
|
66ec967de2 | ||
|
|
252693aeac | ||
|
|
079b47ed94 | ||
|
|
d2952b07aa | ||
|
|
41f1b93422 | ||
|
|
3140810c95 | ||
|
|
046d761f4c | ||
|
|
0a2083df72 | ||
|
|
80c810bf9e | ||
|
|
82ba424212 | ||
|
|
c131b99cb3 | ||
|
|
64a85fb832 | ||
|
|
ebf1772068 | ||
|
|
8604c255c4 | ||
|
|
bea8321205 | ||
|
|
db962c4bf2 | ||
|
|
d1a3de7671 | ||
|
|
8da7e74408 | ||
|
|
55eb898186 | ||
|
|
a7fc29d4bd | ||
|
|
fdb3e51294 | ||
|
|
0582180cab | ||
|
|
46667b5a8c | ||
|
|
e4e1de82ec | ||
|
|
d51c8fcfa7 | ||
|
|
9b33c34a57 | ||
|
|
0b6cd7e90e | ||
|
|
029a04c37d | ||
|
|
60c1df4e9c | ||
|
|
3e35312537 | ||
|
|
932b39fd08 | ||
|
|
78cafe45d4 | ||
|
|
584e792a5a | ||
|
|
f0bcfa0415 | ||
|
|
d45ec7bd28 | ||
|
|
153f2f6300 | ||
|
|
9df3975740 | ||
|
|
5575b391ff | ||
|
|
9faf11ddf3 | ||
|
|
d3ed27722e | ||
|
|
07a3f3040a | ||
|
|
749ab2a746 | ||
|
|
217a135eb1 | ||
|
|
22e65b320b | ||
|
|
53bb940b30 | ||
|
|
1c1ad8098a | ||
|
|
203db4390c | ||
|
|
b6d9c2c1ad | ||
|
|
429ef4d4e9 | ||
|
|
25759ca933 | ||
|
|
74abea07e2 | ||
|
|
7955bb1a84 | ||
|
|
75b11eb80a | ||
|
|
c958817eef | ||
|
|
80f8c2a418 | ||
|
|
08640a6f64 | ||
|
|
9db31f7506 | ||
|
|
7fd40632fe | ||
|
|
6ef19d2925 | ||
|
|
83ce83239b | ||
|
|
30fb486e44 | ||
|
|
0022661565 | ||
|
|
28e882f26f | ||
|
|
71fbe7a812 | ||
|
|
ce3d94af1a | ||
|
|
0bc09665a8 | ||
|
|
205ba098e9 | ||
|
|
877832da69 | ||
|
|
b7ba96a72e | ||
|
|
93c59f2d9c | ||
|
|
5a56b658ba | ||
|
|
99889671b5 | ||
|
|
a2fb017208 | ||
|
|
f7021d84b5 | ||
|
|
c793fc27d8 | ||
|
|
3d2328bdfd | ||
|
|
76b69f45de | ||
|
|
73e65edaa9 | ||
|
|
cd7ee5a435 | ||
|
|
eac4faddc6 | ||
|
|
bc8a73dde4 | ||
|
|
624b9d8ee6 | ||
|
|
9d6e2ff1b0 | ||
|
|
aca0c7bc5a | ||
|
|
db47b58275 | ||
|
|
59bf7607ce | ||
|
|
61ff3fbd7b | ||
|
|
523fc57ab4 | ||
|
|
ae18c5d847 | ||
|
|
4abdc2f35d | ||
|
|
f8748bfa9a | ||
|
|
5fb0ae2c2d | ||
|
|
899fc72014 | ||
|
|
1267c1d9a2 | ||
|
|
9a697e340b | ||
|
|
abe8ca71e0 | ||
|
|
9bbf7dcf96 | ||
|
|
ec1222b58b | ||
|
|
229b46e0ca | ||
|
|
b6a68c4add | ||
|
|
e588bfac7d | ||
|
|
224020533e | ||
|
|
3736bb3aca | ||
|
|
1e72f92b74 | ||
|
|
896f5b2e9f | ||
|
|
c068d4048f | ||
|
|
8796cd76b0 | ||
|
|
1597ede2af | ||
|
|
3dd8020695 | ||
|
|
dfa041991f | ||
|
|
568896742b | ||
|
|
f52973217f | ||
|
|
efd29f1cec | ||
|
|
4b02670049 | ||
|
|
8550874686 | ||
|
|
38513d5a53 | ||
|
|
a35236a8f6 | ||
|
|
0c2e72b7c1 | ||
|
|
f0bdfbebe4 | ||
|
|
a4fa61d05d | ||
|
|
6e23a635c6 | ||
|
|
4dedac6a24 | ||
|
|
8c1b9b33c1 | ||
|
|
d37c17857e | ||
|
|
a0065456d0 | ||
|
|
a34a571d2e | ||
|
|
bb4cfece61 | ||
|
|
b16d263ee7 | ||
|
|
027395bb8a | ||
|
|
3ecd790206 | ||
|
|
52bb9e186b | ||
|
|
68b6d1cab1 | ||
|
|
bdb67b4fba | ||
|
|
d0c39a11d5 | ||
|
|
9de6361938 | ||
|
|
fb016dca86 | ||
|
|
8beb7b4231 | ||
|
|
2b08a79206 | ||
|
|
5885fead8f | ||
|
|
a9fb7a4a88 | ||
|
|
b5dbcaeaf9 | ||
|
|
80a46d4a5c | ||
|
|
febce822d5 | ||
|
|
e8099a713c | ||
|
|
d9de4a09b8 | ||
|
|
2dbcda2619 | ||
|
|
691b93ffb0 | ||
|
|
cb0c94cd40 | ||
|
|
3168718563 | ||
|
|
dc8972a26a | ||
|
|
0a2d8f4d22 | ||
|
|
8d623967ed | ||
|
|
503ed96275 | ||
|
|
d8ba84d427 | ||
|
|
8e8c41a3bc | ||
|
|
e34fe17b45 | ||
|
|
c5b0278c58 | ||
|
|
8daa257b35 | ||
|
|
6329174cfc | ||
|
|
1ec41c1bf1 | ||
|
|
581a76de38 | ||
|
|
5d52ca8909 | ||
|
|
ad7151d394 | ||
|
|
3269a7b0e7 | ||
|
|
6a155cc606 | ||
|
|
a5bbf613e8 | ||
|
|
22427c1359 | ||
|
|
f17121fd6c | ||
|
|
256e37eb3f | ||
|
|
bdfd123b9d | ||
|
|
3f7dce202a | ||
|
|
a6d21abe14 | ||
|
|
d0f1fe2273 | ||
|
|
8de9593209 | ||
|
|
64b2b50470 | ||
|
|
4dc1451c49 | ||
|
|
211081ff25 | ||
|
|
c1c1d5cf8e | ||
|
|
e91ffef258 | ||
|
|
47c8aa3790 | ||
|
|
33b4e7fb0a | ||
|
|
936da0295b | ||
|
|
c2205c14fb | ||
|
|
56935f5743 | ||
|
|
1b3bae790c | ||
|
|
47559a8c87 | ||
|
|
86412ea821 | ||
|
|
b8aa844171 | ||
|
|
f9464c5cf9 | ||
|
|
9df75e1fa3 | ||
|
|
0218e2ebf7 | ||
|
|
a9dc6550d5 | ||
|
|
ffd6ec3c54 | ||
|
|
de3e0df96c | ||
|
|
e5dadf34d9 | ||
|
|
52145f2d73 | ||
|
|
90df3caf62 | ||
|
|
50db66a925 | ||
|
|
8587fa05bd | ||
|
|
8129dade3c | ||
|
|
3610fe7c33 | ||
|
|
90518e0ce5 | ||
|
|
9c060f06ba | ||
|
|
e848aa7813 | ||
|
|
feedc912e4 | ||
|
|
ab3f05cf62 | ||
|
|
35982e51bf | ||
|
|
94e650c518 | ||
|
|
d9edc18bf8 | ||
|
|
f4d01e0a05 | ||
|
|
648cfaba51 | ||
|
|
3a9de13f4e | ||
|
|
629a68937e | ||
|
|
34e80abdea | ||
|
|
1161b21166 | ||
|
|
bcdef81e30 | ||
|
|
acc0afbb7a | ||
|
|
7584044b3c | ||
|
|
02c14e981c | ||
|
|
37ee972f74 | ||
|
|
3809407b6a | ||
|
|
f9547c447a | ||
|
|
eb85d45137 | ||
|
|
9f0060f651 | ||
|
|
0e6dc3f7ea | ||
|
|
1b4944e1de | ||
|
|
83743e3613 | ||
|
|
87afcc3ef4 | ||
|
|
6ed3a4e1a6 | ||
|
|
8a56671d18 | ||
|
|
1d81db76a6 | ||
|
|
f50aecb84e | ||
|
|
a4258277e1 | ||
|
|
18eb3c7c38 | ||
|
|
a0e728b5c8 | ||
|
|
df0176cca4 | ||
|
|
b68b3c543b | ||
|
|
aea1a85bb4 | ||
|
|
98e874e750 | ||
|
|
eef016c27d | ||
|
|
19f89ecafd | ||
|
|
8817dee66c | ||
|
|
404e266222 | ||
|
|
9b898c65fa | ||
|
|
5c39cf4deb | ||
|
|
beff276a52 | ||
|
|
55cb82c6c8 | ||
|
|
88d1143827 | ||
|
|
d5162b1917 | ||
|
|
ec078543a1 | ||
|
|
9191074666 | ||
|
|
89824849d3 | ||
|
|
877083f091 | ||
|
|
6467fcd0f5 | ||
|
|
fd135f1a8b | ||
|
|
4e08ec2405 | ||
|
|
925c348565 | ||
|
|
25fd1aaf7e | ||
|
|
91e645b91b | ||
|
|
a1c2f07b6e | ||
|
|
7f7bec0668 | ||
|
|
cb34f7c6d1 | ||
|
|
7f47a61986 | ||
|
|
e8843c38f2 | ||
|
|
d66c00dd1d | ||
|
|
55ac8628c8 | ||
|
|
175f75b43f | ||
|
|
da3226745c | ||
|
|
b23e3ea13a | ||
|
|
02f0ee08fc | ||
|
|
4b0e79be50 | ||
|
|
8b729475e2 | ||
|
|
a1319b1786 | ||
|
|
278fa43303 | ||
|
|
d75f364b27 | ||
|
|
52d5021b76 | ||
|
|
7cfd3bd510 | ||
|
|
05ca131858 | ||
|
|
181ce8571d | ||
|
|
2ab0c6abce | ||
|
|
50caf29b4e | ||
|
|
067f7af142 | ||
|
|
d1449951bc | ||
|
|
a05af50b0f | ||
|
|
950aff269b | ||
|
|
e033db559f | ||
|
|
9a24a40fd2 | ||
|
|
df391e2144 | ||
|
|
9146b4d4b6 | ||
|
|
068d7e085b | ||
|
|
79510a8290 | ||
|
|
50240c93bd | ||
|
|
7ca0e5db60 | ||
|
|
c0e6765d46 | ||
|
|
7739b0e8ea | ||
|
|
ab23fc52db | ||
|
|
530d66b5c7 | ||
|
|
dad3e855b5 | ||
|
|
15cbd54d1c | ||
|
|
4cd719692e | ||
|
|
b940294557 | ||
|
|
840cdec36d | ||
|
|
73a5b70a02 | ||
|
|
f0cae49892 | ||
|
|
e07c7f0fa2 | ||
|
|
52ce97929c | ||
|
|
084eae6e0d | ||
|
|
f656b5c887 | ||
|
|
55c1d7c256 | ||
|
|
0f2b20fffc | ||
|
|
bb69727148 | ||
|
|
0b4f3aaf69 | ||
|
|
e5125515f0 | ||
|
|
033b2fd90d | ||
|
|
a0a00e38fd | ||
|
|
77cf70b625 | ||
|
|
8ab3d713bc | ||
|
|
c58d81cec5 |
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -12,9 +12,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
default:
|
||||
image: docker:stable
|
||||
image: docker
|
||||
services:
|
||||
- name: docker:stable-dind
|
||||
- name: docker:dind
|
||||
command: ["--experimental"]
|
||||
|
||||
variables:
|
||||
@@ -34,51 +34,91 @@ stages:
|
||||
- scan
|
||||
- release
|
||||
|
||||
.main-or-manual:
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main"
|
||||
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
|
||||
- if: $CI_PIPELINE_SOURCE == "schedule"
|
||||
when: manual
|
||||
|
||||
# Define the distribution targets
|
||||
.dist-amazonlinux2:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: amazonlinux2
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-centos7:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: centos7
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-centos8:
|
||||
variables:
|
||||
DIST: centos8
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-debian10:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: debian10
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-debian9:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: debian9
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-fedora35:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: fedora35
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-opensuse-leap15.1:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: opensuse-leap15.1
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-ubi8:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubi8
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
PACKAGE_REPO_TYPE: rpm
|
||||
|
||||
.dist-ubuntu16.04:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubuntu16.04
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-ubuntu18.04:
|
||||
variables:
|
||||
DIST: ubuntu18.04
|
||||
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-ubuntu20.04:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubuntu20.04
|
||||
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
|
||||
PACKAGE_REPO_TYPE: debian
|
||||
|
||||
.dist-packaging:
|
||||
variables:
|
||||
@@ -98,6 +138,8 @@ stages:
|
||||
ARCH: arm64
|
||||
|
||||
.arch-ppc64le:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
ARCH: ppc64le
|
||||
|
||||
@@ -138,7 +180,7 @@ test-packaging:
|
||||
# Download the regctl binary for use in the release steps
|
||||
.regctl-setup:
|
||||
before_script:
|
||||
- export REGCTL_VERSION=v0.3.10
|
||||
- export REGCTL_VERSION=v0.4.5
|
||||
- apk add --no-cache curl
|
||||
- mkdir -p bin
|
||||
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
|
||||
@@ -210,13 +252,6 @@ release:staging-centos7:
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
release:staging-centos8:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-centos8
|
||||
needs:
|
||||
- image-centos8
|
||||
|
||||
release:staging-ubi8:
|
||||
extends:
|
||||
- .release:staging
|
||||
@@ -239,10 +274,7 @@ release:staging-ubuntu20.04:
|
||||
- .release:staging
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- test-toolkit-ubuntu20.04
|
||||
- test-containerd-ubuntu20.04
|
||||
- test-crio-ubuntu20.04
|
||||
- test-docker-ubuntu20.04
|
||||
- image-ubuntu20.04
|
||||
|
||||
release:staging-packaging:
|
||||
extends:
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,6 +4,8 @@ dist
|
||||
/coverage.out*
|
||||
/test/output/
|
||||
/nvidia-container-runtime
|
||||
/nvidia-container-runtime-hook
|
||||
/nvidia-container-toolkit
|
||||
/nvidia-ctk
|
||||
/shared-*
|
||||
/release-*
|
||||
102
.gitlab-ci.yml
102
.gitlab-ci.yml
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -94,7 +94,7 @@ unit-tests:
|
||||
- .multi-arch-build
|
||||
- .package-artifacts
|
||||
stage: package-build
|
||||
timeout: 2h 30m
|
||||
timeout: 3h
|
||||
script:
|
||||
- ./scripts/build-packages.sh ${DIST}-${ARCH}
|
||||
|
||||
@@ -158,6 +158,18 @@ package-debian9-amd64:
|
||||
- .dist-debian9
|
||||
- .arch-amd64
|
||||
|
||||
package-fedora35-aarch64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-fedora35
|
||||
- .arch-aarch64
|
||||
|
||||
package-fedora35-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-fedora35
|
||||
- .arch-x86_64
|
||||
|
||||
package-opensuse-leap15.1-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
@@ -231,16 +243,6 @@ image-centos7:
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
|
||||
image-centos8:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-centos8
|
||||
needs:
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-x86_64
|
||||
- package-centos8-ppc64le
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
- .image-build
|
||||
@@ -260,7 +262,8 @@ image-ubuntu18.04:
|
||||
needs:
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
- job: package-ubuntu18.04-ppc64le
|
||||
optional: true
|
||||
|
||||
image-ubuntu20.04:
|
||||
extends:
|
||||
@@ -279,21 +282,36 @@ image-packaging:
|
||||
- .package-artifacts
|
||||
- .dist-packaging
|
||||
needs:
|
||||
- package-amazonlinux2-aarch64
|
||||
- package-amazonlinux2-x86_64
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-ppc64le
|
||||
- package-centos8-x86_64
|
||||
- package-debian10-amd64
|
||||
- package-debian9-amd64
|
||||
- package-opensuse-leap15.1-x86_64
|
||||
- package-ubuntu16.04-amd64
|
||||
- package-ubuntu16.04-ppc64le
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
- job: package-centos8-aarch64
|
||||
- job: package-centos8-x86_64
|
||||
- job: package-ubuntu18.04-amd64
|
||||
- job: package-ubuntu18.04-arm64
|
||||
- job: package-amazonlinux2-aarch64
|
||||
optional: true
|
||||
- job: package-amazonlinux2-x86_64
|
||||
optional: true
|
||||
- job: package-centos7-ppc64le
|
||||
optional: true
|
||||
- job: package-centos7-x86_64
|
||||
optional: true
|
||||
- job: package-centos8-ppc64le
|
||||
optional: true
|
||||
- job: package-debian10-amd64
|
||||
optional: true
|
||||
- job: package-debian9-amd64
|
||||
optional: true
|
||||
- job: package-fedora35-aarch64
|
||||
optional: true
|
||||
- job: package-fedora35-x86_64
|
||||
optional: true
|
||||
- job: package-opensuse-leap15.1-x86_64
|
||||
optional: true
|
||||
- job: package-ubuntu16.04-amd64
|
||||
optional: true
|
||||
- job: package-ubuntu16.04-ppc64le
|
||||
optional: true
|
||||
- job: package-ubuntu18.04-ppc64le
|
||||
optional: true
|
||||
|
||||
# Define publish test helpers
|
||||
.test:toolkit:
|
||||
@@ -353,31 +371,3 @@ test-docker-ubuntu18.04:
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
test-toolkit-ubuntu20.04:
|
||||
extends:
|
||||
- .test:toolkit
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
test-containerd-ubuntu20.04:
|
||||
extends:
|
||||
- .test:containerd
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
test-crio-ubuntu20.04:
|
||||
extends:
|
||||
- .test:crio
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
test-docker-ubuntu20.04:
|
||||
extends:
|
||||
- .test:docker
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
|
||||
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -5,6 +5,8 @@
|
||||
[submodule "third_party/nvidia-container-runtime"]
|
||||
path = third_party/nvidia-container-runtime
|
||||
url = https://gitlab.com/nvidia/container-toolkit/container-runtime.git
|
||||
branch = main
|
||||
[submodule "third_party/nvidia-docker"]
|
||||
path = third_party/nvidia-docker
|
||||
url = https://gitlab.com/nvidia/container-toolkit/nvidia-docker.git
|
||||
branch = main
|
||||
|
||||
209
.nvidia-ci.yml
209
.nvidia-ci.yml
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -35,6 +35,9 @@ variables:
|
||||
# Define the public staging registry
|
||||
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
|
||||
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
|
||||
ARTIFACTORY_REPO_BASE: "https://urm.nvidia.com/artifactory/sw-gpu-cloudnative"
|
||||
# TODO: We should set the kitmaker release folder here once we have the end-to-end workflow set up
|
||||
KITMAKER_RELEASE_FOLDER: "testing"
|
||||
|
||||
.image-pull:
|
||||
stage: image-build
|
||||
@@ -48,8 +51,9 @@ variables:
|
||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
PUSH_MULTIPLE_TAGS: "false"
|
||||
# We delay the job start to allow the public pipeline to generate the required images.
|
||||
when: delayed
|
||||
start_in: 30 minutes
|
||||
rules:
|
||||
- when: delayed
|
||||
start_in: 30 minutes
|
||||
timeout: 30 minutes
|
||||
retry:
|
||||
max: 2
|
||||
@@ -67,34 +71,132 @@ variables:
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-centos7
|
||||
|
||||
image-centos8:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-centos8
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubi8
|
||||
- .image-pull
|
||||
|
||||
image-ubuntu18.04:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubuntu18.04
|
||||
- .image-pull
|
||||
|
||||
image-ubuntu20.04:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubuntu20.04
|
||||
- .image-pull
|
||||
|
||||
# The DIST=packaging target creates an image containing all built packages
|
||||
image-packaging:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-packaging
|
||||
- .image-pull
|
||||
|
||||
# Define the package release targets
|
||||
release:packages:amazonlinux2-aarch64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-amazonlinux2
|
||||
- .arch-aarch64
|
||||
|
||||
release:packages:amazonlinux2-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-amazonlinux2
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:centos7-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos7
|
||||
- .arch-ppc64le
|
||||
|
||||
release:packages:centos7-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos7
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:centos8-aarch64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos8
|
||||
- .arch-aarch64
|
||||
|
||||
release:packages:centos8-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos8
|
||||
- .arch-ppc64le
|
||||
|
||||
release:packages:centos8-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-centos8
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:debian10-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-debian10
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:debian9-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-debian9
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:fedora35-aarch64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-fedora35
|
||||
- .arch-aarch64
|
||||
|
||||
release:packages:fedora35-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-fedora35
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:opensuse-leap15.1-x86_64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-opensuse-leap15.1
|
||||
- .arch-x86_64
|
||||
|
||||
release:packages:ubuntu16.04-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu16.04
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:ubuntu16.04-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu16.04
|
||||
- .arch-ppc64le
|
||||
|
||||
release:packages:ubuntu18.04-amd64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-amd64
|
||||
|
||||
release:packages:ubuntu18.04-arm64:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-arm64
|
||||
|
||||
release:packages:ubuntu18.04-ppc64le:
|
||||
extends:
|
||||
- .release:packages
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-ppc64le
|
||||
|
||||
# We skip the integration tests for the internal CI:
|
||||
.integration:
|
||||
@@ -112,9 +214,9 @@ image-packaging:
|
||||
variables:
|
||||
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}-${DIST}"
|
||||
IMAGE_ARCHIVE: "container-toolkit.tar"
|
||||
except:
|
||||
variables:
|
||||
- $SKIP_SCANS && $SKIP_SCANS == "yes"
|
||||
rules:
|
||||
- if: $SKIP_SCANS != "yes"
|
||||
- when: manual
|
||||
before_script:
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
# TODO: We should specify the architecture here and scan all architectures
|
||||
@@ -139,85 +241,59 @@ image-packaging:
|
||||
# Define the scan targets
|
||||
scan-centos7-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos7
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
scan-centos7-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos7
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-centos7
|
||||
- scan-centos7-amd64
|
||||
|
||||
scan-centos8-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos8
|
||||
- .platform-amd64
|
||||
needs:
|
||||
- image-centos8
|
||||
|
||||
scan-centos8-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos8
|
||||
- .platform-arm64
|
||||
needs:
|
||||
- image-centos8
|
||||
- scan-centos8-amd64
|
||||
|
||||
scan-ubuntu18.04-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu18.04
|
||||
- .platform-amd64
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
scan-ubuntu18.04-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu18.04
|
||||
- .platform-arm64
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
- scan-ubuntu18.04-amd64
|
||||
|
||||
scan-ubuntu20.04-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu20.04
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
scan-ubuntu20.04-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu20.04
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
- scan-ubuntu20.04-amd64
|
||||
|
||||
scan-ubi8-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubi8
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubi8
|
||||
|
||||
scan-ubi8-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubi8
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubi8
|
||||
- scan-ubi8-amd64
|
||||
@@ -232,6 +308,27 @@ scan-ubi8-arm64:
|
||||
OUT_REGISTRY: "${NGC_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
||||
|
||||
.release:packages:
|
||||
stage: release
|
||||
needs:
|
||||
- image-packaging
|
||||
variables:
|
||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
PACKAGE_REGISTRY: "${CI_REGISTRY}"
|
||||
PACKAGE_REGISTRY_USER: "${CI_REGISTRY_USER}"
|
||||
PACKAGE_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
||||
PACKAGE_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
PACKAGE_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}-packaging"
|
||||
PACKAGE_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-${PACKAGE_REPO_TYPE}-local"
|
||||
KITMAKER_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-generic-local/${KITMAKER_RELEASE_FOLDER}"
|
||||
script:
|
||||
- !reference [.regctl-setup, before_script]
|
||||
- apk add --no-cache bash
|
||||
- regctl registry login "${PACKAGE_REGISTRY}" -u "${PACKAGE_REGISTRY_USER}" -p "${PACKAGE_REGISTRY_TOKEN}"
|
||||
- ./scripts/extract-packages.sh "${PACKAGE_IMAGE_NAME}:${PACKAGE_IMAGE_TAG}" "${DIST}-${ARCH}"
|
||||
# TODO: ./scripts/release-packages-artifactory.sh "${DIST}-${ARCH}" "${PACKAGE_ARTIFACTORY_REPO}"
|
||||
- ./scripts/release-kitmaker-artifactory.sh "${DIST}-${ARCH}" "${KITMAKER_ARTIFACTORY_REPO}"
|
||||
|
||||
release:staging-ubuntu18.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
@@ -239,36 +336,24 @@ release:staging-ubuntu18.04:
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
release:staging-ubuntu20.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
# Define the external release targets
|
||||
# Release to NGC
|
||||
release:ngc-centos7:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-centos7
|
||||
|
||||
release:ngc-centos8:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-centos8
|
||||
|
||||
release:ngc-ubuntu18.04:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubuntu18.04
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubuntu20.04:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubuntu20.04
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubi8:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubi8
|
||||
- .release:ngc
|
||||
|
||||
213
CHANGELOG.md
Normal file
213
CHANGELOG.md
Normal file
@@ -0,0 +1,213 @@
|
||||
# NVIDIA Container Toolkit Changelog
|
||||
|
||||
## v1.12.0-rc.3
|
||||
|
||||
## v1.12.0-rc.2
|
||||
|
||||
* Inject Direct Rendering Manager (DRM) devices into a container using the NVIDIA Container Runtime
|
||||
* Improve logging of errors from the NVIDIA Container Runtime
|
||||
* Improve CDI specification generation to support rootless podman
|
||||
* Use `nvidia-ctk cdi generate` to generate CDI specifications instead of `nvidia-ctk info generate-cdi`
|
||||
* [libnvidia-container] Skip creation of existing files when these are already mounted
|
||||
|
||||
## v1.12.0-rc.1
|
||||
|
||||
* Add support for multiple Docker Swarm resources
|
||||
* Improve injection of Vulkan configurations and libraries
|
||||
* Add `nvidia-ctk info generate-cdi` command to generated CDI specification for available devices
|
||||
* [libnvidia-container] Include NVVM compiler library in compute libs
|
||||
|
||||
## v1.11.0
|
||||
|
||||
* Promote v1.11.0-rc.3 to v1.11.0
|
||||
|
||||
## v1.11.0-rc.3
|
||||
|
||||
* Build fedora35 packages
|
||||
* Introduce an `nvidia-container-toolkit-base` package for better dependency management
|
||||
* Fix removal of `nvidia-container-runtime-hook` on RPM-based systems
|
||||
* Inject platform files into container on Tegra-based systems
|
||||
* [toolkit container] Update CUDA base images to 11.7.1
|
||||
* [libnvidia-container] Preload libgcc_s.so.1 on arm64 systems
|
||||
|
||||
## v1.11.0-rc.2
|
||||
|
||||
* Allow `accept-nvidia-visible-devices-*` config options to be set by toolkit container
|
||||
* [libnvidia-container] Fix bug where LDCache was not updated when the `--no-pivot-root` option was specified
|
||||
|
||||
## v1.11.0-rc.1
|
||||
|
||||
* Add discovery of GPUDirect Storage (`nvidia-fs*`) devices if the `NVIDIA_GDS` environment variable of the container is set to `enabled`
|
||||
* Add discovery of MOFED Infiniband devices if the `NVIDIA_MOFED` environment variable of the container is set to `enabled`
|
||||
* Fix bug in CSV mode where libraries listed as `sym` entries in mount specification are not added to the LDCache.
|
||||
* Rename `nvidia-container-toolkit` executable to `nvidia-container-runtime-hook` and create `nvidia-container-toolkit` as a symlink to `nvidia-container-runtime-hook` instead.
|
||||
* Add `nvidia-ctk runtime configure` command to configure the Docker config file (e.g. `/etc/docker/daemon.json`) for use with the NVIDIA Container Runtime.
|
||||
|
||||
## v1.10.0
|
||||
|
||||
* Promote v1.10.0-rc.3 to v1.10.0
|
||||
|
||||
## v1.10.0-rc.3
|
||||
|
||||
* Use default config instead of raising an error if config file cannot be found
|
||||
* Ignore NVIDIA_REQUIRE_JETPACK* environment variables for requirement checks
|
||||
* Fix bug in detection of Tegra systems where `/sys/devices/soc0/family` is ignored
|
||||
* Fix bug where links to devices were detected as devices
|
||||
* [libnvida-container] Fix bug introduced when adding libcudadebugger.so to list of libraries
|
||||
|
||||
## v1.10.0-rc.2
|
||||
|
||||
* Add support for NVIDIA_REQUIRE_* checks for cuda version and arch to csv mode
|
||||
* Switch to debug logging to reduce log verbosity
|
||||
* Support logging to logs requested in command line
|
||||
* Fix bug when launching containers with relative root path (e.g. using containerd)
|
||||
* Allow low-level runtime path to be set explicitly as nvidia-container-runtime.runtimes option
|
||||
* Fix failure to locate low-level runtime if PATH envvar is unset
|
||||
* Replace experimental option for NVIDIA Container Runtime with nvidia-container-runtime.mode = csv option
|
||||
* Use csv as default mode on Tegra systems without NVML
|
||||
* Add --version flag to all CLIs
|
||||
* [libnvidia-container] Bump libtirpc to 1.3.2
|
||||
* [libnvidia-container] Fix bug when running host ldconfig using glibc compiled with a non-standard prefix
|
||||
* [libnvidia-container] Add libcudadebugger.so to list of compute libraries
|
||||
|
||||
## v1.10.0-rc.1
|
||||
|
||||
* Include nvidia-ctk CLI in installed binaries
|
||||
* Add experimental option to NVIDIA Container Runtime
|
||||
|
||||
## v1.9.0
|
||||
|
||||
* [libnvidia-container] Add additional check for Tegra in /sys/.../family file in CLI
|
||||
* [libnvidia-container] Update jetpack-specific CLI option to only load Base CSV files by default
|
||||
* [libnvidia-container] Fix bug (from 1.8.0) when mounting GSP firmware into containers without /lib to /usr/lib symlinks
|
||||
* [libnvidia-container] Update nvml.h to CUDA 11.6.1 nvML_DEV 11.6.55
|
||||
* [libnvidia-container] Update switch statement to include new brands from latest nvml.h
|
||||
* [libnvidia-container] Process all --require flags on Jetson platforms
|
||||
* [libnvidia-container] Fix long-standing issue with running ldconfig on Debian systems
|
||||
|
||||
## v1.8.1
|
||||
|
||||
* [libnvidia-container] Fix bug in determining cgroup root when running in nested containers
|
||||
* [libnvidia-container] Fix permission issue when determining cgroup version
|
||||
|
||||
## v1.8.0
|
||||
|
||||
* Promote 1.8.0-rc.2-1 to 1.8.0
|
||||
|
||||
## v1.8.0-rc.2
|
||||
|
||||
* Remove support for building amazonlinux1 packages
|
||||
|
||||
## v1.8.0-rc.1
|
||||
|
||||
* [libnvidia-container] Add support for cgroupv2
|
||||
* Release toolkit-container images from nvidia-container-toolkit repository
|
||||
|
||||
## v1.7.0
|
||||
|
||||
* Promote 1.7.0-rc.1-1 to 1.7.0
|
||||
* Bump Golang version to 1.16.4
|
||||
|
||||
## v1.7.0-rc.1
|
||||
|
||||
* Specify containerd runtime type as string in config tools to remove dependency on containerd package
|
||||
* Add supported-driver-capabilities config option to allow for a subset of all driver capabilities to be specified
|
||||
|
||||
## v1.6.0
|
||||
|
||||
* Promote 1.6.0-rc.3-1 to 1.6.0
|
||||
* Fix unnecessary logging to stderr instead of configured nvidia-container-runtime log file
|
||||
|
||||
## v1.6.0-rc.3
|
||||
|
||||
* Add supported-driver-capabilities config option to the nvidia-container-toolkit
|
||||
* Move OCI and command line checks for runtime to internal oci package
|
||||
|
||||
## v1.6.0-rc.2
|
||||
|
||||
* Use relative path to OCI specification file (config.json) if bundle path is not specified as an argument to the nvidia-container-runtime
|
||||
|
||||
## v1.6.0-rc.1
|
||||
|
||||
* Add AARCH64 package for Amazon Linux 2
|
||||
* Include nvidia-container-runtime into nvidia-container-toolkit package
|
||||
|
||||
## v1.5.1
|
||||
|
||||
* Fix bug where Docker Swarm device selection is ignored if NVIDIA_VISIBLE_DEVICES is also set
|
||||
* Improve unit testing by using require package and adding coverage reports
|
||||
* Remove unneeded go dependencies by running go mod tidy
|
||||
* Move contents of pkg directory to cmd for CLI tools
|
||||
* Ensure make binary target explicitly sets GOOS
|
||||
|
||||
## v1.5.0
|
||||
|
||||
* Add dependence on libnvidia-container-tools >= 1.4.0
|
||||
* Add golang check targets to Makefile
|
||||
* Add Jenkinsfile definition for build targets
|
||||
* Move docker.mk to docker folder
|
||||
|
||||
## v1.4.2
|
||||
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.3
|
||||
|
||||
## v1.4.1
|
||||
|
||||
* Ignore NVIDIA_VISIBLE_DEVICES for containers with insufficent privileges
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.2
|
||||
|
||||
## v1.4.0
|
||||
|
||||
* Add 'compute' capability to list of defaults
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.1
|
||||
|
||||
## v1.3.0
|
||||
|
||||
* Promote 1.3.0-rc.2-1 to 1.3.0
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.0
|
||||
|
||||
## v1.3.0-rc.2
|
||||
|
||||
* 2c180947 Add more tests for new semantics with device list from volume mounts
|
||||
* 7c003857 Refactor accepting device lists from volume mounts as a boolean
|
||||
|
||||
## v1.3.0-rc.1
|
||||
|
||||
* b50d86c1 Update build system to accept a TAG variable for things like rc.x
|
||||
* fe65573b Add common CI tests for things like golint, gofmt, unit tests, etc.
|
||||
* da6fbb34 Revert "Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*"
|
||||
* a7fb3330 Flip build-all targets to run automatically on merge requests
|
||||
* 8b248b66 Rename github.com/NVIDIA/container-toolkit to nvidia-container-toolkit
|
||||
* da36874e Add new config options to pull device list from mounted files instead of ENVVAR
|
||||
|
||||
## v1.2.1
|
||||
|
||||
* 4e6e0ed4 Add 'ngx' to list of*all* driver capabilities
|
||||
* 2f4af743 List config.toml as a config file in the RPM SPEC
|
||||
|
||||
## v1.2.0
|
||||
|
||||
* 8e0aab46 Fix repo listed in changelog for debian distributions
|
||||
* 320bb6e4 Update dependence on libnvidia-container to 1.2.0
|
||||
* 6cfc8097 Update package license to match source license
|
||||
* e7dc3cbb Fix debian copyright file
|
||||
* d3aee3e0 Add the 'ngx' driver capability
|
||||
|
||||
## v1.1.2
|
||||
|
||||
* c32237f3 Add support for parsing Linux Capabilities for older OCI specs
|
||||
|
||||
## v1.1.1
|
||||
|
||||
* d202aded Update dependence to libnvidia-container 1.1.1
|
||||
|
||||
## v1.1.0
|
||||
|
||||
* 4e4de762 Update build system to support multi-arch builds
|
||||
* fcc1d116 Add support for MIG (Multi-Instance GPUs)
|
||||
* d4ff0416 Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*
|
||||
* 60f165ad Add no-pivot option to toolkit
|
||||
|
||||
## v1.0.5
|
||||
|
||||
* Initial release. Replaces older package nvidia-container-runtime-hook. (Closes: #XXXXXX)
|
||||
8
Makefile
8
Makefile
@@ -39,7 +39,7 @@ CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
|
||||
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
|
||||
|
||||
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
|
||||
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate $(CHECK_TARGETS)
|
||||
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
|
||||
|
||||
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
|
||||
|
||||
@@ -51,6 +51,7 @@ CLI_VERSION = $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
else
|
||||
CLI_VERSION = $(VERSION)
|
||||
endif
|
||||
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
|
||||
|
||||
GOOS ?= linux
|
||||
|
||||
@@ -60,7 +61,7 @@ cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
|
||||
endif
|
||||
cmds: $(CMD_TARGETS)
|
||||
$(CMD_TARGETS): cmd-%:
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w -X github.com/NVIDIA/nvidia-container-toolkit/internal/info.gitCommit=$(GIT_COMMIT) -X github.com/NVIDIA/nvidia-container-toolkit/internal/info.version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
|
||||
build:
|
||||
GOOS=$(GOOS) go build ./...
|
||||
@@ -102,6 +103,9 @@ misspell:
|
||||
vet:
|
||||
go vet $(MODULE)/...
|
||||
|
||||
licenses:
|
||||
go-licenses csv $(MODULE)/...
|
||||
|
||||
COVERAGE_FILE := coverage.out
|
||||
test: build cmds
|
||||
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
|
||||
|
||||
@@ -67,9 +67,9 @@ ARG TARGETARCH
|
||||
ENV PACKAGE_ARCH ${TARGETARCH}
|
||||
RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm64/aarch64} && \
|
||||
yum localinstall -y \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-${PACKAGE_VERSION}*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-${PACKAGE_VERSION}*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit-${PACKAGE_VERSION}*.rpm
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-1.*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-1.*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*-${PACKAGE_VERSION}*.rpm
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
@@ -85,7 +85,7 @@ LABEL release="N/A"
|
||||
LABEL summary="Automatically Configure your Container Runtime for GPU support."
|
||||
LABEL description="See summary"
|
||||
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
|
||||
# Install / upgrade packages here that are required to resolve CVEs
|
||||
ARG CVE_UPDATES
|
||||
|
||||
@@ -25,5 +25,7 @@ ARG ARTIFACTS_ROOT
|
||||
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||
|
||||
WORKDIR /artifacts/packages
|
||||
# Create a manifest.txt file with the absolute paths of all deb and rpm packages in the container
|
||||
RUN find /artifacts/packages -iname '*.deb' -o -iname '*.rpm' > /artifacts/manifest.txt
|
||||
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
|
||||
@@ -75,9 +75,9 @@ RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
|
||||
fi
|
||||
|
||||
RUN dpkg -i \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_${PACKAGE_VERSION}*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_${PACKAGE_VERSION}*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit_${PACKAGE_VERSION}*.deb
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*_${PACKAGE_VERSION}*.deb
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
@@ -93,7 +93,7 @@ LABEL release="N/A"
|
||||
LABEL summary="Automatically Configure your Container Runtime for GPU support."
|
||||
LABEL description="See summary"
|
||||
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
|
||||
# Install / upgrade packages here that are required to resolve CVEs
|
||||
ARG CVE_UPDATES
|
||||
|
||||
@@ -43,8 +43,8 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(DIST)
|
||||
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
|
||||
|
||||
##### Public rules #####
|
||||
DEFAULT_PUSH_TARGET := ubuntu18.04
|
||||
DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 ubi8 centos7 centos8
|
||||
DEFAULT_PUSH_TARGET := ubuntu20.04
|
||||
DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 ubi8 centos7
|
||||
|
||||
META_TARGETS := packaging
|
||||
|
||||
@@ -110,10 +110,10 @@ build-ubi8: DOCKERFILE_SUFFIX := centos
|
||||
build-ubi8: PACKAGE_DIST = centos8
|
||||
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-centos%: BASE_DIST = $(*)
|
||||
build-centos%: DOCKERFILE_SUFFIX := centos
|
||||
build-centos%: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-centos%: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
build-centos7: BASE_DIST = $(*)
|
||||
build-centos7: DOCKERFILE_SUFFIX := centos
|
||||
build-centos7: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-centos7: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-packaging: BASE_DIST := ubuntu20.04
|
||||
build-packaging: DOCKERFILE_SUFFIX := packaging
|
||||
|
||||
@@ -30,5 +30,8 @@ push-short:
|
||||
# We only have x86_64 packages for centos7
|
||||
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
# We only generate amd64 image for ubuntu18.04
|
||||
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
# We only generate a single image for packaging targets
|
||||
build-packaging: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
@@ -13,8 +13,6 @@ import (
|
||||
"golang.org/x/mod/semver"
|
||||
)
|
||||
|
||||
var envSwarmGPU *string
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
@@ -165,43 +163,31 @@ func isPrivileged(s *Spec) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
||||
// Build a list of envvars to consider.
|
||||
envVars := []string{envNVVisibleDevices}
|
||||
if envSwarmGPU != nil {
|
||||
// The Swarm envvar has higher precedence.
|
||||
envVars = append([]string{*envSwarmGPU}, envVars...)
|
||||
}
|
||||
|
||||
// Grab a reference to devices from the first envvar
|
||||
// in the list that actually exists in the environment.
|
||||
var devices *string
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := env[envVar]; ok {
|
||||
devices = &devs
|
||||
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
|
||||
// We check if the image has at least one of the Swarm resource envvars defined and use this
|
||||
// if specified.
|
||||
var hasSwarmEnvvar bool
|
||||
for _, envvar := range swarmResourceEnvvars {
|
||||
if _, exists := image[envvar]; exists {
|
||||
hasSwarmEnvvar = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Environment variable unset with legacy image: default to "all".
|
||||
if devices == nil && legacyImage {
|
||||
all := "all"
|
||||
return &all
|
||||
var devices []string
|
||||
if hasSwarmEnvvar {
|
||||
devices = image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
|
||||
} else {
|
||||
devices = image.DevicesFromEnvvars(envNVVisibleDevices).List()
|
||||
}
|
||||
|
||||
// Environment variable unset or empty or "void": return nil
|
||||
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
||||
if len(devices) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Environment variable set to "none": reset to "".
|
||||
if *devices == "none" {
|
||||
empty := ""
|
||||
return &empty
|
||||
}
|
||||
devicesString := strings.Join(devices, ",")
|
||||
|
||||
// Any other value.
|
||||
return devices
|
||||
return &devicesString
|
||||
}
|
||||
|
||||
func getDevicesFromMounts(mounts []Mount) *string {
|
||||
@@ -241,7 +227,7 @@ func getDevicesFromMounts(mounts []Mount) *string {
|
||||
return &ret
|
||||
}
|
||||
|
||||
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
|
||||
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *string {
|
||||
// If enabled, try and get the device list from volume mounts first
|
||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||
devices := getDevicesFromMounts(mounts)
|
||||
@@ -251,7 +237,7 @@ func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, p
|
||||
}
|
||||
|
||||
// Fallback to reading from the environment variable if privileges are correct
|
||||
devices := getDevicesFromEnvvar(env, legacyImage)
|
||||
devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
|
||||
if devices == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -307,7 +293,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
|
||||
legacyImage := image.IsLegacy()
|
||||
|
||||
var devices string
|
||||
if d := getDevices(hookConfig, image, mounts, privileged, legacyImage); d != nil {
|
||||
if d := getDevices(hookConfig, image, mounts, privileged); d != nil {
|
||||
devices = *d
|
||||
} else {
|
||||
// 'nil' devices means this is not a GPU container.
|
||||
@@ -369,7 +355,6 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
}
|
||||
|
||||
privileged := isPrivileged(s)
|
||||
envSwarmGPU = hook.SwarmResource
|
||||
return containerConfig{
|
||||
Pid: h.Pid,
|
||||
Rootfs: s.Root.Path,
|
||||
@@ -1,9 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
@@ -68,7 +70,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
description: "Legacy image, devices 'void', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "",
|
||||
envNVVisibleDevices: "void",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
@@ -225,7 +227,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
description: "Modern image, devices 'void', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "",
|
||||
envNVVisibleDevices: "void",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
@@ -448,6 +450,44 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Hook config set, swarmResource overrides device selection",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
s := "DOCKER_SWARM_RESOURCE"
|
||||
return &s
|
||||
}(),
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "GPU1,GPU2",
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Hook config set, comma separated swarmResource is split and overrides device selection",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
s := "NOT_DOCKER_SWARM_RESOURCE,DOCKER_SWARM_RESOURCE"
|
||||
return &s
|
||||
}(),
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "GPU1,GPU2",
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
@@ -671,7 +711,7 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
||||
hookConfig := getDefaultHookConfig()
|
||||
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged, false)
|
||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
|
||||
}
|
||||
|
||||
// For all other tests, just grab the devices and check the results
|
||||
@@ -688,13 +728,13 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
|
||||
gpuID := "GPU-12345"
|
||||
anotherGPUID := "GPU-67890"
|
||||
thirdGPUID := "MIG-12345"
|
||||
|
||||
var tests = []struct {
|
||||
description string
|
||||
envSwarmGPU *string
|
||||
env map[string]string
|
||||
legacyImage bool
|
||||
expectedDevices *string
|
||||
description string
|
||||
swarmResourceEnvvars []string
|
||||
env map[string]string
|
||||
expectedDevices *string
|
||||
}{
|
||||
{
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
@@ -729,13 +769,15 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "empty env returns all for legacy image",
|
||||
legacyImage: true,
|
||||
description: "empty env returns all for legacy image",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
expectedDevices: &all,
|
||||
},
|
||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
|
||||
@@ -781,86 +823,116 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "empty env returns all for legacy image",
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &all,
|
||||
},
|
||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
|
||||
// enabled
|
||||
{
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
},
|
||||
{
|
||||
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "void",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "none",
|
||||
},
|
||||
expectedDevices: &empty,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: gpuID,
|
||||
},
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: gpuID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: func() *string {
|
||||
result := fmt.Sprintf("%s,%s", thirdGPUID, anotherGPUID)
|
||||
return &result
|
||||
}(),
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
envSwarmGPU = tc.envSwarmGPU
|
||||
devices := getDevicesFromEnvvar(tc.env, tc.legacyImage)
|
||||
devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars)
|
||||
if tc.expectedDevices == nil {
|
||||
require.Nil(t, devices, "%d: %v", i, tc)
|
||||
return
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
@@ -34,7 +35,7 @@ type CLIConfig struct {
|
||||
Ldconfig *string `toml:"ldconfig"`
|
||||
}
|
||||
|
||||
// HookConfig : options for the nvidia-container-toolkit.
|
||||
// HookConfig : options for the nvidia-container-runtime-hook.
|
||||
type HookConfig struct {
|
||||
DisableRequire bool `toml:"disable-require"`
|
||||
SwarmResource *string `toml:"swarm-resource"`
|
||||
@@ -116,3 +117,22 @@ func (c HookConfig) getConfigOption(fieldName string) string {
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
|
||||
func (c *HookConfig) getSwarmResourceEnvvars() []string {
|
||||
if c.SwarmResource == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
candidates := strings.Split(*c.SwarmResource, ",")
|
||||
|
||||
var envvars []string
|
||||
for _, c := range candidates {
|
||||
trimmed := strings.TrimSpace(c)
|
||||
if len(trimmed) > 0 {
|
||||
envvars = append(envvars, trimmed)
|
||||
}
|
||||
}
|
||||
|
||||
return envvars
|
||||
}
|
||||
@@ -103,3 +103,59 @@ func TestGetHookConfig(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetSwarmResourceEnvvars(t *testing.T) {
|
||||
testCases := []struct {
|
||||
value string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
value: "nil",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: "",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: " ",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: "single",
|
||||
expected: []string{"single"},
|
||||
},
|
||||
{
|
||||
value: "single ",
|
||||
expected: []string{"single"},
|
||||
},
|
||||
{
|
||||
value: "one,two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
{
|
||||
value: "one ,two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
{
|
||||
value: "one, two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
c := &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
if tc.value == "nil" {
|
||||
return nil
|
||||
}
|
||||
return &tc.value
|
||||
}(),
|
||||
}
|
||||
|
||||
envvars := c.getSwarmResourceEnvvars()
|
||||
require.EqualValues(t, tc.expected, envvars)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -75,7 +75,7 @@ func doPrestart() {
|
||||
cli := hook.NvidiaContainerCLI
|
||||
|
||||
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
|
||||
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.")
|
||||
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
|
||||
}
|
||||
|
||||
container := getContainerConfig(hook)
|
||||
@@ -2,24 +2,86 @@
|
||||
|
||||
The NVIDIA Container Runtime is a shim for OCI-compliant low-level runtimes such as [runc](https://github.com/opencontainers/runc). When a `create` command is detected, the incoming [OCI runtime specification](https://github.com/opencontainers/runtime-spec) is modified in place and the command is forwarded to the low-level runtime.
|
||||
|
||||
## Standard Mode
|
||||
## Configuration
|
||||
|
||||
In the standard mode configuration, the NVIDIA Container Runtime adds a [`prestart` hook](https://github.com/opencontainers/runtime-spec/blob/master/config.md#prestart) to the incomming OCI specification that invokes the NVIDIA Container Runtime Hook for all containers created. This hook checks whether NVIDIA devices are requested and ensures GPU access is configured using the `nvidia-container-cli` from project [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
|
||||
The NVIDIA Container Runtime uses file-based configuration, with the config stored in `/etc/nvidia-container-runtime/config.toml`. The `/etc` path can be overridden using the `XDG_CONFIG_HOME` environment variable with the `${XDG_CONFIG_HOME}/nvidia-container-runtime/config.toml` file used instead if this environment variable is set.
|
||||
|
||||
## Experimental Mode
|
||||
This config file may contain options for other components of the NVIDIA container stack and for the NVIDIA Container Runtime, the relevant config section is `nvidia-container-runtime`
|
||||
|
||||
The NVIDIA Container Runtime can be configured in an experimental mode by setting the following options in the runtime's `config.toml` file:
|
||||
### Logging
|
||||
|
||||
The `log-level` config option (default: `"info"`) specifies the log level to use and the `debug` option, if set, specifies a log file to which logs for the NVIDIA Container Runtime must be written.
|
||||
|
||||
In addition to this, the NVIDIA Container Runtime considers the value of `--log` and `--log-format` flags that may be passed to it by a container runtime such as docker or containerd. If the `--debug` flag is present the log-level specified in the config file is overridden as `"debug"`.
|
||||
|
||||
### Low-level Runtime Path
|
||||
|
||||
The `runtimes` config option allows for the low-level runtime to be specified. The first entry in this list that is an existing executable file is used as the low-level runtime. If the entry is not a path, the `PATH` is searched for a matching executable. If the entry is a path this is checked instead.
|
||||
|
||||
The default value for this setting is:
|
||||
```toml
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
```
|
||||
|
||||
and if, for example, `crun` is to be used instead this can be changed to:
|
||||
```toml
|
||||
runtimes = [
|
||||
"crun",
|
||||
]
|
||||
```
|
||||
|
||||
### Runtime Mode
|
||||
|
||||
The `mode` config option (default `"auto"`) controls the high-level behaviour of the runtime.
|
||||
|
||||
#### Auto Mode
|
||||
|
||||
When `mode` is set to `"auto"`, the runtime employs heuristics to determine which mode to use based on, for example, the platform where the runtime is being run.
|
||||
|
||||
#### Legacy Mode
|
||||
|
||||
When `mode` is set to `"legacy"`, the NVIDIA Container Runtime adds a [`prestart` hook](https://github.com/opencontainers/runtime-spec/blob/master/config.md#prestart) to the incomming OCI specification that invokes the NVIDIA Container Runtime Hook for all containers created. This hook checks whether NVIDIA devices are requested and ensures GPU access is configured using the `nvidia-container-cli` from the [libnvidia-container](https://github.com/NVIDIA/libnvidia-container) project.
|
||||
|
||||
#### CSV Mode
|
||||
|
||||
When `mode` is set to `"csv"`, CSV files at `/etc/nvidia-container-runtime/host-files-for-container.d` define the devices and mounts that are to be injected into a container when it is created. The search path for the files can be overridden by modifying the `nvidia-container-runtime.modes.csv.mount-spec-path` in the config as below:
|
||||
|
||||
```toml
|
||||
[nvidia-container-runtime]
|
||||
experimental = true
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
```
|
||||
|
||||
When this setting is enabled, the modifications made to the OCI specification are controlled by the `nvidia-container-runtime.discover-mode` option, with the following mode supported:
|
||||
|
||||
* `"legacy"`: This mode mirrors the behaviour of the standard mode, inserting the NVIDIA Container Runtime Hook as a `prestart` hook into the container's OCI specification.
|
||||
* `"csv"`: This mode uses CSV files at `/etc/nvidia-container-runtime/host-files-for-container.d` to define the devices and mounts that are to be injected into a container when it is created.
|
||||
This mode is primarily targeted at Tegra-based systems without NVML available.
|
||||
|
||||
### Notes on using the docker CLI
|
||||
|
||||
The `docker` CLI supports the `--gpus` flag to select GPUs for inclusion in a container. Since specifying this flag inserts the same NVIDIA Container Runtime Hook into the OCI runtime specification. When experimental mode is activated, the NVIDIA Container Runtime detects the presence of the hook and raises an error. This requirement will be relaxed in the near future.
|
||||
Note that only the `"legacy"` NVIDIA Container Runtime mode is directly compatible with the `--gpus` flag implemented by the `docker` CLI (assuming the NVIDIA Container Runtime is not used). The reason for this is that `docker` inserts the same NVIDIA Container Runtime Hook into the OCI runtime specification.
|
||||
|
||||
|
||||
If a different mode is explicitly set or detected, the NVIDIA Container Runtime Hook will raise the following error when `--gpus` is set:
|
||||
```
|
||||
$ docker run --rm --gpus all ubuntu:18.04
|
||||
docker: Error response from daemon: failed to create shim: OCI runtime create failed: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: Running hook #0:: error running hook: exit status 1, stdout: , stderr: Auto-detected mode as 'csv'
|
||||
invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.: unknown.
|
||||
```
|
||||
Here NVIDIA Container Runtime must be used explicitly. The recommended way to do this is to specify the `--runtime=nvidia` command line argument as part of the `docker run` commmand as follows:
|
||||
```
|
||||
$ docker run --rm --gpus all --runtime=nvidia ubuntu:18.04
|
||||
```
|
||||
|
||||
Alternatively the NVIDIA Container Runtime can be set as the default runtime for docker. This can be done by modifying the `/etc/docker/daemon.json` file as follows:
|
||||
```json
|
||||
{
|
||||
"default-runtime": "nvidia",
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"path": "nvidia-container-runtime",
|
||||
"runtimeArgs": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -48,7 +48,12 @@ func run(argv []string) (rerr error) {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to set up logger: %v", err)
|
||||
}
|
||||
defer logger.Reset()
|
||||
defer func() {
|
||||
if rerr != nil {
|
||||
logger.Errorf("%v", rerr)
|
||||
}
|
||||
logger.Reset()
|
||||
}()
|
||||
|
||||
logger.Debugf("Command line arguments: %v", argv)
|
||||
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
@@ -19,9 +19,9 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
"github.com/sirupsen/logrus"
|
||||
@@ -62,11 +62,49 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [
|
||||
|
||||
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
|
||||
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
|
||||
modeModifier, err := newModeModifier(logger, cfg, ociSpec, argv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
gdsModifier, err := modifier.NewGDSModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mofedModifier, err := modifier.NewMOFEDModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tegraModifier, err := modifier.NewTegraPlatformFiles(logger)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
modifiers := modifier.Merge(
|
||||
modeModifier,
|
||||
graphicsModifier,
|
||||
gdsModifier,
|
||||
mofedModifier,
|
||||
tegraModifier,
|
||||
)
|
||||
return modifiers, nil
|
||||
}
|
||||
|
||||
func newModeModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
|
||||
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
|
||||
case "legacy":
|
||||
return modifier.NewStableRuntimeModifier(logger), nil
|
||||
case "csv":
|
||||
return modifier.NewCSVModifier(logger, cfg, ociSpec)
|
||||
case "cdi":
|
||||
return modifier.NewCDIModifier(logger, cfg, ociSpec)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
|
||||
|
||||
@@ -1,3 +1,48 @@
|
||||
# NVIDIA Container Toolkit CLI
|
||||
|
||||
The NVIDIA Container Toolkit CLI `nvidia-ctk` provides a number of utilities that are useful for working with the NVIDIA Container Toolkit.
|
||||
|
||||
## Functionality
|
||||
|
||||
### Configure runtimes
|
||||
|
||||
The `runtime` command of the `nvidia-ctk` CLI provides a set of utilities to related to the configuration
|
||||
and management of supported container engines.
|
||||
|
||||
For example, running the following command:
|
||||
```bash
|
||||
nvidia-ctk runtime configure --set-as-default
|
||||
```
|
||||
will ensure that the NVIDIA Container Runtime is added as the default runtime to the default container
|
||||
engine.
|
||||
|
||||
### Generate CDI specifications
|
||||
|
||||
The [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface) provides
|
||||
a vendor-agnostic mechanism to make arbitrary devices accessible in containerized environments. To allow NVIDIA devices to be
|
||||
used in these environments, the NVIDIA Container Toolkit CLI includes functionality to generate a CDI specification for the
|
||||
available NVIDIA GPUs in a system.
|
||||
|
||||
In order to generate the CDI specification for the available devices, run the following command:\
|
||||
```bash
|
||||
nvidia-ctk cdi generate
|
||||
```
|
||||
|
||||
The default is to print the specification to STDOUT and a filename can be specified using the `--output` flag.
|
||||
|
||||
The specification will contain a device entries as follows (where applicable):
|
||||
* An `nvidia.com/gpu=gpu{INDEX}` device for each non-MIG-enabled full GPU in the system
|
||||
* An `nvidia.com/gpu=mig{GPU_INDEX}:{MIG_INDEX}` device for each MIG-device in the system
|
||||
* A special device called `nvidia.com/gpu=all` which represents all available devices.
|
||||
|
||||
For example, to generate the CDI specification in the default location where CDI-enabled tools such as `podman`, `containerd`, `cri-o`, or the NVIDIA Container Runtime can be configured to load it, the following command can be run:
|
||||
|
||||
```bash
|
||||
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
||||
```
|
||||
(Note that `sudo` is used to ensure the correct permissions to write to the `/etc/cdi` folder)
|
||||
|
||||
With the specification generated, a GPU can be requested by specifying the fully-qualified CDI device name. With `podman` as an exmaple:
|
||||
```bash
|
||||
podman run --rm -ti --device=nvidia.com/gpu=gpu0 ubuntu nvidia-smi -L
|
||||
```
|
||||
|
||||
50
cmd/nvidia-ctk/cdi/cdi.go
Normal file
50
cmd/nvidia-ctk/cdi/cdi.go
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cdi
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs an info command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
// Create the 'hook' command
|
||||
hook := cli.Command{
|
||||
Name: "cdi",
|
||||
Usage: "Provide tools for interacting with Container Device Interface specifications",
|
||||
}
|
||||
|
||||
hook.Subcommands = []*cli.Command{
|
||||
generate.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
}
|
||||
60
cmd/nvidia-ctk/cdi/generate/common.go
Normal file
60
cmd/nvidia-ctk/cdi/generate/common.go
Normal file
@@ -0,0 +1,60 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// NewCommonDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
|
||||
// This includes driver libraries and meta devices, for example.
|
||||
func NewCommonDiscoverer(logger *logrus.Logger, root string, nvmllib nvml.Interface) (discover.Discover, error) {
|
||||
metaDevices := discover.NewDeviceDiscoverer(
|
||||
logger,
|
||||
lookup.NewCharDeviceLocator(logger, root),
|
||||
root,
|
||||
[]string{
|
||||
"/dev/nvidia-modeset",
|
||||
"/dev/nvidia-uvm-tools",
|
||||
"/dev/nvidia-uvm",
|
||||
"/dev/nvidiactl",
|
||||
},
|
||||
)
|
||||
|
||||
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing discoverer for graphics mounts: %v", err)
|
||||
}
|
||||
|
||||
driverFiles, err := NewDriverDiscoverer(logger, root, nvmllib)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
|
||||
}
|
||||
|
||||
d := discover.Merge(
|
||||
metaDevices,
|
||||
graphicsMounts,
|
||||
driverFiles,
|
||||
)
|
||||
|
||||
return d, nil
|
||||
}
|
||||
106
cmd/nvidia-ctk/cdi/generate/device-folder-permissions.go
Normal file
106
cmd/nvidia-ctk/cdi/generate/device-folder-permissions.go
Normal file
@@ -0,0 +1,106 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type deviceFolderPermissions struct {
|
||||
logger *logrus.Logger
|
||||
root string
|
||||
folders []string
|
||||
}
|
||||
|
||||
var _ discover.Discover = (*deviceFolderPermissions)(nil)
|
||||
|
||||
// NewDeviceFolderPermissionHookDiscoverer creates a discoverer that can be used to update the permissions for the parent folders of nested device nodes from the specified set of device specs.
|
||||
// This works around an issue with rootless podman when using crun as a low-level runtime.
|
||||
// See https://github.com/containers/crun/issues/1047
|
||||
// The nested devices that are applicable to the NVIDIA GPU devices are:
|
||||
// - DRM devices at /dev/dri/*
|
||||
// - NVIDIA Caps devices at /dev/nvidia-caps/*
|
||||
func NewDeviceFolderPermissionHookDiscoverer(logger *logrus.Logger, root string, deviceSpecs []specs.Device) (discover.Discover, error) {
|
||||
var folders []string
|
||||
seen := make(map[string]bool)
|
||||
for _, device := range deviceSpecs {
|
||||
for _, dn := range device.ContainerEdits.DeviceNodes {
|
||||
df := filepath.Dir(dn.Path)
|
||||
if seen[df] {
|
||||
continue
|
||||
}
|
||||
// We only consider the special case paths
|
||||
if df != "/dev/dri" && df != "/dev/nvidia-caps" {
|
||||
continue
|
||||
}
|
||||
folders = append(folders, df)
|
||||
seen[df] = true
|
||||
}
|
||||
if len(folders) == 2 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if len(folders) == 0 {
|
||||
return discover.None{}, nil
|
||||
}
|
||||
|
||||
d := &deviceFolderPermissions{
|
||||
logger: logger,
|
||||
root: root,
|
||||
folders: folders,
|
||||
}
|
||||
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// Devices are empty for this discoverer
|
||||
func (d *deviceFolderPermissions) Devices() ([]discover.Device, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Hooks returns a set of hooks that sets the file mode to 755 of parent folders for nested device nodes.
|
||||
func (d *deviceFolderPermissions) Hooks() ([]discover.Hook, error) {
|
||||
if len(d.folders) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
args := []string{"--mode", "755"}
|
||||
for _, folder := range d.folders {
|
||||
args = append(args, "--path", folder)
|
||||
}
|
||||
hook := discover.CreateNvidiaCTKHook(
|
||||
d.logger,
|
||||
lookup.NewExecutableLocator(d.logger, d.root),
|
||||
nvidiaCTKExecutable,
|
||||
nvidiaCTKDefaultFilePath,
|
||||
"chmod",
|
||||
args...,
|
||||
)
|
||||
|
||||
return []discover.Hook{hook}, nil
|
||||
}
|
||||
|
||||
// Mounts are empty for this discoverer
|
||||
func (d *deviceFolderPermissions) Mounts() ([]discover.Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
57
cmd/nvidia-ctk/cdi/generate/device.go
Normal file
57
cmd/nvidia-ctk/cdi/generate/device.go
Normal file
@@ -0,0 +1,57 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// deviceDiscoverer defines a discoverer for device nodes
|
||||
type deviceDiscoverer struct {
|
||||
logger *logrus.Logger
|
||||
root string
|
||||
deviceNodePaths []string
|
||||
}
|
||||
|
||||
var _ discover.Discover = (*deviceDiscoverer)(nil)
|
||||
|
||||
// Devices returns the device nodes for the full GPU.
|
||||
func (d *deviceDiscoverer) Devices() ([]discover.Device, error) {
|
||||
var deviceNodes []discover.Device
|
||||
for _, dn := range d.deviceNodePaths {
|
||||
deviceNode := discover.Device{
|
||||
HostPath: filepath.Join(d.root, dn),
|
||||
Path: dn,
|
||||
}
|
||||
deviceNodes = append(deviceNodes, deviceNode)
|
||||
}
|
||||
|
||||
return deviceNodes, nil
|
||||
}
|
||||
|
||||
// Hooks returns no hooks for a device discoverer
|
||||
func (d *deviceDiscoverer) Hooks() ([]discover.Hook, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Mounts returns no mounts for a device discoverer
|
||||
func (d *deviceDiscoverer) Mounts() ([]discover.Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
171
cmd/nvidia-ctk/cdi/generate/driver.go
Normal file
171
cmd/nvidia-ctk/cdi/generate/driver.go
Normal file
@@ -0,0 +1,171 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
type driverLibraries struct {
|
||||
logger *logrus.Logger
|
||||
root string
|
||||
libraries []string
|
||||
}
|
||||
|
||||
var _ discover.Discover = (*driverLibraries)(nil)
|
||||
|
||||
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
|
||||
// The supplied NVML Library is used to query the expected driver version.
|
||||
func NewDriverDiscoverer(logger *logrus.Logger, root string, nvmllib nvml.Interface) (discover.Discover, error) {
|
||||
version, r := nvmllib.SystemGetDriverVersion()
|
||||
if r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
||||
}
|
||||
|
||||
libraries, err := NewDriverLibraryDiscoverer(logger, root, version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
|
||||
}
|
||||
|
||||
firmwares := NewDriverFirmwareDiscoverer(logger, root, version)
|
||||
|
||||
binaries := NewDriverBinariesDiscoverer(logger, root)
|
||||
|
||||
d := discover.Merge(
|
||||
libraries,
|
||||
firmwares,
|
||||
binaries,
|
||||
)
|
||||
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
|
||||
func NewDriverLibraryDiscoverer(logger *logrus.Logger, root string, version string) (discover.Discover, error) {
|
||||
libraries, err := findVersionLibs(logger, root, version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
|
||||
}
|
||||
|
||||
d := driverLibraries{
|
||||
logger: logger,
|
||||
root: root,
|
||||
libraries: libraries,
|
||||
}
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
// NewDriverFirmwareDiscoverer creates a discoverer for GSP firmware associated with the specified driver version.
|
||||
func NewDriverFirmwareDiscoverer(logger *logrus.Logger, root string, version string) discover.Discover {
|
||||
gspFirmwarePath := filepath.Join("/lib/firmware/nvidia", version, "gsp.bin")
|
||||
return discover.NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(root),
|
||||
),
|
||||
root,
|
||||
[]string{gspFirmwarePath},
|
||||
)
|
||||
}
|
||||
|
||||
// NewDriverBinariesDiscoverer creates a discoverer for GSP firmware associated with the GPU driver.
|
||||
func NewDriverBinariesDiscoverer(logger *logrus.Logger, root string) discover.Discover {
|
||||
return discover.NewMounts(
|
||||
logger,
|
||||
lookup.NewExecutableLocator(logger, root),
|
||||
root,
|
||||
[]string{
|
||||
"nvidia-smi", /* System management interface */
|
||||
"nvidia-debugdump", /* GPU coredump utility */
|
||||
"nvidia-persistenced", /* Persistence mode utility */
|
||||
"nvidia-cuda-mps-control", /* Multi process service CLI */
|
||||
"nvidia-cuda-mps-server", /* Multi process service server */
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
// Devices are empty for this discoverer
|
||||
func (d *driverLibraries) Devices() ([]discover.Device, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Mounts returns the mounts for the driver libraries
|
||||
func (d *driverLibraries) Mounts() ([]discover.Mount, error) {
|
||||
var mounts []discover.Mount
|
||||
for _, d := range d.libraries {
|
||||
mount := discover.Mount{
|
||||
HostPath: d,
|
||||
Path: d,
|
||||
}
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
|
||||
return mounts, nil
|
||||
}
|
||||
|
||||
// Hooks returns a hook that updates the LDCache for the specified driver library paths.
|
||||
func (d *driverLibraries) Hooks() ([]discover.Hook, error) {
|
||||
locator := lookup.NewExecutableLocator(d.logger, d.root)
|
||||
|
||||
hook := discover.CreateLDCacheUpdateHook(
|
||||
d.logger,
|
||||
locator,
|
||||
nvidiaCTKExecutable,
|
||||
nvidiaCTKDefaultFilePath,
|
||||
d.libraries,
|
||||
)
|
||||
|
||||
return []discover.Hook{hook}, nil
|
||||
}
|
||||
|
||||
func findVersionLibs(logger *logrus.Logger, root string, version string) ([]string, error) {
|
||||
logger.Infof("Using driver version %v", version)
|
||||
|
||||
cache, err := ldcache.New(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load ldcache: %v", err)
|
||||
}
|
||||
|
||||
libs32, libs64 := cache.List()
|
||||
|
||||
var libs []string
|
||||
for _, l := range libs64 {
|
||||
if strings.HasSuffix(l, version) {
|
||||
logger.Infof("found 64-bit driver lib: %v", l)
|
||||
libs = append(libs, l)
|
||||
}
|
||||
}
|
||||
|
||||
for _, l := range libs32 {
|
||||
if strings.HasSuffix(l, version) {
|
||||
logger.Infof("found 32-bit driver lib: %v", l)
|
||||
libs = append(libs, l)
|
||||
}
|
||||
}
|
||||
|
||||
return libs, nil
|
||||
}
|
||||
148
cmd/nvidia-ctk/cdi/generate/full-gpu.go
Normal file
148
cmd/nvidia-ctk/cdi/generate/full-gpu.go
Normal file
@@ -0,0 +1,148 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// fullGPUDiscoverer wraps a deviceDiscoverer and adds specifics required for discovering full GPUs
|
||||
type fullGPUDiscoverer struct {
|
||||
deviceDiscoverer
|
||||
|
||||
pciBusID string
|
||||
}
|
||||
|
||||
var _ discover.Discover = (*fullGPUDiscoverer)(nil)
|
||||
|
||||
// NewFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
|
||||
func NewFullGPUDiscoverer(logger *logrus.Logger, root string, d device.Device) (discover.Discover, error) {
|
||||
// TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface.
|
||||
// This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin.
|
||||
minor, ret := d.GetMinorNumber()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
||||
}
|
||||
path := fmt.Sprintf("/dev/nvidia%d", minor)
|
||||
|
||||
pciInfo, ret := d.GetPciInfo()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting PCI info for device: %v", ret)
|
||||
}
|
||||
pciBusID := getBusID(pciInfo)
|
||||
|
||||
drmDeviceNodes, err := drm.GetDeviceNodesByBusID(pciBusID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to determine DRM devices for %v: %v", pciBusID, err)
|
||||
}
|
||||
|
||||
deviceNodePaths := append([]string{path}, drmDeviceNodes...)
|
||||
|
||||
device := fullGPUDiscoverer{
|
||||
deviceDiscoverer: deviceDiscoverer{
|
||||
logger: logger,
|
||||
root: root,
|
||||
deviceNodePaths: deviceNodePaths,
|
||||
},
|
||||
pciBusID: pciBusID,
|
||||
}
|
||||
|
||||
return &device, nil
|
||||
}
|
||||
|
||||
// Hooks returns the hooks for the GPU device.
|
||||
// The following hooks are detected:
|
||||
// 1. A hook to create /dev/dri/by-path symlinks
|
||||
func (d *fullGPUDiscoverer) Hooks() ([]discover.Hook, error) {
|
||||
links, err := d.deviceNodeLinks()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to discover DRA device links: %v", err)
|
||||
}
|
||||
if len(links) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
hookPath := "nvidia-ctk"
|
||||
args := []string{hookPath, "hook", "create-symlinks"}
|
||||
for _, l := range links {
|
||||
args = append(args, "--link", l)
|
||||
}
|
||||
|
||||
var hooks []discover.Hook
|
||||
hook := discover.Hook{
|
||||
Lifecycle: "createContainer",
|
||||
Path: hookPath,
|
||||
Args: args,
|
||||
}
|
||||
hooks = append(hooks, hook)
|
||||
|
||||
return hooks, nil
|
||||
}
|
||||
|
||||
// Mounts returns an empty slice for a full GPU
|
||||
func (d *fullGPUDiscoverer) Mounts() ([]discover.Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (d *fullGPUDiscoverer) deviceNodeLinks() ([]string, error) {
|
||||
candidates := []string{
|
||||
fmt.Sprintf("/dev/dri/by-path/pci-%s-card", d.pciBusID),
|
||||
fmt.Sprintf("/dev/dri/by-path/pci-%s-render", d.pciBusID),
|
||||
}
|
||||
|
||||
var links []string
|
||||
for _, c := range candidates {
|
||||
linkPath := filepath.Join(d.root, c)
|
||||
device, err := os.Readlink(linkPath)
|
||||
if err != nil {
|
||||
d.logger.Warningf("Failed to evaluate symlink %v; ignoring", linkPath)
|
||||
continue
|
||||
}
|
||||
|
||||
d.logger.Debugf("adding device symlink %v -> %v", linkPath, device)
|
||||
links = append(links, fmt.Sprintf("%v::%v", device, linkPath))
|
||||
}
|
||||
|
||||
return links, nil
|
||||
}
|
||||
|
||||
// getBusID provides a utility function that returns the string representation of the bus ID.
|
||||
func getBusID(p nvml.PciInfo) string {
|
||||
var bytes []byte
|
||||
for _, b := range p.BusId {
|
||||
if byte(b) == '\x00' {
|
||||
break
|
||||
}
|
||||
bytes = append(bytes, byte(b))
|
||||
}
|
||||
id := strings.ToLower(string(bytes))
|
||||
|
||||
if id != "0000" {
|
||||
id = strings.TrimPrefix(id, "0000")
|
||||
}
|
||||
|
||||
return id
|
||||
}
|
||||
344
cmd/nvidia-ctk/cdi/generate/generate.go
Normal file
344
cmd/nvidia-ctk/cdi/generate/generate.go
Normal file
@@ -0,0 +1,344 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaCTKExecutable = "nvidia-ctk"
|
||||
nvidiaCTKDefaultFilePath = "/usr/bin/" + nvidiaCTKExecutable
|
||||
|
||||
formatJSON = "json"
|
||||
formatYAML = "yaml"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
output string
|
||||
format string
|
||||
root string
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build creates the CLI command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'generate-cdi' command
|
||||
c := cli.Command{
|
||||
Name: "generate",
|
||||
Usage: "Generate CDI specifications for use with CDI-enabled runtimes",
|
||||
Before: func(c *cli.Context) error {
|
||||
return m.validateFlags(c, &cfg)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "output",
|
||||
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
|
||||
Destination: &cfg.output,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "format",
|
||||
Usage: "The output format for the generated spec [json | yaml]. This overrides the format defined by the output file extension (if specified).",
|
||||
Value: formatYAML,
|
||||
Destination: &cfg.format,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "root",
|
||||
Usage: "Specify the root to use when discovering the entities that should be included in the CDI specification.",
|
||||
Destination: &cfg.root,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(r *cli.Context, cfg *config) error {
|
||||
cfg.format = strings.ToLower(cfg.format)
|
||||
switch cfg.format {
|
||||
case formatJSON:
|
||||
case formatYAML:
|
||||
default:
|
||||
return fmt.Errorf("invalid output format: %v", cfg.format)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
spec, err := m.generateSpec(cfg.root)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec: %v", err)
|
||||
}
|
||||
|
||||
var outputTo io.Writer
|
||||
if cfg.output == "" {
|
||||
outputTo = os.Stdout
|
||||
} else {
|
||||
err := createParentDirsIfRequired(cfg.output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create parent folders for output file: %v", err)
|
||||
}
|
||||
outputFile, err := os.Create(cfg.output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create output file: %v", err)
|
||||
}
|
||||
defer outputFile.Close()
|
||||
outputTo = outputFile
|
||||
}
|
||||
|
||||
if outputFileFormat := formatFromFilename(cfg.output); outputFileFormat != "" {
|
||||
m.logger.Debugf("Inferred output format as %q from output file name", outputFileFormat)
|
||||
if !c.IsSet("format") {
|
||||
cfg.format = outputFileFormat
|
||||
} else if outputFileFormat != cfg.format {
|
||||
m.logger.Warningf("Requested output format %q does not match format implied by output file name: %q", cfg.format, outputFileFormat)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := yaml.Marshal(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal CDI spec: %v", err)
|
||||
}
|
||||
|
||||
if strings.ToLower(cfg.format) == formatJSON {
|
||||
data, err = yaml.YAMLToJSONStrict(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to convert CDI spec from YAML to JSON: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
err = writeToOutput(cfg.format, data, outputTo)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write output: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func formatFromFilename(filename string) string {
|
||||
ext := filepath.Ext(filename)
|
||||
switch strings.ToLower(ext) {
|
||||
case ".json":
|
||||
return formatJSON
|
||||
case ".yaml":
|
||||
return formatYAML
|
||||
case ".yml":
|
||||
return formatYAML
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func writeToOutput(format string, data []byte, output io.Writer) error {
|
||||
if format == formatYAML {
|
||||
_, err := output.Write([]byte("---\n"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write YAML separator: %v", err)
|
||||
}
|
||||
}
|
||||
_, err := output.Write(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write data: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) generateSpec(root string) (*specs.Spec, error) {
|
||||
nvmllib := nvml.New()
|
||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, r
|
||||
}
|
||||
defer nvmllib.Shutdown()
|
||||
|
||||
devicelib := device.New(device.WithNvml(nvmllib))
|
||||
|
||||
deviceSpecs, err := m.generateDeviceSpecs(devicelib, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device CDI specs: %v", err)
|
||||
}
|
||||
|
||||
allDevice := createAllDevice(deviceSpecs)
|
||||
|
||||
deviceSpecs = append(deviceSpecs, allDevice)
|
||||
|
||||
allEdits := edits.NewContainerEdits()
|
||||
|
||||
ipcs, err := NewIPCDiscoverer(m.logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
|
||||
}
|
||||
|
||||
ipcEdits, err := edits.FromDiscoverer(ipcs)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create container edits for IPC sockets: %v", err)
|
||||
}
|
||||
// TODO: We should not have to update this after the fact
|
||||
for _, s := range ipcEdits.Mounts {
|
||||
s.Options = append(s.Options, "noexec")
|
||||
}
|
||||
|
||||
allEdits.Append(ipcEdits)
|
||||
|
||||
common, err := NewCommonDiscoverer(m.logger, root, nvmllib)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
|
||||
}
|
||||
|
||||
deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(m.logger, root, deviceSpecs)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err)
|
||||
}
|
||||
|
||||
commonEdits, err := edits.FromDiscoverer(discover.Merge(common, deviceFolderPermissionHooks))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create container edits for common entities: %v", err)
|
||||
}
|
||||
|
||||
allEdits.Append(commonEdits)
|
||||
|
||||
// Construct the spec
|
||||
// TODO: Use the code to determine the minimal version
|
||||
spec := specs.Spec{
|
||||
Version: "0.4.0",
|
||||
Kind: "nvidia.com/gpu",
|
||||
Devices: deviceSpecs,
|
||||
ContainerEdits: *allEdits.ContainerEdits,
|
||||
}
|
||||
|
||||
return &spec, nil
|
||||
}
|
||||
|
||||
func (m command) generateDeviceSpecs(devicelib device.Interface, root string) ([]specs.Device, error) {
|
||||
var deviceSpecs []specs.Device
|
||||
|
||||
err := devicelib.VisitDevices(func(i int, d device.Device) error {
|
||||
isMigEnabled, err := d.IsMigEnabled()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check whether device is MIG device: %v", err)
|
||||
}
|
||||
if isMigEnabled {
|
||||
return nil
|
||||
}
|
||||
device, err := NewFullGPUDiscoverer(m.logger, root, d)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create device: %v", err)
|
||||
}
|
||||
|
||||
deviceEdits, err := edits.FromDiscoverer(device)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create container edits for device: %v", err)
|
||||
}
|
||||
|
||||
deviceSpec := specs.Device{
|
||||
Name: fmt.Sprintf("gpu%d", i),
|
||||
ContainerEdits: *deviceEdits.ContainerEdits,
|
||||
}
|
||||
|
||||
deviceSpecs = append(deviceSpecs, deviceSpec)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate CDI spec for GPU devices: %v", err)
|
||||
}
|
||||
|
||||
err = devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
|
||||
device, err := NewMigDeviceDiscoverer(m.logger, "", d, mig)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create MIG device: %v", err)
|
||||
}
|
||||
|
||||
deviceEdits, err := edits.FromDiscoverer(device)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create container edits for MIG device: %v", err)
|
||||
}
|
||||
|
||||
deviceSpec := specs.Device{
|
||||
Name: fmt.Sprintf("mig%v:%v", i, j),
|
||||
ContainerEdits: *deviceEdits.ContainerEdits,
|
||||
}
|
||||
|
||||
deviceSpecs = append(deviceSpecs, deviceSpec)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
|
||||
}
|
||||
|
||||
return deviceSpecs, nil
|
||||
}
|
||||
|
||||
// createAllDevice creates an 'all' device which combines the edits from the previous devices
|
||||
func createAllDevice(deviceSpecs []specs.Device) specs.Device {
|
||||
edits := edits.NewContainerEdits()
|
||||
|
||||
for _, d := range deviceSpecs {
|
||||
edit := cdi.ContainerEdits{
|
||||
ContainerEdits: &d.ContainerEdits,
|
||||
}
|
||||
edits.Append(&edit)
|
||||
}
|
||||
|
||||
all := specs.Device{
|
||||
Name: "all",
|
||||
ContainerEdits: *edits.ContainerEdits,
|
||||
}
|
||||
return all
|
||||
}
|
||||
|
||||
// createParentDirsIfRequired creates the parent folders of the specified path if requried.
|
||||
// Note that MkdirAll does not specifically check whether the specified path is non-empty and raises an error if it is.
|
||||
// The path will be empty if filename in the current folder is specified, for example
|
||||
func createParentDirsIfRequired(filename string) error {
|
||||
dir := filepath.Dir(filename)
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
return os.MkdirAll(dir, 0755)
|
||||
}
|
||||
42
cmd/nvidia-ctk/cdi/generate/ipc.go
Normal file
42
cmd/nvidia-ctk/cdi/generate/ipc.go
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewIPCDiscoverer creats a discoverer for NVIDIA IPC sockets.
|
||||
func NewIPCDiscoverer(logger *logrus.Logger, root string) (discover.Discover, error) {
|
||||
d := discover.NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(root),
|
||||
),
|
||||
root,
|
||||
[]string{
|
||||
"/var/run/nvidia-persistenced/socket",
|
||||
"/var/run/nvidia-fabricmanager/socket",
|
||||
"/tmp/nvidia-mps",
|
||||
},
|
||||
)
|
||||
|
||||
return d, nil
|
||||
}
|
||||
84
cmd/nvidia-ctk/cdi/generate/mig-device.go
Normal file
84
cmd/nvidia-ctk/cdi/generate/mig-device.go
Normal file
@@ -0,0 +1,84 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package generate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// migDeviceDiscoverer wraps a deviceDiscoverer and adds specifics required for discovering MIG devices.
|
||||
type migDeviceDiscoverer struct {
|
||||
deviceDiscoverer
|
||||
}
|
||||
|
||||
var _ discover.Discover = (*migDeviceDiscoverer)(nil)
|
||||
|
||||
// NewMigDeviceDiscoverer creates a discoverer for the specified mig device and its parent.
|
||||
func NewMigDeviceDiscoverer(logger *logrus.Logger, root string, parent device.Device, d device.MigDevice) (discover.Discover, error) {
|
||||
minor, ret := parent.GetMinorNumber()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
||||
}
|
||||
parentPath := fmt.Sprintf("/dev/nvidia%d", minor)
|
||||
|
||||
migCaps, err := nvcaps.NewMigCaps()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
|
||||
}
|
||||
|
||||
gi, ret := d.GetGpuInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
ci, ret := d.GetComputeInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
giCap := nvcaps.NewGPUInstanceCap(minor, gi)
|
||||
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
|
||||
}
|
||||
|
||||
ciCap := nvcaps.NewComputeInstanceCap(minor, gi, ci)
|
||||
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
|
||||
}
|
||||
|
||||
m := migDeviceDiscoverer{
|
||||
deviceDiscoverer: deviceDiscoverer{
|
||||
logger: logger,
|
||||
root: root,
|
||||
deviceNodePaths: []string{
|
||||
parentPath,
|
||||
giCapDevicePath,
|
||||
ciCapDevicePath,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return &m, nil
|
||||
}
|
||||
140
cmd/nvidia-ctk/hook/chmod/chmod.go
Normal file
140
cmd/nvidia-ctk/hook/chmod/chmod.go
Normal file
@@ -0,0 +1,140 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package chmod
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
paths cli.StringSlice
|
||||
mode string
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs a chmod command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build the chmod command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'chmod' command
|
||||
c := cli.Command{
|
||||
Name: "chmod",
|
||||
Usage: "Set the permissions of folders in the container by running chmod. The container root is prefixed to the specified paths.",
|
||||
Before: func(c *cli.Context) error {
|
||||
return validateFlags(c, &cfg)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "path",
|
||||
Usage: "Specifiy a path to apply the specified mode to",
|
||||
Destination: &cfg.paths,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "mode",
|
||||
Usage: "Specify the file mode",
|
||||
Destination: &cfg.mode,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func validateFlags(c *cli.Context, cfg *config) error {
|
||||
if strings.TrimSpace(cfg.mode) == "" {
|
||||
return fmt.Errorf("a non-empty mode must be specified")
|
||||
}
|
||||
|
||||
for _, p := range cfg.paths.Value() {
|
||||
if strings.TrimSpace(p) == "" {
|
||||
return fmt.Errorf("paths must not be empty")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
if containerRoot == "" {
|
||||
return fmt.Errorf("empty container root detected")
|
||||
}
|
||||
|
||||
paths := m.getPaths(containerRoot, cfg.paths.Value())
|
||||
if len(paths) == 0 {
|
||||
m.logger.Debugf("No paths specified; exiting")
|
||||
return nil
|
||||
}
|
||||
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
targets, err := locator.Locate("chmod")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to locate chmod: %v", err)
|
||||
}
|
||||
chmodPath := targets[0]
|
||||
|
||||
args := append([]string{filepath.Base(chmodPath), cfg.mode}, paths...)
|
||||
|
||||
return syscall.Exec(chmodPath, args, nil)
|
||||
}
|
||||
|
||||
// getPaths updates the specified paths relative to the root.
|
||||
func (m command) getPaths(root string, paths []string) []string {
|
||||
var pathsInRoot []string
|
||||
for _, f := range paths {
|
||||
pathsInRoot = append(pathsInRoot, filepath.Join(root, f))
|
||||
}
|
||||
|
||||
return pathsInRoot
|
||||
}
|
||||
@@ -74,7 +74,7 @@ func (m command) build() *cli.Command {
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "link",
|
||||
Usage: "Specify a specific link to create. The link is specified as source:target",
|
||||
Usage: "Specify a specific link to create. The link is specified as target::link",
|
||||
Destination: &cfg.links,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
@@ -145,7 +145,7 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
||||
|
||||
links := cfg.links.Value()
|
||||
for _, l := range links {
|
||||
parts := strings.Split(l, ":")
|
||||
parts := strings.Split(l, "::")
|
||||
if len(parts) != 2 {
|
||||
m.logger.Warnf("Invalid link specification %v", l)
|
||||
continue
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
package hook
|
||||
|
||||
import (
|
||||
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
|
||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
|
||||
"github.com/sirupsen/logrus"
|
||||
@@ -46,6 +47,7 @@ func (m hookCommand) build() *cli.Command {
|
||||
hook.Subcommands = []*cli.Command{
|
||||
ldcache.NewCommand(m.logger),
|
||||
symlinks.NewCommand(m.logger),
|
||||
chmod.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
|
||||
47
cmd/nvidia-ctk/info/info.go
Normal file
47
cmd/nvidia-ctk/info/info.go
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
import (
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs an info command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
// Create the 'hook' command
|
||||
hook := cli.Command{
|
||||
Name: "info",
|
||||
Usage: "Provide information about the system",
|
||||
}
|
||||
|
||||
hook.Subcommands = []*cli.Command{}
|
||||
|
||||
return &hook
|
||||
}
|
||||
@@ -19,8 +19,12 @@ package main
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
|
||||
infoCLI "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
cli "github.com/urfave/cli/v2"
|
||||
)
|
||||
@@ -70,6 +74,9 @@ func main() {
|
||||
// Define the subcommands
|
||||
c.Commands = []*cli.Command{
|
||||
hook.NewCommand(logger),
|
||||
runtime.NewCommand(logger),
|
||||
infoCLI.NewCommand(logger),
|
||||
cdi.NewCommand(logger),
|
||||
}
|
||||
|
||||
// Run the CLI
|
||||
|
||||
203
cmd/nvidia-ctk/runtime/configure/configure.go
Normal file
203
cmd/nvidia-ctk/runtime/configure/configure.go
Normal file
@@ -0,0 +1,203 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package configure
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/nvidia"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/crio"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/docker"
|
||||
"github.com/pelletier/go-toml"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultRuntime = "docker"
|
||||
|
||||
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
|
||||
defaultCrioConfigFilePath = "/etc/crio/crio.conf"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs an configure command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// config defines the options that can be set for the CLI through config files,
|
||||
// environment variables, or command line config
|
||||
type config struct {
|
||||
dryRun bool
|
||||
runtime string
|
||||
configFilePath string
|
||||
nvidiaOptions nvidia.Options
|
||||
}
|
||||
|
||||
func (m command) build() *cli.Command {
|
||||
// Create a config struct to hold the parsed environment variables or command line flags
|
||||
config := config{}
|
||||
|
||||
// Create the 'configure' command
|
||||
configure := cli.Command{
|
||||
Name: "configure",
|
||||
Usage: "Add a runtime to the specified container engine",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.configureWrapper(c, &config)
|
||||
},
|
||||
}
|
||||
|
||||
configure.Flags = []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "dry-run",
|
||||
Usage: "update the runtime configuration as required but don't write changes to disk",
|
||||
Destination: &config.dryRun,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime",
|
||||
Usage: "the target runtime engine. One of [crio, docker]",
|
||||
Value: defaultRuntime,
|
||||
Destination: &config.runtime,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config",
|
||||
Usage: "path to the config file for the target runtime",
|
||||
Destination: &config.configFilePath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-runtime-name",
|
||||
Usage: "specify the name of the NVIDIA runtime that will be added",
|
||||
Value: nvidia.RuntimeName,
|
||||
Destination: &config.nvidiaOptions.RuntimeName,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime-path",
|
||||
Usage: "specify the path to the NVIDIA runtime executable",
|
||||
Value: nvidia.RuntimeExecutable,
|
||||
Destination: &config.nvidiaOptions.RuntimePath,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "set-as-default",
|
||||
Usage: "set the specified runtime as the default runtime",
|
||||
Destination: &config.nvidiaOptions.SetAsDefault,
|
||||
},
|
||||
}
|
||||
|
||||
return &configure
|
||||
}
|
||||
|
||||
func (m command) configureWrapper(c *cli.Context, config *config) error {
|
||||
switch config.runtime {
|
||||
case "crio":
|
||||
return m.configureCrio(c, config)
|
||||
case "docker":
|
||||
return m.configureDocker(c, config)
|
||||
}
|
||||
|
||||
return fmt.Errorf("unrecognized runtime '%v'", config.runtime)
|
||||
}
|
||||
|
||||
// configureDocker updates the docker config to enable the NVIDIA Container Runtime
|
||||
func (m command) configureDocker(c *cli.Context, config *config) error {
|
||||
configFilePath := config.configFilePath
|
||||
if configFilePath == "" {
|
||||
configFilePath = defaultDockerConfigFilePath
|
||||
}
|
||||
|
||||
cfg, err := docker.LoadConfig(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = docker.UpdateConfig(
|
||||
cfg,
|
||||
config.nvidiaOptions.RuntimeName,
|
||||
config.nvidiaOptions.RuntimePath,
|
||||
config.nvidiaOptions.SetAsDefault,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
if config.dryRun {
|
||||
output, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to JSON: %v", err)
|
||||
}
|
||||
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
|
||||
return nil
|
||||
}
|
||||
err = docker.FlushConfig(cfg, configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to flush config: %v", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Wrote updated config to %v", configFilePath)
|
||||
m.logger.Infof("It is recommended that the docker daemon be restarted.")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// configureCrio updates the crio config to enable the NVIDIA Container Runtime
|
||||
func (m command) configureCrio(c *cli.Context, config *config) error {
|
||||
configFilePath := config.configFilePath
|
||||
if configFilePath == "" {
|
||||
configFilePath = defaultCrioConfigFilePath
|
||||
}
|
||||
|
||||
cfg, err := crio.LoadConfig(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
err = crio.UpdateConfig(
|
||||
cfg,
|
||||
config.nvidiaOptions.RuntimeName,
|
||||
config.nvidiaOptions.RuntimePath,
|
||||
config.nvidiaOptions.SetAsDefault,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
if config.dryRun {
|
||||
output, err := toml.Marshal(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to TOML: %v", err)
|
||||
}
|
||||
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
|
||||
return nil
|
||||
}
|
||||
err = crio.FlushConfig(configFilePath, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to flush config: %v", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Wrote updated config to %v", configFilePath)
|
||||
m.logger.Infof("It is recommended that the cri-o daemon be restarted.")
|
||||
|
||||
return nil
|
||||
}
|
||||
75
cmd/nvidia-ctk/runtime/nvidia/nvidia.go
Normal file
75
cmd/nvidia-ctk/runtime/nvidia/nvidia.go
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package nvidia
|
||||
|
||||
const (
|
||||
// RuntimeName is the default name to use in configs for the NVIDIA Container Runtime
|
||||
RuntimeName = "nvidia"
|
||||
// RuntimeExecutable is the default NVIDIA Container Runtime executable file name
|
||||
RuntimeExecutable = "nvidia-container-runtime"
|
||||
)
|
||||
|
||||
// Options specifies the options for the NVIDIA Container Runtime w.r.t a container engine such as docker.
|
||||
type Options struct {
|
||||
SetAsDefault bool
|
||||
RuntimeName string
|
||||
RuntimePath string
|
||||
}
|
||||
|
||||
// Runtime defines an NVIDIA runtime with a name and a executable
|
||||
type Runtime struct {
|
||||
Name string
|
||||
Path string
|
||||
}
|
||||
|
||||
// DefaultRuntime returns the default runtime for the configured options.
|
||||
// If the configuration is invalid or the default runtimes should not be set
|
||||
// the empty string is returned.
|
||||
func (o Options) DefaultRuntime() string {
|
||||
if !o.SetAsDefault {
|
||||
return ""
|
||||
}
|
||||
|
||||
return o.RuntimeName
|
||||
}
|
||||
|
||||
// Runtime creates a runtime struct based on the options.
|
||||
func (o Options) Runtime() Runtime {
|
||||
path := o.RuntimePath
|
||||
|
||||
if o.RuntimePath == "" {
|
||||
path = RuntimeExecutable
|
||||
}
|
||||
|
||||
r := Runtime{
|
||||
Name: o.RuntimeName,
|
||||
Path: path,
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// DockerRuntimesConfig generatest the expected docker config for the specified runtime
|
||||
func (r Runtime) DockerRuntimesConfig() map[string]interface{} {
|
||||
runtimes := make(map[string]interface{})
|
||||
runtimes[r.Name] = map[string]interface{}{
|
||||
"path": r.Path,
|
||||
"args": []string{},
|
||||
}
|
||||
|
||||
return runtimes
|
||||
}
|
||||
49
cmd/nvidia-ctk/runtime/runtime.go
Normal file
49
cmd/nvidia-ctk/runtime/runtime.go
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type runtimeCommand struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs a runtime command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := runtimeCommand{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
func (m runtimeCommand) build() *cli.Command {
|
||||
// Create the 'runtime' command
|
||||
runtime := cli.Command{
|
||||
Name: "runtime",
|
||||
Usage: "A collection of runtime-related utilities for the NVIDIA Container Toolkit",
|
||||
}
|
||||
|
||||
runtime.Subcommands = []*cli.Command{
|
||||
configure.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &runtime
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
disable-require = false
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
#path = "/usr/bin/nvidia-container-cli"
|
||||
environment = []
|
||||
#debug = "/var/log/nvidia-container-toolkit.log"
|
||||
#ldcache = "/etc/ld.so.cache"
|
||||
load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
@@ -16,4 +16,17 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
@@ -16,4 +16,17 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
@@ -16,4 +16,17 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
@@ -16,7 +16,7 @@ ldconfig = "@/sbin/ldconfig.real"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
@@ -24,3 +24,9 @@ runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
ARG BASEIMAGE
|
||||
FROM ${BASEIMAGE}
|
||||
|
||||
RUN yum install -y \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
wget \
|
||||
git \
|
||||
rpm-build \
|
||||
make && \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
ARG GOLANG_VERSION=0.0.0
|
||||
RUN set -eux; \
|
||||
\
|
||||
arch="$(uname -m)"; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture"; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
| tar -C /usr/local -xz
|
||||
|
||||
ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
RUN mkdir -p $DIST_DIR /dist
|
||||
|
||||
# nvidia-container-toolkit
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
|
||||
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
|
||||
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
|
||||
# This might not be useful on Amazon Linux, but it's simpler to keep the RHEL
|
||||
# and Amazon Linux packages identical.
|
||||
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
|
||||
|
||||
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
|
||||
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
|
||||
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
CMD arch=$(uname -m) && \
|
||||
rpmbuild --clean --target=$arch -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "version $VERSION" \
|
||||
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
|
||||
-D "release $RELEASE" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
@@ -32,6 +32,7 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
@@ -64,12 +65,17 @@ RUN if [ "$(lsb_release -cs)" = "jessie" ]; then \
|
||||
WORKDIR $DIST_DIR
|
||||
COPY packaging/debian ./debian
|
||||
|
||||
RUN sed -i "s;@VERSION@;${REVISION};" debian/changelog && \
|
||||
dch --changelog debian/changelog --append "Bump libnvidia-container dependency to ${REVISION}}" && \
|
||||
dch --changelog debian/changelog -r "" && \
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
RUN dch --create --package="${PKG_NAME}" \
|
||||
--newversion "${REVISION}" \
|
||||
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
|
||||
dch --append "Bump libnvidia-container dependency to ${LIBNVIDIA_CONTAINER1_VERSION}" && \
|
||||
dch -r "" && \
|
||||
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
||||
|
||||
CMD export DISTRIB="$(lsb_release -cs)" && \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_VERSION="${REVISION}" \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
|
||||
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/nvidia-container-toolkit_*.deb /dist
|
||||
mv /tmp/*.deb /dist
|
||||
|
||||
@@ -14,7 +14,8 @@
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
FROM golang:${GOLANG_VERSION}
|
||||
|
||||
RUN go install golang.org/x/lint/golint@latest
|
||||
RUN go install golang.org/x/lint/golint@6edffad5e6160f5949cdefc81710b2706fbcd4f6
|
||||
RUN go install github.com/matryer/moq@latest
|
||||
RUN go install github.com/gordonklaus/ineffassign@latest
|
||||
RUN go install github.com/gordonklaus/ineffassign@d2c82e48359b033cde9cf1307f6d5550b8d61321
|
||||
RUN go install github.com/client9/misspell/cmd/misspell@latest
|
||||
RUN go install github.com/google/go-licenses@latest
|
||||
|
||||
@@ -25,11 +25,12 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
ENV PKG_NAME ${PKG_NAME}
|
||||
ENV PKG_VERS ${PKG_VERS}
|
||||
ENV PKG_REV ${PKG_REV}
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
@@ -56,11 +57,16 @@ COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
CMD arch=$(uname -m) && \
|
||||
rpmbuild --clean --target=$arch -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "version $VERSION" \
|
||||
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
|
||||
-D "release $RELEASE" \
|
||||
-D "release_date $(date +'%a %b %d %Y')" \
|
||||
-D "git_commit ${GIT_COMMIT}" \
|
||||
-D "version ${PKG_VERS}" \
|
||||
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||
-D "release ${PKG_REV}" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
|
||||
@@ -1,3 +1,19 @@
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This is the dockerfile for building packages on yum-based RPM systems.
|
||||
|
||||
ARG BASEIMAGE
|
||||
FROM ${BASEIMAGE}
|
||||
|
||||
@@ -27,11 +43,12 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
ENV PKG_NAME ${PKG_NAME}
|
||||
ENV PKG_VERS ${PKG_VERS}
|
||||
ENV PKG_REV ${PKG_REV}
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
@@ -58,11 +75,16 @@ COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
CMD arch=$(uname -m) && \
|
||||
rpmbuild --clean --target=$arch -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "version $VERSION" \
|
||||
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
|
||||
-D "release $RELEASE" \
|
||||
-D "release_date $(date +'%a %b %d %Y')" \
|
||||
-D "git_commit ${GIT_COMMIT}" \
|
||||
-D "version ${PKG_VERS}" \
|
||||
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||
-D "release ${PKG_REV}" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
@@ -30,6 +30,7 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
@@ -57,12 +58,17 @@ COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
WORKDIR $DIST_DIR
|
||||
COPY packaging/debian ./debian
|
||||
|
||||
RUN sed -i "s;@VERSION@;${REVISION};" debian/changelog && \
|
||||
dch --changelog debian/changelog --append "Bump libnvidia-container dependency to ${REVISION}}" && \
|
||||
dch --changelog debian/changelog -r "" && \
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
RUN dch --create --package="${PKG_NAME}" \
|
||||
--newversion "${REVISION}" \
|
||||
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
|
||||
dch --append "Bump libnvidia-container dependency to ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" && \
|
||||
dch -r "" && \
|
||||
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
||||
|
||||
CMD export DISTRIB="$(lsb_release -cs)" && \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_VERSION="${REVISION}" \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
|
||||
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/*.deb /dist
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
|
||||
# Supported OSs by architecture
|
||||
AMD64_TARGETS := ubuntu20.04 ubuntu18.04 ubuntu16.04 debian10 debian9
|
||||
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
X86_64_TARGETS := fedora35 centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
PPC64LE_TARGETS := ubuntu18.04 ubuntu16.04 centos7 centos8 rhel7 rhel8
|
||||
ARM64_TARGETS := ubuntu20.04 ubuntu18.04
|
||||
AARCH64_TARGETS := centos8 rhel8 amazonlinux2
|
||||
AARCH64_TARGETS := fedora35 centos8 rhel8 amazonlinux2
|
||||
|
||||
# Define top-level build targets
|
||||
docker%: SHELL:=/bin/bash
|
||||
@@ -85,37 +85,63 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
|
||||
--%: docker-build-%
|
||||
@
|
||||
|
||||
LIBNVIDIA_CONTAINER_VERSION ?= $(LIB_VERSION)
|
||||
LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
|
||||
|
||||
# private ubuntu target
|
||||
--ubuntu%: OS := ubuntu
|
||||
--ubuntu%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
--ubuntu%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
|
||||
--ubuntu%: PKG_REV := 1
|
||||
|
||||
# private debian target
|
||||
--debian%: OS := debian
|
||||
--debian%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
--debian%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
|
||||
--debian%: PKG_REV := 1
|
||||
|
||||
# private centos target
|
||||
--centos%: OS := centos
|
||||
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--centos%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--centos%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--centos%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private fedora target
|
||||
--fedora%: OS := fedora
|
||||
--fedora%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--fedora%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--fedora%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--fedora%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
# The fedora(35) base image has very slow performance when building aarch64 packages.
|
||||
# Since our primary concern here is glibc versions, we use the older glibc version available in centos8.
|
||||
--fedora35%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private amazonlinux target
|
||||
--amazonlinux%: OS := amazonlinux
|
||||
--amazonlinux%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--amazonlinux%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--amazonlinux%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--amazonlinux%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
|
||||
# private opensuse-leap target
|
||||
--opensuse-leap%: OS = opensuse-leap
|
||||
--opensuse-leap%: BASEIMAGE = opensuse/leap:$(VERSION)
|
||||
--opensuse-leap%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--opensuse-leap%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
# private rhel target (actually built on centos)
|
||||
--rhel%: OS := centos
|
||||
--rhel%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
|
||||
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
|
||||
--rhel%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--rhel%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
--rhel8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
|
||||
# We allow the CONFIG_TOML_SUFFIX to be overridden.
|
||||
CONFIG_TOML_SUFFIX ?= $(OS)
|
||||
|
||||
@@ -128,10 +154,12 @@ docker-build-%:
|
||||
--progress=plain \
|
||||
--build-arg BASEIMAGE="$(BASEIMAGE)" \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg PKG_NAME="$(LIB_NAME)" \
|
||||
--build-arg PKG_VERS="$(LIB_VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
|
||||
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
|
||||
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
||||
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
||||
--tag $(BUILDIMAGE) \
|
||||
--file $(DOCKERFILE) .
|
||||
$(DOCKER) run \
|
||||
|
||||
37
go.mod
37
go.mod
@@ -1,18 +1,41 @@
|
||||
module github.com/NVIDIA/nvidia-container-toolkit
|
||||
|
||||
go 1.14
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.0.0
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.3.1-0.20220224133719-e5457123010b
|
||||
github.com/containers/podman/v4 v4.0.3
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.5.2
|
||||
github.com/opencontainers/runc v1.1.4
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab
|
||||
github.com/pelletier/go-toml v1.9.4
|
||||
github.com/sirupsen/logrus v1.8.1
|
||||
github.com/sirupsen/logrus v1.9.0
|
||||
github.com/stretchr/testify v1.7.0
|
||||
github.com/tsaikd/KDGoLib v0.0.0-20191001134900-7f3cf518e07d
|
||||
github.com/urfave/cli/v2 v2.3.0
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220922133427-1049a7fa76a9
|
||||
golang.org/x/mod v0.5.0
|
||||
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
|
||||
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6
|
||||
sigs.k8s.io/yaml v1.3.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/blang/semver v3.5.1+incompatible // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/fsnotify/fsnotify v1.5.4 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20220110225228-7e2d60f1e41f // indirect
|
||||
github.com/opencontainers/selinux v1.10.1 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
|
||||
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
|
||||
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
|
||||
)
|
||||
|
||||
@@ -61,7 +61,7 @@ func GetConfig() (*Config, error) {
|
||||
|
||||
tomlFile, err := os.Open(configFilePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open config file %v: %v", configFilePath, err)
|
||||
return getDefaultConfig(), nil
|
||||
}
|
||||
defer tomlFile.Close()
|
||||
|
||||
|
||||
125
internal/config/crio/crio.go
Normal file
125
internal/config/crio/crio.go
Normal file
@@ -0,0 +1,125 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package crio
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// LoadConfig loads the cri-o config from disk
|
||||
func LoadConfig(config string) (*toml.Tree, error) {
|
||||
log.Infof("Loading config: %v", config)
|
||||
|
||||
info, err := os.Stat(config)
|
||||
if os.IsExist(err) && info.IsDir() {
|
||||
return nil, fmt.Errorf("config file is a directory")
|
||||
}
|
||||
|
||||
configFile := config
|
||||
if os.IsNotExist(err) {
|
||||
configFile = "/dev/null"
|
||||
log.Infof("Config file does not exist, creating new one")
|
||||
}
|
||||
|
||||
cfg, err := toml.LoadFile(configFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Infof("Successfully loaded config")
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// UpdateConfig updates the cri-o config to include the NVIDIA Container Runtime
|
||||
func UpdateConfig(config *toml.Tree, runtimeClass string, runtimePath string, setAsDefault bool) error {
|
||||
switch runc := config.Get("crio.runtime.runtimes.runc").(type) {
|
||||
case *toml.Tree:
|
||||
runc, _ = toml.Load(runc.String())
|
||||
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass}, runc)
|
||||
}
|
||||
|
||||
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_path"}, runtimePath)
|
||||
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_type"}, "oci")
|
||||
|
||||
if setAsDefault {
|
||||
config.SetPath([]string{"crio", "runtime", "default_runtime"}, runtimeClass)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RevertConfig reverts the cri-o config to remove the NVIDIA Container Runtime
|
||||
func RevertConfig(config *toml.Tree, runtimeClass string) error {
|
||||
if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok {
|
||||
if runtimeClass == runtime {
|
||||
config.DeletePath([]string{"crio", "runtime", "default_runtime"})
|
||||
}
|
||||
}
|
||||
|
||||
runtimeClassPath := []string{"crio", "runtime", "runtimes", runtimeClass}
|
||||
config.DeletePath(runtimeClassPath)
|
||||
for i := 0; i < len(runtimeClassPath); i++ {
|
||||
remainingPath := runtimeClassPath[:len(runtimeClassPath)-i]
|
||||
if entry, ok := config.GetPath(remainingPath).(*toml.Tree); ok {
|
||||
if len(entry.Keys()) != 0 {
|
||||
break
|
||||
}
|
||||
config.DeletePath(remainingPath)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FlushConfig flushes the updated/reverted config out to disk
|
||||
func FlushConfig(config string, cfg *toml.Tree) error {
|
||||
log.Infof("Flushing config")
|
||||
|
||||
output, err := cfg.ToTomlString()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to TOML: %v", err)
|
||||
}
|
||||
|
||||
switch len(output) {
|
||||
case 0:
|
||||
err := os.Remove(config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to remove empty file: %v", err)
|
||||
}
|
||||
log.Infof("Config empty, removing file")
|
||||
default:
|
||||
f, err := os.Create(config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open '%v' for writing: %v", config, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.WriteString(output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to write output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Successfully flushed config")
|
||||
|
||||
return nil
|
||||
}
|
||||
117
internal/config/docker/docker.go
Normal file
117
internal/config/docker/docker.go
Normal file
@@ -0,0 +1,117 @@
|
||||
/**
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package docker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// LoadConfig loads the docker config from disk
|
||||
func LoadConfig(configFilePath string) (map[string]interface{}, error) {
|
||||
log.Infof("Loading docker config from %v", configFilePath)
|
||||
|
||||
info, err := os.Stat(configFilePath)
|
||||
if os.IsExist(err) && info.IsDir() {
|
||||
return nil, fmt.Errorf("config file is a directory")
|
||||
}
|
||||
|
||||
cfg := make(map[string]interface{})
|
||||
|
||||
if os.IsNotExist(err) {
|
||||
log.Infof("Config file does not exist, creating new one")
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
readBytes, err := ioutil.ReadFile(configFilePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to read config: %v", err)
|
||||
}
|
||||
|
||||
reader := bytes.NewReader(readBytes)
|
||||
if err := json.NewDecoder(reader).Decode(&cfg); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Infof("Successfully loaded config")
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// UpdateConfig updates the docker config to include the nvidia runtimes
|
||||
func UpdateConfig(config map[string]interface{}, runtimeName string, runtimePath string, setAsDefault bool) error {
|
||||
// Read the existing runtimes
|
||||
runtimes := make(map[string]interface{})
|
||||
if _, exists := config["runtimes"]; exists {
|
||||
runtimes = config["runtimes"].(map[string]interface{})
|
||||
}
|
||||
|
||||
// Add / update the runtime definitions
|
||||
runtimes[runtimeName] = map[string]interface{}{
|
||||
"path": runtimePath,
|
||||
"args": []string{},
|
||||
}
|
||||
|
||||
// Update the runtimes definition
|
||||
if len(runtimes) > 0 {
|
||||
config["runtimes"] = runtimes
|
||||
}
|
||||
|
||||
if setAsDefault {
|
||||
config["default-runtime"] = runtimeName
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FlushConfig flushes the updated/reverted config out to disk
|
||||
func FlushConfig(cfg map[string]interface{}, configFilePath string) error {
|
||||
log.Infof("Flushing docker config to %v", configFilePath)
|
||||
|
||||
output, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to JSON: %v", err)
|
||||
}
|
||||
|
||||
switch len(output) {
|
||||
case 0:
|
||||
err := os.Remove(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to remove empty file: %v", err)
|
||||
}
|
||||
log.Infof("Config empty, removing file")
|
||||
default:
|
||||
f, err := os.Create(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open %v for writing: %v", configFilePath, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.WriteString(string(output))
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to write output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Successfully flushed config")
|
||||
|
||||
return nil
|
||||
}
|
||||
215
internal/config/docker/docker_test.go
Normal file
215
internal/config/docker/docker_test.go
Normal file
@@ -0,0 +1,215 @@
|
||||
/**
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package docker
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestUpdateConfigDefaultRuntime(t *testing.T) {
|
||||
testCases := []struct {
|
||||
config map[string]interface{}
|
||||
runtimeName string
|
||||
setAsDefault bool
|
||||
expectedDefaultRuntimeName interface{}
|
||||
}{
|
||||
{
|
||||
setAsDefault: false,
|
||||
expectedDefaultRuntimeName: nil,
|
||||
},
|
||||
{
|
||||
runtimeName: "NAME",
|
||||
setAsDefault: true,
|
||||
expectedDefaultRuntimeName: "NAME",
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"default-runtime": "ALREADY_SET",
|
||||
},
|
||||
runtimeName: "NAME",
|
||||
setAsDefault: false,
|
||||
expectedDefaultRuntimeName: "ALREADY_SET",
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"default-runtime": "ALREADY_SET",
|
||||
},
|
||||
runtimeName: "NAME",
|
||||
setAsDefault: true,
|
||||
expectedDefaultRuntimeName: "NAME",
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
if tc.config == nil {
|
||||
tc.config = make(map[string]interface{})
|
||||
}
|
||||
err := UpdateConfig(tc.config, tc.runtimeName, "", tc.setAsDefault)
|
||||
require.NoError(t, err)
|
||||
|
||||
defaultRuntimeName := tc.config["default-runtime"]
|
||||
require.EqualValues(t, tc.expectedDefaultRuntimeName, defaultRuntimeName)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateConfigRuntimes(t *testing.T) {
|
||||
testCases := []struct {
|
||||
config map[string]interface{}
|
||||
runtimes map[string]string
|
||||
expectedConfig map[string]interface{}
|
||||
}{
|
||||
{
|
||||
config: map[string]interface{}{},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
"runtime2": "/test/runtime/dir/runtime2",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime2": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime2",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
"runtime2": "/test/runtime/dir/runtime2",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime2": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime2",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"not-nvidia": map[string]interface{}{
|
||||
"path": "some-other-path",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"not-nvidia": map[string]interface{}{
|
||||
"path": "some-other-path",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
},
|
||||
runtimes: map[string]string{
|
||||
"runtime1": "/test/runtime/dir/runtime1",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
for runtimeName, runtimePath := range tc.runtimes {
|
||||
err := UpdateConfig(tc.config, runtimeName, runtimePath, false)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
configContent, err := json.MarshalIndent(tc.config, "", " ")
|
||||
require.NoError(t, err)
|
||||
|
||||
expectedContent, err := json.MarshalIndent(tc.expectedConfig, "", " ")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.EqualValues(t, string(expectedContent), string(configContent))
|
||||
})
|
||||
|
||||
}
|
||||
}
|
||||
54
internal/config/image/capabilities.go
Normal file
54
internal/config/image/capabilities.go
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
// DriverCapability represents the possible values of NVIDIA_DRIVER_CAPABILITIES
|
||||
type DriverCapability string
|
||||
|
||||
// Constants for the supported driver capabilities
|
||||
const (
|
||||
DriverCapabilityAll DriverCapability = "all"
|
||||
DriverCapabilityCompat32 DriverCapability = "compat32"
|
||||
DriverCapabilityCompute DriverCapability = "compute"
|
||||
DriverCapabilityDisplay DriverCapability = "display"
|
||||
DriverCapabilityGraphics DriverCapability = "graphics"
|
||||
DriverCapabilityNgx DriverCapability = "ngx"
|
||||
DriverCapabilityUtility DriverCapability = "utility"
|
||||
DriverCapabilityVideo DriverCapability = "video"
|
||||
)
|
||||
|
||||
// DriverCapabilities represents the NVIDIA_DRIVER_CAPABILITIES set for the specified image.
|
||||
type DriverCapabilities map[DriverCapability]bool
|
||||
|
||||
// Has check whether the specified capability is selected.
|
||||
func (c DriverCapabilities) Has(capability DriverCapability) bool {
|
||||
if c[DriverCapabilityAll] {
|
||||
return true
|
||||
}
|
||||
return c[capability]
|
||||
}
|
||||
|
||||
// Any checks whether any of the specified capabilites are set
|
||||
func (c DriverCapabilities) Any(capabilities ...DriverCapability) bool {
|
||||
for _, cap := range capabilities {
|
||||
if c.Has(cap) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
@@ -26,10 +26,12 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
||||
envNVRequireJetpack = envNVRequirePrefix + "JETPACK"
|
||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
||||
)
|
||||
|
||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||
@@ -84,7 +86,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range i {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
}
|
||||
@@ -111,6 +113,51 @@ func (i CUDA) HasDisableRequire() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
// We concantenate all the devices from the specified envvars.
|
||||
var isSet bool
|
||||
var devices []string
|
||||
requested := make(map[string]bool)
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := i[envVar]; ok {
|
||||
isSet = true
|
||||
for _, d := range strings.Split(devs, ",") {
|
||||
trimmed := strings.TrimSpace(d)
|
||||
if len(trimmed) == 0 {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, trimmed)
|
||||
requested[trimmed] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Environment variable unset with legacy image: default to "all".
|
||||
if !isSet && len(devices) == 0 && i.IsLegacy() {
|
||||
return NewVisibleDevices("all")
|
||||
}
|
||||
|
||||
// Environment variable unset or empty or "void": return nil
|
||||
if len(devices) == 0 || requested["void"] {
|
||||
return NewVisibleDevices("void")
|
||||
}
|
||||
|
||||
return NewVisibleDevices(devices...)
|
||||
}
|
||||
|
||||
// GetDriverCapabilities returns the requested driver capabilities.
|
||||
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||
env := i[envNVDriverCapabilities]
|
||||
|
||||
capabilites := make(DriverCapabilities)
|
||||
for _, c := range strings.Split(env, ",") {
|
||||
capabilites[DriverCapability(c)] = true
|
||||
}
|
||||
|
||||
return capabilites
|
||||
}
|
||||
|
||||
func (i CUDA) legacyVersion() (string, error) {
|
||||
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
|
||||
if err != nil {
|
||||
|
||||
@@ -69,3 +69,55 @@ func TestParseMajorMinorVersionInvalid(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRequirements(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
env []string
|
||||
requirements []string
|
||||
}{
|
||||
{
|
||||
description: "NVIDIA_REQUIRE_JETPACK is ignored",
|
||||
env: []string{"NVIDIA_REQUIRE_JETPACK=csv-mounts=all"},
|
||||
requirements: nil,
|
||||
},
|
||||
{
|
||||
description: "NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS is ignored",
|
||||
env: []string{"NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS=base-only"},
|
||||
requirements: nil,
|
||||
},
|
||||
{
|
||||
description: "single requirement set",
|
||||
env: []string{"NVIDIA_REQUIRE_CUDA=cuda>=11.6"},
|
||||
requirements: []string{"cuda>=11.6"},
|
||||
},
|
||||
{
|
||||
description: "requirements are concatenated requirement set",
|
||||
env: []string{"NVIDIA_REQUIRE_CUDA=cuda>=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla"},
|
||||
requirements: []string{"cuda>=11.6", "brand=tesla"},
|
||||
},
|
||||
{
|
||||
description: "legacy image",
|
||||
env: []string{"CUDA_VERSION=11.6"},
|
||||
requirements: []string{"cuda>=11.6"},
|
||||
},
|
||||
{
|
||||
description: "legacy image with additional requirement",
|
||||
env: []string{"CUDA_VERSION=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla"},
|
||||
requirements: []string{"cuda>=11.6", "brand=tesla"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
image, err := NewCUDAImageFromEnv(tc.env)
|
||||
require.NoError(t, err)
|
||||
|
||||
requirements, err := image.GetRequirements()
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, tc.requirements, requirements)
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
127
internal/config/image/devices.go
Normal file
127
internal/config/image/devices.go
Normal file
@@ -0,0 +1,127 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// VisibleDevices represents the devices selected in a container image
|
||||
// through the NVIDIA_VISIBLE_DEVICES or other environment variables
|
||||
type VisibleDevices interface {
|
||||
List() []string
|
||||
Has(string) bool
|
||||
}
|
||||
|
||||
var _ VisibleDevices = (*all)(nil)
|
||||
var _ VisibleDevices = (*none)(nil)
|
||||
var _ VisibleDevices = (*void)(nil)
|
||||
var _ VisibleDevices = (*devices)(nil)
|
||||
|
||||
// NewVisibleDevices creates a VisibleDevices based on the value of the specified envvar.
|
||||
func NewVisibleDevices(envvars ...string) VisibleDevices {
|
||||
for _, envvar := range envvars {
|
||||
if envvar == "all" {
|
||||
return all{}
|
||||
}
|
||||
if envvar == "none" {
|
||||
return none{}
|
||||
}
|
||||
if envvar == "" || envvar == "void" {
|
||||
return void{}
|
||||
}
|
||||
}
|
||||
|
||||
return newDevices(envvars...)
|
||||
}
|
||||
|
||||
type all struct{}
|
||||
|
||||
// List returns ["all"] for all devices
|
||||
func (a all) List() []string {
|
||||
return []string{"all"}
|
||||
}
|
||||
|
||||
// Has for all devices is true for any id except the empty ID
|
||||
func (a all) Has(id string) bool {
|
||||
return id != ""
|
||||
}
|
||||
|
||||
type none struct{}
|
||||
|
||||
// List returns [""] for the none devices
|
||||
func (n none) List() []string {
|
||||
return []string{""}
|
||||
}
|
||||
|
||||
// Has for none devices is false for any id
|
||||
func (n none) Has(id string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
type void struct {
|
||||
none
|
||||
}
|
||||
|
||||
// List returns nil for the void devices
|
||||
func (v void) List() []string {
|
||||
return nil
|
||||
}
|
||||
|
||||
type devices struct {
|
||||
len int
|
||||
lookup map[string]int
|
||||
}
|
||||
|
||||
func newDevices(idOrCommaSeparated ...string) devices {
|
||||
lookup := make(map[string]int)
|
||||
|
||||
i := 0
|
||||
for _, commaSeparated := range idOrCommaSeparated {
|
||||
for _, id := range strings.Split(commaSeparated, ",") {
|
||||
lookup[id] = i
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
d := devices{
|
||||
len: i,
|
||||
lookup: lookup,
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// List returns the list of requested devices
|
||||
func (d devices) List() []string {
|
||||
list := make([]string, d.len)
|
||||
|
||||
for id, i := range d.lookup {
|
||||
list[i] = id
|
||||
}
|
||||
|
||||
return list
|
||||
}
|
||||
|
||||
// Has checks whether the specified ID is in the set of requested devices
|
||||
func (d devices) Has(id string) bool {
|
||||
if id == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
_, exist := d.lookup[id]
|
||||
return exist
|
||||
}
|
||||
@@ -44,6 +44,12 @@ type RuntimeConfig struct {
|
||||
// modesConfig defines (optional) per-mode configs
|
||||
type modesConfig struct {
|
||||
CSV csvModeConfig `toml:"csv"`
|
||||
CDI cdiModeConfig `toml:"cdi"`
|
||||
}
|
||||
|
||||
type cdiModeConfig struct {
|
||||
// SpecDirs allows for the default spec dirs for CDI to be overridden
|
||||
SpecDirs []string `toml:"spec-dirs"`
|
||||
}
|
||||
|
||||
type csvModeConfig struct {
|
||||
|
||||
62
internal/discover/char_devices.go
Normal file
62
internal/discover/char_devices.go
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// charDevices is a discover for a list of character devices
|
||||
type charDevices mounts
|
||||
|
||||
var _ Discover = (*charDevices)(nil)
|
||||
|
||||
// NewCharDeviceDiscoverer creates a discoverer which locates the specified set of device nodes.
|
||||
func NewCharDeviceDiscoverer(logger *logrus.Logger, devices []string, root string) Discover {
|
||||
locator := lookup.NewCharDeviceLocator(logger, root)
|
||||
|
||||
return NewDeviceDiscoverer(logger, locator, root, devices)
|
||||
}
|
||||
|
||||
// NewDeviceDiscoverer creates a discoverer which locates the specified set of device nodes using the specified locator.
|
||||
func NewDeviceDiscoverer(logger *logrus.Logger, locator lookup.Locator, root string, devices []string) Discover {
|
||||
m := NewMounts(logger, locator, root, devices).(*mounts)
|
||||
|
||||
return (*charDevices)(m)
|
||||
}
|
||||
|
||||
// Mounts returns the discovered mounts for the charDevices.
|
||||
// Since this explicitly specifies a device list, the mounts are nil.
|
||||
func (d *charDevices) Mounts() ([]Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Devices returns the discovered devices for the charDevices.
|
||||
// Here the device nodes are first discovered as mounts and these are converted to devices.
|
||||
func (d *charDevices) Devices() ([]Device, error) {
|
||||
devicesAsMounts, err := (*mounts)(d).Mounts()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var devices []Device
|
||||
for _, mount := range devicesAsMounts {
|
||||
devices = append(devices, Device(mount))
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
83
internal/discover/char_devices_test.go
Normal file
83
internal/discover/char_devices_test.go
Normal file
@@ -0,0 +1,83 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCharDevices(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
input *charDevices
|
||||
expectedMounts []Mount
|
||||
expectedMountsError error
|
||||
expectedDevicesError error
|
||||
expectedDevices []Device
|
||||
}{
|
||||
{
|
||||
description: "dev mounts are empty",
|
||||
input: (*charDevices)(
|
||||
&mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(string) ([]string, error) {
|
||||
return []string{"located"}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required"},
|
||||
},
|
||||
),
|
||||
expectedDevices: []Device{{Path: "located", HostPath: "located"}},
|
||||
},
|
||||
{
|
||||
description: "dev devices returns error for nil lookup",
|
||||
input: &charDevices{},
|
||||
expectedDevicesError: fmt.Errorf("no lookup defined"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
logHook.Reset()
|
||||
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
tc.input.logger = logger
|
||||
|
||||
mounts, err := tc.input.Mounts()
|
||||
if tc.expectedMountsError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedMounts, mounts)
|
||||
|
||||
devices, err := tc.input.Devices()
|
||||
if tc.expectedDevicesError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedDevices, devices)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -24,11 +24,6 @@ import (
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// charDevices is a discover for a list of character devices
|
||||
type charDevices mounts
|
||||
|
||||
var _ Discover = (*charDevices)(nil)
|
||||
|
||||
// NewFromCSVFiles creates a discoverer for the specified CSV files. A logger is also supplied.
|
||||
// The constructed discoverer is comprised of a list, with each element in the list being associated with a
|
||||
// single CSV files.
|
||||
@@ -47,23 +42,21 @@ func NewFromCSVFiles(logger *logrus.Logger, files []string, root string) (Discov
|
||||
csv.MountSpecSym: symlinkLocator,
|
||||
}
|
||||
|
||||
var discoverers []Discover
|
||||
var mountSpecs []*csv.MountSpec
|
||||
for _, filename := range files {
|
||||
d, err := NewFromCSVFile(logger, locators, filename)
|
||||
targets, err := loadCSVFile(logger, filename)
|
||||
if err != nil {
|
||||
logger.Warnf("Skipping CSV file %v: %v", filename, err)
|
||||
continue
|
||||
}
|
||||
discoverers = append(discoverers, d)
|
||||
mountSpecs = append(mountSpecs, targets...)
|
||||
}
|
||||
|
||||
return &list{discoverers: discoverers}, nil
|
||||
return newFromMountSpecs(logger, locators, root, mountSpecs)
|
||||
}
|
||||
|
||||
// NewFromCSVFile creates a discoverer for the specified CSV file. A logger is also supplied.
|
||||
// The constructed discoverer is comprised of a list, with each element in the list being associated with a particular
|
||||
// MountSpecType.
|
||||
func NewFromCSVFile(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, filename string) (Discover, error) {
|
||||
// loadCSVFile loads the specified CSV file and returns the list of mount specs
|
||||
func loadCSVFile(logger *logrus.Logger, filename string) ([]*csv.MountSpec, error) {
|
||||
// Create a discoverer for each file-kind combination
|
||||
targets, err := csv.NewCSVFileParser(logger, filename).Parse()
|
||||
if err != nil {
|
||||
@@ -73,12 +66,12 @@ func NewFromCSVFile(logger *logrus.Logger, locators map[csv.MountSpecType]lookup
|
||||
return nil, fmt.Errorf("CSV file is empty")
|
||||
}
|
||||
|
||||
return newFromMountSpecs(logger, locators, targets)
|
||||
return targets, nil
|
||||
}
|
||||
|
||||
// newFromMountSpecs creates a discoverer for the CSV file. A logger is also supplied.
|
||||
// A list of csvDiscoverers is returned, with each being associated with a single MountSpecType.
|
||||
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, targets []*csv.MountSpec) (Discover, error) {
|
||||
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, root string, targets []*csv.MountSpec) (Discover, error) {
|
||||
if len(targets) == 0 {
|
||||
return &None{}, nil
|
||||
}
|
||||
@@ -99,41 +92,16 @@ func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]loo
|
||||
return nil, fmt.Errorf("no locator defined for '%v'", t)
|
||||
}
|
||||
|
||||
m := &mounts{
|
||||
logger: logger,
|
||||
lookup: locator,
|
||||
required: candidatesByType[t],
|
||||
}
|
||||
|
||||
var m Discover
|
||||
switch t {
|
||||
case csv.MountSpecDev:
|
||||
// For device mount specs, we insert a charDevices into the list of discoverers.
|
||||
discoverers = append(discoverers, (*charDevices)(m))
|
||||
m = NewDeviceDiscoverer(logger, locator, root, candidatesByType[t])
|
||||
default:
|
||||
discoverers = append(discoverers, m)
|
||||
m = NewMounts(logger, locator, root, candidatesByType[t])
|
||||
}
|
||||
discoverers = append(discoverers, m)
|
||||
|
||||
}
|
||||
|
||||
return &list{discoverers: discoverers}, nil
|
||||
}
|
||||
|
||||
// Mounts returns the discovered mounts for the charDevices. Since this explicitly specifies a
|
||||
// device list, the mounts are nil.
|
||||
func (d *charDevices) Mounts() ([]Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Devices returns the discovered devices for the charDevices. Here the device nodes are first
|
||||
// discovered as mounts and these are converted to devices.
|
||||
func (d *charDevices) Devices() ([]Device, error) {
|
||||
devicesAsMounts, err := (*mounts)(d).Mounts()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var devices []Device
|
||||
for _, mount := range devicesAsMounts {
|
||||
devices = append(devices, Device(mount))
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
@@ -26,63 +26,6 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCharDevices(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
input *charDevices
|
||||
expectedMounts []Mount
|
||||
expectedMountsError error
|
||||
expectedDevicesError error
|
||||
expectedDevices []Device
|
||||
}{
|
||||
{
|
||||
description: "dev mounts are empty",
|
||||
input: (*charDevices)(
|
||||
&mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(string) ([]string, error) {
|
||||
return []string{"located"}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required"},
|
||||
},
|
||||
),
|
||||
expectedDevices: []Device{{Path: "located"}},
|
||||
},
|
||||
{
|
||||
description: "dev devices returns error for nil lookup",
|
||||
input: &charDevices{},
|
||||
expectedDevicesError: fmt.Errorf("no lookup defined"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
logHook.Reset()
|
||||
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
tc.input.logger = logger
|
||||
|
||||
mounts, err := tc.input.Mounts()
|
||||
if tc.expectedMountsError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedMounts, mounts)
|
||||
|
||||
devices, err := tc.input.Devices()
|
||||
if tc.expectedDevicesError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedDevices, devices)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewFromMountSpec(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
@@ -93,6 +36,7 @@ func TestNewFromMountSpec(t *testing.T) {
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
root string
|
||||
targets []*csv.MountSpec
|
||||
expectedError error
|
||||
expectedDiscoverer Discover
|
||||
@@ -133,12 +77,50 @@ func TestNewFromMountSpec(t *testing.T) {
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["dev"],
|
||||
root: "/",
|
||||
required: []string{"dev0", "dev1"},
|
||||
},
|
||||
),
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["lib"],
|
||||
root: "/",
|
||||
required: []string{"lib0"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "sets root",
|
||||
targets: []*csv.MountSpec{
|
||||
{
|
||||
Type: "dev",
|
||||
Path: "dev0",
|
||||
},
|
||||
{
|
||||
Type: "lib",
|
||||
Path: "lib0",
|
||||
},
|
||||
{
|
||||
Type: "dev",
|
||||
Path: "dev1",
|
||||
},
|
||||
},
|
||||
root: "/some/root",
|
||||
expectedDiscoverer: &list{
|
||||
discoverers: []Discover{
|
||||
(*charDevices)(
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["dev"],
|
||||
root: "/some/root",
|
||||
required: []string{"dev0", "dev1"},
|
||||
},
|
||||
),
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["lib"],
|
||||
root: "/some/root",
|
||||
required: []string{"lib0"},
|
||||
},
|
||||
},
|
||||
@@ -148,7 +130,7 @@ func TestNewFromMountSpec(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
discoverer, err := newFromMountSpecs(logger, locators, tc.targets)
|
||||
discoverer, err := newFromMountSpecs(logger, locators, tc.root, tc.targets)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
|
||||
@@ -24,12 +24,14 @@ type Config struct {
|
||||
|
||||
// Device represents a discovered character device.
|
||||
type Device struct {
|
||||
Path string
|
||||
HostPath string
|
||||
Path string
|
||||
}
|
||||
|
||||
// Mount represents a discovered mount.
|
||||
type Mount struct {
|
||||
Path string
|
||||
HostPath string
|
||||
Path string
|
||||
}
|
||||
|
||||
// Hook represents a discovered hook.
|
||||
|
||||
62
internal/discover/filter.go
Normal file
62
internal/discover/filter.go
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import "github.com/sirupsen/logrus"
|
||||
|
||||
// Filter defines an interface for filtering discovered entities
|
||||
type Filter interface {
|
||||
DeviceIsSelected(device Device) bool
|
||||
}
|
||||
|
||||
// filtered represents a filtered discoverer
|
||||
type filtered struct {
|
||||
Discover
|
||||
logger *logrus.Logger
|
||||
filter Filter
|
||||
}
|
||||
|
||||
// newFilteredDisoverer creates a discoverer that applies the specified filter to the returned entities of the discoverer
|
||||
func newFilteredDisoverer(logger *logrus.Logger, applyTo Discover, filter Filter) Discover {
|
||||
return filtered{
|
||||
Discover: applyTo,
|
||||
logger: logger,
|
||||
filter: filter,
|
||||
}
|
||||
}
|
||||
|
||||
// Devices returns a filtered list of devices based on the specified filter.
|
||||
func (d filtered) Devices() ([]Device, error) {
|
||||
devices, err := d.Discover.Devices()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if d.filter == nil {
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
var selected []Device
|
||||
for _, device := range devices {
|
||||
if d.filter.DeviceIsSelected(device) {
|
||||
selected = append(selected, device)
|
||||
}
|
||||
d.logger.Debugf("skipping device %v", device)
|
||||
}
|
||||
|
||||
return selected, nil
|
||||
}
|
||||
80
internal/discover/gds.go
Normal file
80
internal/discover/gds.go
Normal file
@@ -0,0 +1,80 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type gdsDeviceDiscoverer struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
devices Discover
|
||||
mounts Discover
|
||||
}
|
||||
|
||||
// NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
|
||||
func NewGDSDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
|
||||
devices := NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
[]string{"/dev/nvidia-fs*"},
|
||||
root,
|
||||
)
|
||||
|
||||
udev := NewMounts(
|
||||
logger,
|
||||
lookup.NewDirectoryLocator(logger, root),
|
||||
root,
|
||||
[]string{"/run/udev"},
|
||||
)
|
||||
|
||||
cufile := NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(root),
|
||||
),
|
||||
root,
|
||||
[]string{"/etc/cufile.json"},
|
||||
)
|
||||
|
||||
d := gdsDeviceDiscoverer{
|
||||
logger: logger,
|
||||
devices: devices,
|
||||
mounts: Merge(udev, cufile),
|
||||
}
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
// Devices discovers the nvidia-fs device nodes for use with GPUDirect Storage
|
||||
func (d *gdsDeviceDiscoverer) Devices() ([]Device, error) {
|
||||
return d.devices.Devices()
|
||||
}
|
||||
|
||||
// Mounts discovers the required mounts for GPUDirect Storage.
|
||||
// If no devices are discovered the discovered mounts are empty
|
||||
func (d *gdsDeviceDiscoverer) Mounts() ([]Mount, error) {
|
||||
devices, err := d.Devices()
|
||||
if err != nil || len(devices) == 0 {
|
||||
d.logger.Debugf("No nvidia-fs devices detected; skipping detection of mounts")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return d.mounts.Mounts()
|
||||
}
|
||||
276
internal/discover/graphics.go
Normal file
276
internal/discover/graphics.go
Normal file
@@ -0,0 +1,276 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
|
||||
func NewGraphicsDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, cfg *Config) (Discover, error) {
|
||||
root := cfg.Root
|
||||
|
||||
mounts, err := NewGraphicsMountsDiscoverer(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
|
||||
}
|
||||
|
||||
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
|
||||
}
|
||||
|
||||
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, cfg)
|
||||
|
||||
discover := Merge(
|
||||
Merge(drmDeviceNodes, drmByPathSymlinks),
|
||||
mounts,
|
||||
)
|
||||
|
||||
return discover, nil
|
||||
}
|
||||
|
||||
// NewGraphicsMountsDiscoverer creates a discoverer for the mounts required by graphics tools such as vulkan.
|
||||
func NewGraphicsMountsDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
|
||||
locator, err := lookup.NewLibraryLocator(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct library locator: %v", err)
|
||||
}
|
||||
libraries := NewMounts(
|
||||
logger,
|
||||
locator,
|
||||
root,
|
||||
[]string{
|
||||
"libnvidia-egl-gbm.so",
|
||||
},
|
||||
)
|
||||
|
||||
jsonMounts := NewMounts(
|
||||
logger,
|
||||
lookup.NewFileLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(root),
|
||||
lookup.WithSearchPaths("/etc", "/usr/share"),
|
||||
),
|
||||
root,
|
||||
[]string{
|
||||
"glvnd/egl_vendor.d/10_nvidia.json",
|
||||
"vulkan/icd.d/nvidia_icd.json",
|
||||
"vulkan/implicit_layer.d/nvidia_layers.json",
|
||||
"egl/egl_external_platform.d/15_nvidia_gbm.json",
|
||||
"egl/egl_external_platform.d/10_nvidia_wayland.json",
|
||||
},
|
||||
)
|
||||
|
||||
discover := Merge(
|
||||
libraries,
|
||||
jsonMounts,
|
||||
)
|
||||
|
||||
return discover, nil
|
||||
}
|
||||
|
||||
type drmDevicesByPath struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
lookup lookup.Locator
|
||||
nvidiaCTKExecutablePath string
|
||||
root string
|
||||
devicesFrom Discover
|
||||
}
|
||||
|
||||
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
|
||||
func newCreateDRMByPathSymlinks(logger *logrus.Logger, devices Discover, cfg *Config) Discover {
|
||||
d := drmDevicesByPath{
|
||||
logger: logger,
|
||||
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
|
||||
nvidiaCTKExecutablePath: cfg.NVIDIAContainerToolkitCLIExecutablePath,
|
||||
root: cfg.Root,
|
||||
devicesFrom: devices,
|
||||
}
|
||||
|
||||
return &d
|
||||
}
|
||||
|
||||
// Hooks returns a hook to create the symlinks from the required CSV files
|
||||
func (d drmDevicesByPath) Hooks() ([]Hook, error) {
|
||||
devices, err := d.devicesFrom.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to discover devices for by-path symlinks: %v", err)
|
||||
}
|
||||
if len(devices) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
links, err := d.getSpecificLinkArgs(devices)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to determine specific links: %v", err)
|
||||
}
|
||||
if len(links) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
hookPath := nvidiaCTKDefaultFilePath
|
||||
targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err)
|
||||
} else if len(targets) == 0 {
|
||||
d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath)
|
||||
} else {
|
||||
d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets)
|
||||
hookPath = targets[0]
|
||||
}
|
||||
d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
|
||||
|
||||
args := []string{hookPath, "hook", "create-symlinks"}
|
||||
for _, l := range links {
|
||||
args = append(args, "--link", l)
|
||||
}
|
||||
|
||||
h := Hook{
|
||||
Lifecycle: cdi.CreateContainerHook,
|
||||
Path: hookPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// getSpecificLinkArgs returns the required specic links that need to be created
|
||||
func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error) {
|
||||
selectedDevices := make(map[string]bool)
|
||||
for _, d := range devices {
|
||||
selectedDevices[filepath.Base(d.HostPath)] = true
|
||||
}
|
||||
|
||||
linkLocator := lookup.NewFileLocator(
|
||||
lookup.WithLogger(d.logger),
|
||||
lookup.WithRoot(d.root),
|
||||
)
|
||||
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
|
||||
if err != nil {
|
||||
d.logger.Warningf("Failed to locate by-path links: %v; ignoring", err)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var links []string
|
||||
for _, c := range candidates {
|
||||
device, err := os.Readlink(c)
|
||||
if err != nil {
|
||||
d.logger.Warningf("Failed to evaluate symlink %v; ignoring", c)
|
||||
continue
|
||||
}
|
||||
|
||||
if selectedDevices[filepath.Base(device)] {
|
||||
d.logger.Debugf("adding device symlink %v -> %v", c, device)
|
||||
links = append(links, fmt.Sprintf("%v::%v", device, c))
|
||||
}
|
||||
}
|
||||
|
||||
return links, nil
|
||||
}
|
||||
|
||||
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
|
||||
func newDRMDeviceDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, root string) (Discover, error) {
|
||||
allDevices := NewDeviceDiscoverer(
|
||||
logger,
|
||||
lookup.NewCharDeviceLocator(logger, root),
|
||||
root,
|
||||
[]string{
|
||||
"/dev/dri/card*",
|
||||
"/dev/dri/renderD*",
|
||||
},
|
||||
)
|
||||
|
||||
filter, err := newDRMDeviceFilter(logger, devices, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
|
||||
}
|
||||
|
||||
// We return a discoverer that applies the DRM device filter created above to all discovered DRM device nodes.
|
||||
d := newFilteredDisoverer(
|
||||
logger,
|
||||
allDevices,
|
||||
filter,
|
||||
)
|
||||
|
||||
return d, err
|
||||
}
|
||||
|
||||
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
|
||||
func newDRMDeviceFilter(logger *logrus.Logger, devices image.VisibleDevices, root string) (Filter, error) {
|
||||
gpuInformationPaths, err := proc.GetInformationFilePaths(root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read GPU information: %v", err)
|
||||
}
|
||||
|
||||
var selectedBusIds []string
|
||||
for _, f := range gpuInformationPaths {
|
||||
info, err := proc.ParseGPUInformationFile(f)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %v: %v", f, err)
|
||||
}
|
||||
uuid := info[proc.GPUInfoGPUUUID]
|
||||
busID := info[proc.GPUInfoBusLocation]
|
||||
minor := info[proc.GPUInfoDeviceMinor]
|
||||
|
||||
if devices.Has(minor) || devices.Has(uuid) || devices.Has(busID) {
|
||||
selectedBusIds = append(selectedBusIds, busID)
|
||||
}
|
||||
}
|
||||
|
||||
filter := make(selectDeviceByPath)
|
||||
for _, busID := range selectedBusIds {
|
||||
drmDeviceNodes, err := drm.GetDeviceNodesByBusID(busID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to determine DRM devices for %v: %v", busID, err)
|
||||
}
|
||||
for _, drmDeviceNode := range drmDeviceNodes {
|
||||
filter[filepath.Join(drmDeviceNode)] = true
|
||||
}
|
||||
}
|
||||
|
||||
return filter, nil
|
||||
}
|
||||
|
||||
// selectDeviceByPath is a filter that allows devices to be selected by the path
|
||||
type selectDeviceByPath map[string]bool
|
||||
|
||||
var _ Filter = (*selectDeviceByPath)(nil)
|
||||
|
||||
// DeviceIsSelected determines whether the device's path has been selected
|
||||
func (s selectDeviceByPath) DeviceIsSelected(device Device) bool {
|
||||
return s[device.Path]
|
||||
}
|
||||
|
||||
// MountIsSelected is always true
|
||||
func (s selectDeviceByPath) MountIsSelected(Mount) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// HookIsSelected is always true
|
||||
func (s selectDeviceByPath) HookIsSelected(Hook) bool {
|
||||
return true
|
||||
}
|
||||
@@ -19,7 +19,6 @@ package discover
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
@@ -57,70 +56,102 @@ func (d ldconfig) Hooks() ([]Hook, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
|
||||
}
|
||||
|
||||
libDirs := getLibDirs(mounts)
|
||||
|
||||
hookPath := nvidiaCTKDefaultFilePath
|
||||
targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err)
|
||||
} else if len(targets) == 0 {
|
||||
d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath)
|
||||
} else {
|
||||
d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets)
|
||||
hookPath = targets[0]
|
||||
}
|
||||
d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
|
||||
|
||||
args := []string{hookPath, "hook", "update-ldcache"}
|
||||
for _, f := range libDirs {
|
||||
args = append(args, "--folder", f)
|
||||
}
|
||||
h := Hook{
|
||||
Lifecycle: cdi.CreateContainerHook,
|
||||
Path: hookPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
h := CreateLDCacheUpdateHook(
|
||||
d.logger,
|
||||
d.lookup,
|
||||
d.nvidiaCTKExecutablePath,
|
||||
nvidiaCTKDefaultFilePath,
|
||||
getLibraryPaths(mounts),
|
||||
)
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// getLibDirs extracts the library dirs from the specified mounts
|
||||
func getLibDirs(mounts []Mount) []string {
|
||||
var paths []string
|
||||
checked := make(map[string]bool)
|
||||
|
||||
for _, m := range mounts {
|
||||
dir := filepath.Dir(m.Path)
|
||||
if dir == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
_, exists := checked[dir]
|
||||
if exists {
|
||||
continue
|
||||
}
|
||||
checked[dir] = isLibName(filepath.Base(m.Path))
|
||||
|
||||
if checked[dir] {
|
||||
paths = append(paths, dir)
|
||||
}
|
||||
// CreateLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache
|
||||
func CreateLDCacheUpdateHook(logger *logrus.Logger, lookup lookup.Locator, executable string, defaultPath string, libraries []string) Hook {
|
||||
var args []string
|
||||
for _, f := range uniqueFolders(libraries) {
|
||||
args = append(args, "--folder", f)
|
||||
}
|
||||
|
||||
sort.Strings(paths)
|
||||
hook := CreateNvidiaCTKHook(
|
||||
logger,
|
||||
lookup,
|
||||
executable,
|
||||
defaultPath,
|
||||
"update-ldcache",
|
||||
args...,
|
||||
)
|
||||
|
||||
return hook
|
||||
|
||||
}
|
||||
|
||||
// CreateNvidiaCTKHook creates a hook which invokes the NVIDIA Container CLI hook subcommand.
|
||||
func CreateNvidiaCTKHook(logger *logrus.Logger, lookup lookup.Locator, executable string, defaultPath string, hookName string, additionalArgs ...string) Hook {
|
||||
hookPath := defaultPath
|
||||
targets, err := lookup.Locate(executable)
|
||||
if err != nil {
|
||||
logger.Warnf("Failed to locate %v: %v", executable, err)
|
||||
} else if len(targets) == 0 {
|
||||
logger.Warnf("%v not found", executable)
|
||||
} else {
|
||||
logger.Debugf("Found %v candidates: %v", executable, targets)
|
||||
hookPath = targets[0]
|
||||
}
|
||||
logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
|
||||
|
||||
return Hook{
|
||||
Lifecycle: cdi.CreateContainerHook,
|
||||
Path: hookPath,
|
||||
Args: append([]string{filepath.Base(hookPath), "hook", hookName}, additionalArgs...),
|
||||
}
|
||||
}
|
||||
|
||||
// getLibraryPaths extracts the library dirs from the specified mounts
|
||||
func getLibraryPaths(mounts []Mount) []string {
|
||||
var paths []string
|
||||
for _, m := range mounts {
|
||||
if !isLibName(m.Path) {
|
||||
continue
|
||||
}
|
||||
paths = append(paths, m.Path)
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
// isLibName checks if the specified filename is a library (i.e. ends in `.so*`)
|
||||
func isLibName(filename string) bool {
|
||||
parts := strings.Split(filename, ".")
|
||||
|
||||
for _, p := range parts {
|
||||
if p == "so" {
|
||||
return true
|
||||
}
|
||||
base := filepath.Base(filename)
|
||||
|
||||
isLib, err := filepath.Match("lib?*.so*", base)
|
||||
if !isLib || err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return false
|
||||
parts := strings.Split(base, ".so")
|
||||
if len(parts) == 1 {
|
||||
return true
|
||||
}
|
||||
|
||||
return parts[len(parts)-1] == "" || strings.HasPrefix(parts[len(parts)-1], ".")
|
||||
}
|
||||
|
||||
// uniqueFolders returns the unique set of folders for the specified files
|
||||
func uniqueFolders(libraries []string) []string {
|
||||
var paths []string
|
||||
checked := make(map[string]bool)
|
||||
|
||||
for _, l := range libraries {
|
||||
dir := filepath.Dir(l)
|
||||
if dir == "" {
|
||||
continue
|
||||
}
|
||||
if checked[dir] {
|
||||
continue
|
||||
}
|
||||
checked[dir] = true
|
||||
paths = append(paths, dir)
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
164
internal/discover/ldconfig_test.go
Normal file
164
internal/discover/ldconfig_test.go
Normal file
@@ -0,0 +1,164 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestLDCacheUpdateHook(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
cfg := Config{
|
||||
Root: "/",
|
||||
NVIDIAContainerToolkitCLIExecutablePath: "/foo/bar/nvidia-ctk",
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
mounts []Mount
|
||||
mountError error
|
||||
expectedError error
|
||||
expectedArgs []string
|
||||
}{
|
||||
{
|
||||
description: "empty mounts",
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache"},
|
||||
},
|
||||
{
|
||||
description: "mount error",
|
||||
mountError: fmt.Errorf("mountError"),
|
||||
expectedError: fmt.Errorf("mountError"),
|
||||
},
|
||||
{
|
||||
description: "library folders are added to args",
|
||||
mounts: []Mount{
|
||||
{
|
||||
Path: "/usr/local/lib/libfoo.so",
|
||||
},
|
||||
{
|
||||
Path: "/usr/bin/notlib",
|
||||
},
|
||||
{
|
||||
Path: "/usr/local/libother/libfoo.so",
|
||||
},
|
||||
{
|
||||
Path: "/usr/local/lib/libbar.so",
|
||||
},
|
||||
},
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
|
||||
},
|
||||
{
|
||||
description: "host paths are ignored",
|
||||
mounts: []Mount{
|
||||
{
|
||||
HostPath: "/usr/local/other/libfoo.so",
|
||||
Path: "/usr/local/lib/libfoo.so",
|
||||
},
|
||||
},
|
||||
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
mountMock := &DiscoverMock{
|
||||
MountsFunc: func() ([]Mount, error) {
|
||||
return tc.mounts, tc.mountError
|
||||
},
|
||||
}
|
||||
expectedHook := Hook{
|
||||
Path: "/usr/bin/nvidia-ctk",
|
||||
Args: tc.expectedArgs,
|
||||
Lifecycle: "createContainer",
|
||||
}
|
||||
|
||||
d, err := NewLDCacheUpdateHook(logger, mountMock, &cfg)
|
||||
require.NoError(t, err)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.Len(t, mountMock.MountsCalls(), 1)
|
||||
require.Len(t, mountMock.DevicesCalls(), 0)
|
||||
require.Len(t, mountMock.HooksCalls(), 0)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, hooks, 1)
|
||||
|
||||
require.EqualValues(t, hooks[0], expectedHook)
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, mounts)
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestIsLibName(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
isLib bool
|
||||
}{
|
||||
{
|
||||
name: "",
|
||||
isLib: false,
|
||||
},
|
||||
{
|
||||
name: "lib/not/.so",
|
||||
isLib: false,
|
||||
},
|
||||
{
|
||||
name: "lib.so",
|
||||
isLib: false,
|
||||
},
|
||||
{
|
||||
name: "notlibcuda.so",
|
||||
isLib: false,
|
||||
},
|
||||
{
|
||||
name: "libcuda.so",
|
||||
isLib: true,
|
||||
},
|
||||
{
|
||||
name: "libcuda.so.1",
|
||||
isLib: true,
|
||||
},
|
||||
{
|
||||
name: "libcuda.soNOT",
|
||||
isLib: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
require.Equal(t, tc.isLib, isLibName(tc.name))
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -27,8 +27,8 @@ type list struct {
|
||||
|
||||
var _ Discover = (*list)(nil)
|
||||
|
||||
// NewList creates a discoverer that is the composite of a list of discoveres.
|
||||
func NewList(d ...Discover) Discover {
|
||||
// Merge creates a discoverer that is the composite of a list of discoveres.
|
||||
func Merge(d ...Discover) Discover {
|
||||
l := list{
|
||||
discoverers: d,
|
||||
}
|
||||
|
||||
35
internal/discover/mofed.go
Normal file
35
internal/discover/mofed.go
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewMOFEDDiscoverer creates a discoverer for MOFED devices.
|
||||
func NewMOFEDDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
|
||||
devices := NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
[]string{
|
||||
"/dev/infiniband/uverbs*",
|
||||
"/dev/infiniband/rdma_cm",
|
||||
},
|
||||
root,
|
||||
)
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
@@ -18,6 +18,8 @@ package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
@@ -31,6 +33,7 @@ type mounts struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
lookup lookup.Locator
|
||||
root string
|
||||
required []string
|
||||
sync.Mutex
|
||||
cache []Mount
|
||||
@@ -38,6 +41,16 @@ type mounts struct {
|
||||
|
||||
var _ Discover = (*mounts)(nil)
|
||||
|
||||
// NewMounts creates a discoverer for the required mounts using the specified locator.
|
||||
func NewMounts(logger *logrus.Logger, lookup lookup.Locator, root string, required []string) Discover {
|
||||
return &mounts{
|
||||
logger: logger,
|
||||
lookup: lookup,
|
||||
root: filepath.Join("/", root),
|
||||
required: required,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *mounts) Mounts() ([]Mount, error) {
|
||||
if d.lookup == nil {
|
||||
return nil, fmt.Errorf("no lookup defined")
|
||||
@@ -51,7 +64,7 @@ func (d *mounts) Mounts() ([]Mount, error) {
|
||||
d.Lock()
|
||||
defer d.Unlock()
|
||||
|
||||
paths := make(map[string]bool)
|
||||
uniqueMounts := make(map[string]Mount)
|
||||
|
||||
for _, candidate := range d.required {
|
||||
d.logger.Debugf("Locating %v", candidate)
|
||||
@@ -66,20 +79,39 @@ func (d *mounts) Mounts() ([]Mount, error) {
|
||||
}
|
||||
d.logger.Debugf("Located %v as %v", candidate, located)
|
||||
for _, p := range located {
|
||||
paths[p] = true
|
||||
if _, ok := uniqueMounts[p]; ok {
|
||||
d.logger.Debugf("Skipping duplicate mount %v", p)
|
||||
continue
|
||||
}
|
||||
|
||||
r := d.relativeTo(p)
|
||||
if r == "" {
|
||||
r = p
|
||||
}
|
||||
|
||||
d.logger.Infof("Selecting %v as %v", p, r)
|
||||
uniqueMounts[p] = Mount{
|
||||
HostPath: p,
|
||||
Path: r,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var mounts []Mount
|
||||
for path := range paths {
|
||||
d.logger.Infof("Selecting %v", path)
|
||||
mount := Mount{
|
||||
Path: path,
|
||||
}
|
||||
mounts = append(mounts, mount)
|
||||
for _, m := range uniqueMounts {
|
||||
mounts = append(mounts, m)
|
||||
}
|
||||
|
||||
d.cache = mounts
|
||||
|
||||
return mounts, nil
|
||||
return d.cache, nil
|
||||
}
|
||||
|
||||
// relativeTo returns the path relative to the root for the file locator
|
||||
func (d *mounts) relativeTo(path string) string {
|
||||
if d.root == "/" {
|
||||
return path
|
||||
}
|
||||
|
||||
return strings.TrimPrefix(path, d.root)
|
||||
}
|
||||
|
||||
@@ -70,7 +70,7 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "located"}},
|
||||
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
|
||||
},
|
||||
{
|
||||
description: "mounts removes located duplicates",
|
||||
@@ -83,7 +83,7 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required0", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "located"}},
|
||||
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips located errors",
|
||||
@@ -98,7 +98,7 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required0", "error", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "required0"}, {Path: "required1"}},
|
||||
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips unlocated",
|
||||
@@ -113,10 +113,10 @@ func TestMounts(t *testing.T) {
|
||||
},
|
||||
required: []string{"required0", "empty", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "required0"}, {Path: "required1"}},
|
||||
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips unlocated",
|
||||
description: "mounts adds multiple",
|
||||
input: &mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(s string) ([]string, error) {
|
||||
@@ -129,10 +129,25 @@ func TestMounts(t *testing.T) {
|
||||
required: []string{"required0", "multiple", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{
|
||||
{Path: "required0"},
|
||||
{Path: "multiple0"},
|
||||
{Path: "multiple1"},
|
||||
{Path: "required1"},
|
||||
{Path: "required0", HostPath: "required0"},
|
||||
{Path: "multiple0", HostPath: "multiple0"},
|
||||
{Path: "multiple1", HostPath: "multiple1"},
|
||||
{Path: "required1", HostPath: "required1"},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "mounts uses relative path",
|
||||
input: &mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(s string) ([]string, error) {
|
||||
return []string{"/some/root/located"}, nil
|
||||
},
|
||||
},
|
||||
root: "/some/root",
|
||||
required: []string{"required0", "multiple", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{
|
||||
{Path: "/located", HostPath: "/some/root/located"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -117,7 +117,7 @@ func (d symlinks) getSpecificLinkArgs() ([]string, error) {
|
||||
}
|
||||
|
||||
linkPath := filepath.Join(filepath.Dir(m.Path), link)
|
||||
links = append(links, "--link", fmt.Sprintf("%v:%v", target, linkPath))
|
||||
links = append(links, "--link", fmt.Sprintf("%v::%v", target, linkPath))
|
||||
linkProcessed[link] = true
|
||||
}
|
||||
|
||||
|
||||
@@ -25,21 +25,36 @@ import (
|
||||
type device discover.Device
|
||||
|
||||
// toEdits converts a discovered device to CDI Container Edits.
|
||||
func (d device) toEdits() *cdi.ContainerEdits {
|
||||
func (d device) toEdits() (*cdi.ContainerEdits, error) {
|
||||
deviceNode, err := d.toSpec()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
e := cdi.ContainerEdits{
|
||||
ContainerEdits: &specs.ContainerEdits{
|
||||
DeviceNodes: []*specs.DeviceNode{d.toSpec()},
|
||||
DeviceNodes: []*specs.DeviceNode{deviceNode},
|
||||
},
|
||||
}
|
||||
return &e
|
||||
return &e, nil
|
||||
}
|
||||
|
||||
// toSpec converts a discovered Device to a CDI Spec Device. Note
|
||||
// that missing info is filled in when edits are applied by querying the Device node.
|
||||
func (d device) toSpec() *specs.DeviceNode {
|
||||
func (d device) toSpec() (*specs.DeviceNode, error) {
|
||||
// The HostPath field was added in the v0.5.0 CDI specification.
|
||||
// The cdi package uses strict unmarshalling when loading specs from file causing failures for
|
||||
// unexpected fields.
|
||||
// Since the behaviour for HostPath == "" and HostPath == Path are equivalent, we clear HostPath
|
||||
// if it is equal to Path to ensure compatibility with the widest range of specs.
|
||||
hostPath := d.HostPath
|
||||
if hostPath == d.Path {
|
||||
hostPath = ""
|
||||
}
|
||||
s := specs.DeviceNode{
|
||||
Path: d.Path,
|
||||
HostPath: hostPath,
|
||||
Path: d.Path,
|
||||
}
|
||||
|
||||
return &s
|
||||
return &s, nil
|
||||
}
|
||||
|
||||
69
internal/edits/device_test.go
Normal file
69
internal/edits/device_test.go
Normal file
@@ -0,0 +1,69 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package edits
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDeviceToSpec(t *testing.T) {
|
||||
testCases := []struct {
|
||||
device discover.Device
|
||||
expected *specs.DeviceNode
|
||||
}{
|
||||
{
|
||||
device: discover.Device{
|
||||
Path: "/foo",
|
||||
},
|
||||
expected: &specs.DeviceNode{
|
||||
Path: "/foo",
|
||||
},
|
||||
},
|
||||
{
|
||||
device: discover.Device{
|
||||
Path: "/foo",
|
||||
HostPath: "/foo",
|
||||
},
|
||||
expected: &specs.DeviceNode{
|
||||
Path: "/foo",
|
||||
},
|
||||
},
|
||||
{
|
||||
device: discover.Device{
|
||||
Path: "/foo",
|
||||
HostPath: "/not/foo",
|
||||
},
|
||||
expected: &specs.DeviceNode{
|
||||
Path: "/foo",
|
||||
HostPath: "/not/foo",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
spec, err := device(tc.device).toSpec()
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, tc.expected, spec)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -22,6 +22,7 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
ociSpecs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
@@ -34,6 +35,20 @@ type edits struct {
|
||||
// NewSpecEdits creates a SpecModifier that defines the required OCI spec edits (as CDI ContainerEdits) from the specified
|
||||
// discoverer.
|
||||
func NewSpecEdits(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
|
||||
c, err := FromDiscoverer(d)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing container edits: %v", err)
|
||||
}
|
||||
e := edits{
|
||||
ContainerEdits: *c,
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
return &e, nil
|
||||
}
|
||||
|
||||
// FromDiscoverer creates CDI container edits for the specified discoverer.
|
||||
func FromDiscoverer(d discover.Discover) (*cdi.ContainerEdits, error) {
|
||||
devices, err := d.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to discover devices: %v", err)
|
||||
@@ -49,9 +64,13 @@ func NewSpecEdits(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier,
|
||||
return nil, fmt.Errorf("failed to discover hooks: %v", err)
|
||||
}
|
||||
|
||||
c := cdi.ContainerEdits{}
|
||||
c := NewContainerEdits()
|
||||
for _, d := range devices {
|
||||
c.Append(device(d).toEdits())
|
||||
edits, err := device(d).toEdits()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to created container edits for device: %v", err)
|
||||
}
|
||||
c.Append(edits)
|
||||
}
|
||||
|
||||
for _, m := range mounts {
|
||||
@@ -62,12 +81,15 @@ func NewSpecEdits(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier,
|
||||
c.Append(hook(h).toEdits())
|
||||
}
|
||||
|
||||
e := edits{
|
||||
ContainerEdits: c,
|
||||
logger: logger,
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
return &e, nil
|
||||
// NewContainerEdits is a utility function to create a CDI ContainerEdits struct.
|
||||
func NewContainerEdits() *cdi.ContainerEdits {
|
||||
c := cdi.ContainerEdits{
|
||||
ContainerEdits: &specs.ContainerEdits{},
|
||||
}
|
||||
return &c
|
||||
}
|
||||
|
||||
// Modify applies the defined edits to the incoming OCI spec
|
||||
|
||||
31
internal/edits/edits_test.go
Normal file
31
internal/edits/edits_test.go
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package edits
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestFromDiscovererAllowsMountsToIterate(t *testing.T) {
|
||||
edits, err := FromDiscoverer(discover.None{})
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Empty(t, edits.Mounts)
|
||||
}
|
||||
@@ -38,8 +38,7 @@ func (d mount) toEdits() *cdi.ContainerEdits {
|
||||
// that missing info is filled in when edits are applied by querying the Mount node.
|
||||
func (d mount) toSpec() *specs.Mount {
|
||||
s := specs.Mount{
|
||||
HostPath: d.Path,
|
||||
// TODO: We need to allow the container path to be customised
|
||||
HostPath: d.HostPath,
|
||||
ContainerPath: d.Path,
|
||||
Options: []string{
|
||||
"ro",
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
|
||||
package info
|
||||
|
||||
import "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||
|
||||
// Logger is a basic interface for logging to allow these functions to be called
|
||||
// from code where logrus is not used.
|
||||
type Logger interface {
|
||||
@@ -32,10 +34,12 @@ func ResolveAutoMode(logger Logger, mode string) (rmode string) {
|
||||
logger.Infof("Auto-detected mode as '%v'", rmode)
|
||||
}()
|
||||
|
||||
isTegra, reason := IsTegraSystem()
|
||||
nvinfo := info.New()
|
||||
|
||||
isTegra, reason := nvinfo.IsTegraSystem()
|
||||
logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
|
||||
|
||||
hasNVML, reason := HasNVML()
|
||||
hasNVML, reason := nvinfo.HasNvml()
|
||||
logger.Debugf("Has NVML? %v: %v", hasNVML, reason)
|
||||
|
||||
if isTegra && !hasNVML {
|
||||
|
||||
39
internal/info/drm/drm_devices.go
Normal file
39
internal/info/drm/drm_devices.go
Normal file
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package drm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// GetDeviceNodesByBusID returns the DRM devices associated with the specified PCI bus ID
|
||||
func GetDeviceNodesByBusID(busID string) ([]string, error) {
|
||||
drmRoot := filepath.Join("/sys/bus/pci/devices", busID, "drm")
|
||||
matches, err := filepath.Glob(fmt.Sprintf("%s/*", drmRoot))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var drmDeviceNodes []string
|
||||
for _, m := range matches {
|
||||
drmDeviceNode := filepath.Join("/dev/dri", filepath.Base(m))
|
||||
drmDeviceNodes = append(drmDeviceNodes, drmDeviceNode)
|
||||
}
|
||||
|
||||
return drmDeviceNodes, nil
|
||||
}
|
||||
89
internal/info/proc/information_files.go
Normal file
89
internal/info/proc/information_files.go
Normal file
@@ -0,0 +1,89 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package proc
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GPUInfoField represents the field name for information specified in a GPU's information file
|
||||
type GPUInfoField string
|
||||
|
||||
// The following constants define the fields of interest from the GPU information file
|
||||
const (
|
||||
GPUInfoModel = GPUInfoField("Model")
|
||||
GPUInfoGPUUUID = GPUInfoField("GPU UUID")
|
||||
GPUInfoBusLocation = GPUInfoField("Bus Location")
|
||||
GPUInfoDeviceMinor = GPUInfoField("Device Minor")
|
||||
)
|
||||
|
||||
// GPUInfo stores the information for a GPU as determined from its associated information file
|
||||
type GPUInfo map[GPUInfoField]string
|
||||
|
||||
// GetInformationFilePaths returns the list of information files associated with NVIDIA GPUs.
|
||||
func GetInformationFilePaths(root string) ([]string, error) {
|
||||
return filepath.Glob(filepath.Join(root, "/proc/driver/nvidia/gpus/*/information"))
|
||||
}
|
||||
|
||||
// ParseGPUInformationFile parses the specified GPU information file and constructs a GPUInfo structure
|
||||
func ParseGPUInformationFile(path string) (GPUInfo, error) {
|
||||
infoFile, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open %v: %v", path, err)
|
||||
}
|
||||
defer infoFile.Close()
|
||||
|
||||
return gpuInfoFrom(infoFile), nil
|
||||
}
|
||||
|
||||
// gpuInfoFrom parses a GPUInfo struct from the specified reader
|
||||
// An information file has the following strucutre:
|
||||
// $ cat /proc/driver/nvidia/gpus/0000\:06\:00.0/information
|
||||
// Model: Tesla V100-SXM2-16GB
|
||||
// IRQ: 408
|
||||
// GPU UUID: GPU-edfee158-11c1-52b8-0517-92f30e7fac88
|
||||
// Video BIOS: 88.00.41.00.01
|
||||
// Bus Type: PCIe
|
||||
// DMA Size: 47 bits
|
||||
// DMA Mask: 0x7fffffffffff
|
||||
// Bus Location: 0000:06:00.0
|
||||
// Device Minor: 0
|
||||
// GPU Excluded: No
|
||||
func gpuInfoFrom(reader io.Reader) GPUInfo {
|
||||
info := make(GPUInfo)
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
field := GPUInfoField(parts[0])
|
||||
value := strings.TrimSpace(parts[1])
|
||||
|
||||
info[field] = value
|
||||
}
|
||||
|
||||
return info
|
||||
}
|
||||
292
internal/ldcache/ldcache.go
Normal file
292
internal/ldcache/ldcache.go
Normal file
@@ -0,0 +1,292 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
// Adapted from https://github.com/rai-project/ldcache
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const ldcachePath = "/etc/ld.so.cache"
|
||||
|
||||
const (
|
||||
magicString1 = "ld.so-1.7.0"
|
||||
magicString2 = "glibc-ld.so.cache"
|
||||
magicVersion = "1.1"
|
||||
)
|
||||
|
||||
const (
|
||||
flagTypeMask = 0x00ff
|
||||
flagTypeELF = 0x0001
|
||||
|
||||
flagArchMask = 0xff00
|
||||
flagArchI386 = 0x0000
|
||||
flagArchX8664 = 0x0300
|
||||
flagArchX32 = 0x0800
|
||||
flagArchPpc64le = 0x0500
|
||||
)
|
||||
|
||||
var errInvalidCache = errors.New("invalid ld.so.cache file")
|
||||
|
||||
type header1 struct {
|
||||
Magic [len(magicString1) + 1]byte // include null delimiter
|
||||
NLibs uint32
|
||||
}
|
||||
|
||||
type entry1 struct {
|
||||
Flags int32
|
||||
Key, Value uint32
|
||||
}
|
||||
|
||||
type header2 struct {
|
||||
Magic [len(magicString2)]byte
|
||||
Version [len(magicVersion)]byte
|
||||
NLibs uint32
|
||||
TableSize uint32
|
||||
_ [3]uint32 // unused
|
||||
_ uint64 // force 8 byte alignment
|
||||
}
|
||||
|
||||
type entry2 struct {
|
||||
Flags int32
|
||||
Key, Value uint32
|
||||
OSVersion uint32
|
||||
HWCap uint64
|
||||
}
|
||||
|
||||
// LDCache represents the interface for performing lookups into the LDCache
|
||||
type LDCache interface {
|
||||
List() ([]string, []string)
|
||||
Lookup(...string) ([]string, []string)
|
||||
}
|
||||
|
||||
type ldcache struct {
|
||||
*bytes.Reader
|
||||
|
||||
data, libs []byte
|
||||
header header2
|
||||
entries []entry2
|
||||
|
||||
root string
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
// New creates a new LDCache with the specified logger and root.
|
||||
func New(logger *log.Logger, root string) (LDCache, error) {
|
||||
path := filepath.Join(root, ldcachePath)
|
||||
|
||||
logger.Debugf("Opening ld.conf at %v", path)
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d, err := syscall.Mmap(int(f.Fd()), 0, int(fi.Size()),
|
||||
syscall.PROT_READ, syscall.MAP_PRIVATE)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cache := &ldcache{
|
||||
data: d,
|
||||
Reader: bytes.NewReader(d),
|
||||
root: root,
|
||||
logger: logger,
|
||||
}
|
||||
return cache, cache.parse()
|
||||
}
|
||||
|
||||
func (c *ldcache) Close() error {
|
||||
return syscall.Munmap(c.data)
|
||||
}
|
||||
|
||||
func (c *ldcache) Magic() string {
|
||||
return string(c.header.Magic[:])
|
||||
}
|
||||
|
||||
func (c *ldcache) Version() string {
|
||||
return string(c.header.Version[:])
|
||||
}
|
||||
|
||||
func strn(b []byte, n int) string {
|
||||
return string(b[:n])
|
||||
}
|
||||
|
||||
func (c *ldcache) parse() error {
|
||||
var header header1
|
||||
|
||||
// Check for the old format (< glibc-2.2)
|
||||
if c.Len() <= int(unsafe.Sizeof(header)) {
|
||||
return errInvalidCache
|
||||
}
|
||||
if strn(c.data, len(magicString1)) == magicString1 {
|
||||
if err := binary.Read(c, binary.LittleEndian, &header); err != nil {
|
||||
return err
|
||||
}
|
||||
n := int64(header.NLibs) * int64(unsafe.Sizeof(entry1{}))
|
||||
offset, err := c.Seek(n, 1) // skip old entries
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n = (-offset) & int64(unsafe.Alignof(c.header)-1)
|
||||
_, err = c.Seek(n, 1) // skip padding
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
c.libs = c.data[c.Size()-int64(c.Len()):] // kv offsets start here
|
||||
if err := binary.Read(c, binary.LittleEndian, &c.header); err != nil {
|
||||
return err
|
||||
}
|
||||
if c.Magic() != magicString2 || c.Version() != magicVersion {
|
||||
return errInvalidCache
|
||||
}
|
||||
c.entries = make([]entry2, c.header.NLibs)
|
||||
if err := binary.Read(c, binary.LittleEndian, &c.entries); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// List creates a list of libraires in the ldcache.
|
||||
// The 32-bit and 64-bit libraries are returned separately.
|
||||
func (c *ldcache) List() ([]string, []string) {
|
||||
paths := make(map[int][]string)
|
||||
|
||||
processed := make(map[string]bool)
|
||||
|
||||
for _, e := range c.entries {
|
||||
bits := 0
|
||||
if ((e.Flags & flagTypeMask) & flagTypeELF) == 0 {
|
||||
continue
|
||||
}
|
||||
switch e.Flags & flagArchMask {
|
||||
case flagArchX8664:
|
||||
fallthrough
|
||||
case flagArchPpc64le:
|
||||
bits = 64
|
||||
case flagArchX32:
|
||||
fallthrough
|
||||
case flagArchI386:
|
||||
bits = 32
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if e.Key > uint32(len(c.libs)) || e.Value > uint32(len(c.libs)) {
|
||||
continue
|
||||
}
|
||||
value := c.libs[e.Value:]
|
||||
|
||||
n := bytes.IndexByte(value, 0)
|
||||
if n < 0 {
|
||||
break
|
||||
}
|
||||
|
||||
name := filepath.Join(c.root, strn(value, n))
|
||||
c.logger.Debugf("checking %v", string(name))
|
||||
|
||||
path, err := filepath.EvalSymlinks(name)
|
||||
if err != nil {
|
||||
c.logger.Debugf("could not resolve symlink for %v", name)
|
||||
break
|
||||
}
|
||||
if processed[path] {
|
||||
continue
|
||||
}
|
||||
paths[bits] = append(paths[bits], path)
|
||||
processed[path] = true
|
||||
}
|
||||
|
||||
return paths[32], paths[64]
|
||||
}
|
||||
|
||||
// Lookup searches the ldcache for the specified prefixes.
|
||||
// The 32-bit and 64-bit libraries matching the prefixes are returned.
|
||||
func (c *ldcache) Lookup(libs ...string) (paths32, paths64 []string) {
|
||||
c.logger.Debugf("Looking up %v in cache", libs)
|
||||
type void struct{}
|
||||
var paths *[]string
|
||||
|
||||
set := make(map[string]void)
|
||||
prefix := make([][]byte, len(libs))
|
||||
|
||||
for i := range libs {
|
||||
prefix[i] = []byte(libs[i])
|
||||
}
|
||||
for _, e := range c.entries {
|
||||
if ((e.Flags & flagTypeMask) & flagTypeELF) == 0 {
|
||||
continue
|
||||
}
|
||||
switch e.Flags & flagArchMask {
|
||||
case flagArchX8664:
|
||||
fallthrough
|
||||
case flagArchPpc64le:
|
||||
paths = &paths64
|
||||
case flagArchX32:
|
||||
fallthrough
|
||||
case flagArchI386:
|
||||
paths = &paths32
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if e.Key > uint32(len(c.libs)) || e.Value > uint32(len(c.libs)) {
|
||||
continue
|
||||
}
|
||||
lib := c.libs[e.Key:]
|
||||
value := c.libs[e.Value:]
|
||||
|
||||
for _, p := range prefix {
|
||||
if bytes.HasPrefix(lib, p) {
|
||||
n := bytes.IndexByte(value, 0)
|
||||
if n < 0 {
|
||||
break
|
||||
}
|
||||
|
||||
name := filepath.Join(c.root, strn(value, n))
|
||||
c.logger.Debugf("checking %v", string(name))
|
||||
|
||||
path, err := filepath.EvalSymlinks(name)
|
||||
if err != nil {
|
||||
c.logger.Debugf("could not resolve symlink for %v", name)
|
||||
break
|
||||
}
|
||||
if _, ok := set[path]; ok {
|
||||
break
|
||||
}
|
||||
set[path] = void{}
|
||||
*paths = append(*paths, path)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -19,7 +19,6 @@ package lookup
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
@@ -31,22 +30,21 @@ const (
|
||||
// NewCharDeviceLocator creates a Locator that can be used to find char devices at the specified root. A logger is
|
||||
// also specified.
|
||||
func NewCharDeviceLocator(logger *logrus.Logger, root string) Locator {
|
||||
l := file{
|
||||
logger: logger,
|
||||
prefixes: []string{root, filepath.Join(root, devRoot)},
|
||||
filter: assertCharDevice,
|
||||
}
|
||||
|
||||
return &l
|
||||
return NewFileLocator(
|
||||
WithLogger(logger),
|
||||
WithRoot(root),
|
||||
WithSearchPaths("", devRoot),
|
||||
WithFilter(assertCharDevice),
|
||||
)
|
||||
}
|
||||
|
||||
// assertCharDevice checks whether the specified path is a char device and returns an error if this is not the case.
|
||||
func assertCharDevice(filename string) error {
|
||||
info, err := os.Stat(filename)
|
||||
info, err := os.Lstat(filename)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting info: %v", err)
|
||||
}
|
||||
if info.Mode()|os.ModeCharDevice == 0 {
|
||||
if info.Mode()&os.ModeCharDevice == 0 {
|
||||
return fmt.Errorf("%v is not a char device", filename)
|
||||
}
|
||||
return nil
|
||||
|
||||
55
internal/lookup/device_test.go
Normal file
55
internal/lookup/device_test.go
Normal file
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCharDeviceLocator(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
root string
|
||||
expectedPrefixes []string
|
||||
}{
|
||||
{
|
||||
root: "",
|
||||
expectedPrefixes: []string{"", "/dev"},
|
||||
},
|
||||
{
|
||||
root: "/",
|
||||
expectedPrefixes: []string{"/", "/dev"},
|
||||
},
|
||||
{
|
||||
root: "/some/root",
|
||||
expectedPrefixes: []string{"/some/root", "/some/root/dev"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
f := NewCharDeviceLocator(logger, tc.root).(*file)
|
||||
|
||||
require.EqualValues(t, tc.expectedPrefixes, f.prefixes)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -26,13 +26,11 @@ import (
|
||||
// NewDirectoryLocator creates a Locator that can be used to find directories at the specified root. A logger
|
||||
// is also specified.
|
||||
func NewDirectoryLocator(logger *log.Logger, root string) Locator {
|
||||
l := file{
|
||||
logger: logger,
|
||||
prefixes: []string{root},
|
||||
filter: assertDirectory,
|
||||
}
|
||||
|
||||
return &l
|
||||
return NewFileLocator(
|
||||
WithLogger(logger),
|
||||
WithRoot(root),
|
||||
WithFilter(assertDirectory),
|
||||
)
|
||||
}
|
||||
|
||||
// assertDirectory checks wither the specified path is a directory.
|
||||
|
||||
@@ -19,7 +19,6 @@ package lookup
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
@@ -33,34 +32,39 @@ type executable struct {
|
||||
func NewExecutableLocator(logger *log.Logger, root string) Locator {
|
||||
paths := GetPaths(root)
|
||||
|
||||
var prefixes []string
|
||||
for _, dir := range paths {
|
||||
prefixes = append(prefixes, filepath.Join(root, dir))
|
||||
}
|
||||
return newExecutableLocator(logger, root, paths...)
|
||||
}
|
||||
|
||||
func newExecutableLocator(logger *log.Logger, root string, paths ...string) *executable {
|
||||
f := newFileLocator(
|
||||
WithLogger(logger),
|
||||
WithRoot(root),
|
||||
WithSearchPaths(paths...),
|
||||
WithFilter(assertExecutable),
|
||||
)
|
||||
|
||||
l := executable{
|
||||
file: file{
|
||||
logger: logger,
|
||||
prefixes: prefixes,
|
||||
filter: assertExecutable,
|
||||
},
|
||||
file: *f,
|
||||
}
|
||||
|
||||
return &l
|
||||
}
|
||||
|
||||
var _ Locator = (*executable)(nil)
|
||||
|
||||
// Locate finds executable files in the path. If a relative or absolute path is specified, the prefix paths are not considered.
|
||||
func (p executable) Locate(filename string) ([]string, error) {
|
||||
// Locate finds executable files with the specified pattern in the path.
|
||||
// If a relative or absolute path is specified, the prefix paths are not considered.
|
||||
func (p executable) Locate(pattern string) ([]string, error) {
|
||||
// For absolute paths we ensure that it is executable
|
||||
if strings.Contains(filename, "/") {
|
||||
err := assertExecutable(filename)
|
||||
if strings.Contains(pattern, "/") {
|
||||
err := assertExecutable(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("absolute path %v is not an executable file: %v", filename, err)
|
||||
return nil, fmt.Errorf("absolute path %v is not an executable file: %v", pattern, err)
|
||||
}
|
||||
return []string{filename}, nil
|
||||
return []string{pattern}, nil
|
||||
}
|
||||
|
||||
return p.file.Locate(filename)
|
||||
return p.file.Locate(pattern)
|
||||
}
|
||||
|
||||
// assertExecutable checks whether the specified path is an execuable file.
|
||||
|
||||
77
internal/lookup/executable_test.go
Normal file
77
internal/lookup/executable_test.go
Normal file
@@ -0,0 +1,77 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestExecutableLocator(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
root string
|
||||
paths []string
|
||||
expectedPrefixes []string
|
||||
}{
|
||||
{
|
||||
root: "",
|
||||
expectedPrefixes: []string{""},
|
||||
},
|
||||
{
|
||||
root: "",
|
||||
paths: []string{"/"},
|
||||
expectedPrefixes: []string{"/"},
|
||||
},
|
||||
{
|
||||
root: "",
|
||||
paths: []string{"/", "/bin"},
|
||||
expectedPrefixes: []string{"/", "/bin"},
|
||||
},
|
||||
{
|
||||
root: "/",
|
||||
expectedPrefixes: []string{"/"},
|
||||
},
|
||||
{
|
||||
root: "/",
|
||||
paths: []string{"/"},
|
||||
expectedPrefixes: []string{"/"},
|
||||
},
|
||||
{
|
||||
root: "/",
|
||||
paths: []string{"/", "/bin"},
|
||||
expectedPrefixes: []string{"/", "/bin"},
|
||||
},
|
||||
{
|
||||
root: "/some/path",
|
||||
paths: []string{"/", "/bin"},
|
||||
expectedPrefixes: []string{"/some/path", "/some/path/bin"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
e := newExecutableLocator(logger, tc.root, tc.paths...)
|
||||
|
||||
require.EqualValues(t, tc.expectedPrefixes, e.prefixes)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -28,44 +28,123 @@ import (
|
||||
// prefixes. The validity of a file is determined by a filter function.
|
||||
type file struct {
|
||||
logger *log.Logger
|
||||
root string
|
||||
prefixes []string
|
||||
filter func(string) error
|
||||
}
|
||||
|
||||
// NewFileLocator creates a Locator that can be used to find files at the specified root. A logger
|
||||
// can also be specified.
|
||||
func NewFileLocator(logger *log.Logger, root string) Locator {
|
||||
l := newFileLocator(logger, root)
|
||||
// Option defines a function for passing options to the NewFileLocator() call
|
||||
type Option func(*file)
|
||||
|
||||
return &l
|
||||
// WithRoot sets the root for the file locator
|
||||
func WithRoot(root string) Option {
|
||||
return func(f *file) {
|
||||
f.root = root
|
||||
}
|
||||
}
|
||||
|
||||
func newFileLocator(logger *log.Logger, root string) file {
|
||||
return file{
|
||||
logger: logger,
|
||||
prefixes: []string{root},
|
||||
filter: assertFile,
|
||||
// WithLogger sets the logger for the file locator
|
||||
func WithLogger(logger *log.Logger) Option {
|
||||
return func(f *file) {
|
||||
f.logger = logger
|
||||
}
|
||||
}
|
||||
|
||||
// WithSearchPaths sets the search paths for the file locator.
|
||||
func WithSearchPaths(paths ...string) Option {
|
||||
return func(f *file) {
|
||||
f.prefixes = paths
|
||||
}
|
||||
}
|
||||
|
||||
// WithFilter sets the filter for the file locator
|
||||
// The filter is called for each candidate file and candidates that return nil are considered.
|
||||
func WithFilter(assert func(string) error) Option {
|
||||
return func(f *file) {
|
||||
f.filter = assert
|
||||
}
|
||||
}
|
||||
|
||||
// NewFileLocator creates a Locator that can be used to find files with the specified options.
|
||||
func NewFileLocator(opts ...Option) Locator {
|
||||
return newFileLocator(opts...)
|
||||
}
|
||||
|
||||
func newFileLocator(opts ...Option) *file {
|
||||
f := &file{}
|
||||
for _, opt := range opts {
|
||||
opt(f)
|
||||
}
|
||||
if f.logger == nil {
|
||||
f.logger = log.StandardLogger()
|
||||
}
|
||||
if f.filter == nil {
|
||||
f.filter = assertFile
|
||||
}
|
||||
// Since the `Locate` implementations rely on the root already being specified we update
|
||||
// the prefixes to include the root.
|
||||
f.prefixes = getSearchPrefixes(f.root, f.prefixes...)
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
// getSearchPrefixes generates a list of unique paths to be searched by a file locator.
|
||||
//
|
||||
// For each of the unique prefixes <p> specified, the path <root><p> is searched, where <root> is the
|
||||
// specified root. If no prefixes are specified, <root> is returned as the only search prefix.
|
||||
//
|
||||
// Note that an empty root is equivalent to searching relative to the current working directory, and
|
||||
// if the root filesystem should be searched instead, root should be specified as "/" explicitly.
|
||||
//
|
||||
// Also, a prefix of "" forces the root to be included in returned set of paths. This means that if
|
||||
// the root in addition to another prefix must be searched the function should be called with:
|
||||
//
|
||||
// getSearchPrefixes("/root", "", "another/path")
|
||||
//
|
||||
// and will result in the search paths []{"/root", "/root/another/path"} being returned.
|
||||
func getSearchPrefixes(root string, prefixes ...string) []string {
|
||||
seen := make(map[string]bool)
|
||||
var uniquePrefixes []string
|
||||
for _, p := range prefixes {
|
||||
if seen[p] {
|
||||
continue
|
||||
}
|
||||
seen[p] = true
|
||||
uniquePrefixes = append(uniquePrefixes, filepath.Join(root, p))
|
||||
}
|
||||
|
||||
if len(uniquePrefixes) == 0 {
|
||||
uniquePrefixes = append(uniquePrefixes, root)
|
||||
}
|
||||
|
||||
return uniquePrefixes
|
||||
}
|
||||
|
||||
var _ Locator = (*file)(nil)
|
||||
|
||||
// Locate attempts to find the specified file. All prefixes are searched and any matching
|
||||
// candidates are returned. If no matches are found, an error is returned.
|
||||
func (p file) Locate(filename string) ([]string, error) {
|
||||
// Locate attempts to find files with names matching the specified pattern.
|
||||
// All prefixes are searched and any matching candidates are returned. If no matches are found, an error is returned.
|
||||
func (p file) Locate(pattern string) ([]string, error) {
|
||||
var filenames []string
|
||||
for _, prefix := range p.prefixes {
|
||||
candidate := filepath.Join(prefix, filename)
|
||||
p.logger.Debugf("Checking candidate '%v'", candidate)
|
||||
err := p.filter(candidate)
|
||||
pathPattern := filepath.Join(prefix, pattern)
|
||||
candidates, err := filepath.Glob(pathPattern)
|
||||
if err != nil {
|
||||
p.logger.Debugf("Candidate '%v' does not meet requirements: %v", candidate, err)
|
||||
continue
|
||||
p.logger.Debugf("Checking pattern '%v' failed: %v", pathPattern, err)
|
||||
}
|
||||
|
||||
for _, candidate := range candidates {
|
||||
p.logger.Debugf("Checking candidate '%v'", candidate)
|
||||
err := p.filter(candidate)
|
||||
if err != nil {
|
||||
p.logger.Debugf("Candidate '%v' does not meet requirements: %v", candidate, err)
|
||||
continue
|
||||
}
|
||||
filenames = append(filenames, candidate)
|
||||
}
|
||||
filenames = append(filenames, candidate)
|
||||
}
|
||||
if len(filename) == 0 {
|
||||
return nil, fmt.Errorf("file %v not found", filename)
|
||||
if len(filenames) == 0 {
|
||||
return nil, fmt.Errorf("pattern %v not found", pattern)
|
||||
}
|
||||
return filenames, nil
|
||||
}
|
||||
|
||||
82
internal/lookup/file_test.go
Normal file
82
internal/lookup/file_test.go
Normal file
@@ -0,0 +1,82 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetSearchPrefixes(t *testing.T) {
|
||||
testCases := []struct {
|
||||
root string
|
||||
prefixes []string
|
||||
expectedPrefixes []string
|
||||
}{
|
||||
{
|
||||
root: "",
|
||||
expectedPrefixes: []string{""},
|
||||
},
|
||||
{
|
||||
root: "/",
|
||||
expectedPrefixes: []string{"/"},
|
||||
},
|
||||
{
|
||||
root: "/some/root",
|
||||
expectedPrefixes: []string{"/some/root"},
|
||||
},
|
||||
{
|
||||
root: "",
|
||||
prefixes: []string{"foo", "bar"},
|
||||
expectedPrefixes: []string{"foo", "bar"},
|
||||
},
|
||||
{
|
||||
root: "/",
|
||||
prefixes: []string{"foo", "bar"},
|
||||
expectedPrefixes: []string{"/foo", "/bar"},
|
||||
},
|
||||
{
|
||||
root: "/",
|
||||
prefixes: []string{"/foo", "/bar"},
|
||||
expectedPrefixes: []string{"/foo", "/bar"},
|
||||
},
|
||||
{
|
||||
root: "/some/root",
|
||||
prefixes: []string{"foo", "bar"},
|
||||
expectedPrefixes: []string{"/some/root/foo", "/some/root/bar"},
|
||||
},
|
||||
{
|
||||
root: "",
|
||||
prefixes: []string{"foo", "bar", "bar", "foo"},
|
||||
expectedPrefixes: []string{"foo", "bar"},
|
||||
},
|
||||
{
|
||||
root: "/some/root",
|
||||
prefixes: []string{"foo", "bar", "foo", "bar"},
|
||||
expectedPrefixes: []string{"/some/root/foo", "/some/root/bar"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
prefixes := getSearchPrefixes(tc.root, tc.prefixes...)
|
||||
require.EqualValues(t, tc.expectedPrefixes, prefixes)
|
||||
})
|
||||
}
|
||||
}
|
||||
68
internal/lookup/library.go
Normal file
68
internal/lookup/library.go
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type library struct {
|
||||
logger *log.Logger
|
||||
symlink Locator
|
||||
cache ldcache.LDCache
|
||||
}
|
||||
|
||||
var _ Locator = (*library)(nil)
|
||||
|
||||
// NewLibraryLocator creates a library locator using the specified logger.
|
||||
func NewLibraryLocator(logger *log.Logger, root string) (Locator, error) {
|
||||
cache, err := ldcache.New(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading ldcache: %v", err)
|
||||
}
|
||||
|
||||
l := library{
|
||||
symlink: NewSymlinkLocator(logger, root),
|
||||
cache: cache,
|
||||
}
|
||||
|
||||
return &l, nil
|
||||
}
|
||||
|
||||
// Locate finds the specified libraryname.
|
||||
// If the input is a library name, the ldcache is searched otherwise the
|
||||
// provided path is resolved as a symlink.
|
||||
func (l library) Locate(libname string) ([]string, error) {
|
||||
if strings.Contains(libname, "/") {
|
||||
return l.symlink.Locate(libname)
|
||||
}
|
||||
|
||||
paths32, paths64 := l.cache.Lookup(libname)
|
||||
if len(paths32) > 0 {
|
||||
l.logger.Warnf("Ignoring 32-bit libraries for %v: %v", libname, paths32)
|
||||
}
|
||||
|
||||
if len(paths64) == 0 {
|
||||
return nil, fmt.Errorf("64-bit library %v not found", libname)
|
||||
}
|
||||
|
||||
return paths64, nil
|
||||
}
|
||||
@@ -20,6 +20,9 @@ var _ Locator = &LocatorMock{}
|
||||
// LocateFunc: func(s string) ([]string, error) {
|
||||
// panic("mock out the Locate method")
|
||||
// },
|
||||
// RelativeFunc: func(s string) (string, error) {
|
||||
// panic("mock out the Relative method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedLocator in code that requires Locator
|
||||
@@ -30,6 +33,9 @@ type LocatorMock struct {
|
||||
// LocateFunc mocks the Locate method.
|
||||
LocateFunc func(s string) ([]string, error)
|
||||
|
||||
// RelativeFunc mocks the Relative method.
|
||||
RelativeFunc func(s string) (string, error)
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// Locate holds details about calls to the Locate method.
|
||||
@@ -37,8 +43,14 @@ type LocatorMock struct {
|
||||
// S is the s argument value.
|
||||
S string
|
||||
}
|
||||
// Relative holds details about calls to the Relative method.
|
||||
Relative []struct {
|
||||
// S is the s argument value.
|
||||
S string
|
||||
}
|
||||
}
|
||||
lockLocate sync.RWMutex
|
||||
lockLocate sync.RWMutex
|
||||
lockRelative sync.RWMutex
|
||||
}
|
||||
|
||||
// Locate calls LocateFunc.
|
||||
@@ -75,3 +87,38 @@ func (mock *LocatorMock) LocateCalls() []struct {
|
||||
mock.lockLocate.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// Relative calls RelativeFunc.
|
||||
func (mock *LocatorMock) Relative(s string) (string, error) {
|
||||
callInfo := struct {
|
||||
S string
|
||||
}{
|
||||
S: s,
|
||||
}
|
||||
mock.lockRelative.Lock()
|
||||
mock.calls.Relative = append(mock.calls.Relative, callInfo)
|
||||
mock.lockRelative.Unlock()
|
||||
if mock.RelativeFunc == nil {
|
||||
var (
|
||||
sOut string
|
||||
errOut error
|
||||
)
|
||||
return sOut, errOut
|
||||
}
|
||||
return mock.RelativeFunc(s)
|
||||
}
|
||||
|
||||
// RelativeCalls gets all the calls that were made to Relative.
|
||||
// Check the length with:
|
||||
// len(mockedLocator.RelativeCalls())
|
||||
func (mock *LocatorMock) RelativeCalls() []struct {
|
||||
S string
|
||||
} {
|
||||
var calls []struct {
|
||||
S string
|
||||
}
|
||||
mock.lockRelative.RLock()
|
||||
calls = mock.calls.Relative
|
||||
mock.lockRelative.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
@@ -35,8 +35,9 @@ type symlink struct {
|
||||
// NewSymlinkChainLocator creats a locator that can be used for locating files through symlinks.
|
||||
// A logger can also be specified.
|
||||
func NewSymlinkChainLocator(logger *logrus.Logger, root string) Locator {
|
||||
f := newFileLocator(WithLogger(logger), WithRoot(root))
|
||||
l := symlinkChain{
|
||||
file: newFileLocator(logger, root),
|
||||
file: *f,
|
||||
}
|
||||
|
||||
return &l
|
||||
@@ -45,17 +46,18 @@ func NewSymlinkChainLocator(logger *logrus.Logger, root string) Locator {
|
||||
// NewSymlinkLocator creats a locator that can be used for locating files through symlinks.
|
||||
// A logger can also be specified.
|
||||
func NewSymlinkLocator(logger *logrus.Logger, root string) Locator {
|
||||
f := newFileLocator(WithLogger(logger), WithRoot(root))
|
||||
l := symlink{
|
||||
file: newFileLocator(logger, root),
|
||||
file: *f,
|
||||
}
|
||||
|
||||
return &l
|
||||
}
|
||||
|
||||
// Locate finds the specified file at the specified root. If the file is a symlink, the link is followed and all candidates
|
||||
// to the final target are returned.
|
||||
func (p symlinkChain) Locate(filename string) ([]string, error) {
|
||||
candidates, err := p.file.Locate(filename)
|
||||
// Locate finds the specified pattern at the specified root.
|
||||
// If the file is a symlink, the link is followed and all candidates to the final target are returned.
|
||||
func (p symlinkChain) Locate(pattern string) ([]string, error) {
|
||||
candidates, err := p.file.Locate(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -104,14 +106,15 @@ func (p symlinkChain) Locate(filename string) ([]string, error) {
|
||||
return filenames, nil
|
||||
}
|
||||
|
||||
// Locate finds the specified file at the specified root. If the file is a symlink, the link is resolved and the target returned.
|
||||
func (p symlink) Locate(filename string) ([]string, error) {
|
||||
candidates, err := p.file.Locate(filename)
|
||||
// Locate finds the specified pattern at the specified root.
|
||||
// If the file is a symlink, the link is resolved and the target returned.
|
||||
func (p symlink) Locate(pattern string) ([]string, error) {
|
||||
candidates, err := p.file.Locate(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(candidates) != 1 {
|
||||
return nil, fmt.Errorf("failed to uniquely resolve symlink %v: %v", filename, candidates)
|
||||
return nil, fmt.Errorf("failed to uniquely resolve symlink %v: %v", pattern, candidates)
|
||||
}
|
||||
|
||||
target, err := filepath.EvalSymlinks(candidates[0])
|
||||
|
||||
128
internal/modifier/cdi.go
Normal file
128
internal/modifier/cdi.go
Normal file
@@ -0,0 +1,128 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type cdiModifier struct {
|
||||
logger *logrus.Logger
|
||||
specDirs []string
|
||||
devices []string
|
||||
}
|
||||
|
||||
// NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the
|
||||
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES enviroment variable is
|
||||
// used to select the devices to include.
|
||||
func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||
devices, err := getDevicesFromSpec(ociSpec)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err)
|
||||
}
|
||||
if len(devices) == 0 {
|
||||
logger.Debugf("No devices requested; no modification required.")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
specDirs := cdi.DefaultSpecDirs
|
||||
if len(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.SpecDirs) > 0 {
|
||||
specDirs = cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.SpecDirs
|
||||
}
|
||||
|
||||
m := cdiModifier{
|
||||
logger: logger,
|
||||
specDirs: specDirs,
|
||||
devices: devices,
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func getDevicesFromSpec(ociSpec oci.Spec) ([]string, error) {
|
||||
rawSpec, err := ociSpec.Load()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||
}
|
||||
|
||||
image, err := image.NewCUDAImageFromSpec(rawSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
envDevices := image.DevicesFromEnvvars(visibleDevicesEnvvar)
|
||||
|
||||
_, annotationDevices, err := cdi.ParseAnnotations(rawSpec.Annotations)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse container annotations: %v", err)
|
||||
}
|
||||
|
||||
uniqueDevices := make(map[string]struct{})
|
||||
for _, name := range append(envDevices.List(), annotationDevices...) {
|
||||
if !cdi.IsQualifiedName(name) {
|
||||
name = cdi.QualifiedName("nvidia.com", "gpu", name)
|
||||
}
|
||||
uniqueDevices[name] = struct{}{}
|
||||
}
|
||||
|
||||
var devices []string
|
||||
for name := range uniqueDevices {
|
||||
devices = append(devices, name)
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
// Modify loads the CDI registry and injects the specified CDI devices into the OCI runtime specification.
|
||||
func (m cdiModifier) Modify(spec *specs.Spec) error {
|
||||
registry := cdi.GetRegistry(
|
||||
cdi.WithSpecDirs(m.specDirs...),
|
||||
cdi.WithAutoRefresh(false),
|
||||
)
|
||||
if err := registry.Refresh(); err != nil {
|
||||
m.logger.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
|
||||
}
|
||||
|
||||
devices := m.devices
|
||||
for _, d := range devices {
|
||||
if d == "nvidia.com/gpu=all" {
|
||||
devices = []string{}
|
||||
for _, candidate := range registry.DeviceDB().ListDevices() {
|
||||
if strings.HasPrefix(candidate, "nvidia.com/gpu=") {
|
||||
devices = append(devices, candidate)
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
m.logger.Debugf("Injecting devices using CDI: %v", devices)
|
||||
_, err := registry.InjectDevices(spec, devices...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to inject CDI devices: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user