mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
109 Commits
v1.6.0-rc.
...
experiment
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8551c71a8a | ||
|
|
4562cb559c | ||
|
|
72e17e8632 | ||
|
|
6898917f41 | ||
|
|
7ee81e91de | ||
|
|
5a6860adbf | ||
|
|
53c130fb3c | ||
|
|
45bd3002da | ||
|
|
58042d78df | ||
|
|
aa52b12c09 | ||
|
|
47bc4f90ba | ||
|
|
41c1c2312a | ||
|
|
9d34134b3f | ||
|
|
d931e861f3 | ||
|
|
b1c9b8bb49 | ||
|
|
50fbcebe31 | ||
|
|
ce2b0dfdf0 | ||
|
|
6f0129bdf2 | ||
|
|
ffd98424d8 | ||
|
|
cf192169a8 | ||
|
|
eeabd2b94d | ||
|
|
78f38455fd | ||
|
|
f57e9b969c | ||
|
|
a174aae7b5 | ||
|
|
6890cb2ed8 | ||
|
|
13603e9794 | ||
|
|
afb260d82e | ||
|
|
f0311bfe17 | ||
|
|
050c29b157 | ||
|
|
de9afd4623 | ||
|
|
b231d8f365 | ||
|
|
ee2b84b228 | ||
|
|
0c24fa83ae | ||
|
|
79660d1e55 | ||
|
|
39d2ff06fa | ||
|
|
0ac288e6dd | ||
|
|
b334f1977b | ||
|
|
2d07385e81 | ||
|
|
fd5a1a72f0 | ||
|
|
738d28dac5 | ||
|
|
e662e8197c | ||
|
|
2964f26533 | ||
|
|
629d575fad | ||
|
|
7fb04878c7 | ||
|
|
f10f533fb2 | ||
|
|
9c2cdc2f81 | ||
|
|
5bbaf8af4b | ||
|
|
c6ce5b5a29 | ||
|
|
b9e752e24e | ||
|
|
94849fa822 | ||
|
|
b0d6948d94 | ||
|
|
995bd0d34a | ||
|
|
27bb5cca0c | ||
|
|
72d1d90ce9 | ||
|
|
6a1f7d0228 | ||
|
|
094631329f | ||
|
|
6731f050da | ||
|
|
2ee6ec5d17 | ||
|
|
1c25b349b1 | ||
|
|
d87bdf9ab6 | ||
|
|
472c89d051 | ||
|
|
3470f2ecb9 | ||
|
|
9c27e03c87 | ||
|
|
09c6995ff9 | ||
|
|
e2ec381093 | ||
|
|
7a31ebadb1 | ||
|
|
7a34be62b2 | ||
|
|
a4441b6545 | ||
|
|
ab3ebe5e49 | ||
|
|
ea0bf6fbf8 | ||
|
|
0a2db7c70e | ||
|
|
92bb04f0fd | ||
|
|
4d224a114a | ||
|
|
2795e7d132 | ||
|
|
58801d0c71 | ||
|
|
a13c785865 | ||
|
|
b57b8661ca | ||
|
|
d81329da1f | ||
|
|
d2575abd3a | ||
|
|
bc1f6e05a0 | ||
|
|
5db5205647 | ||
|
|
6a747f5dd3 | ||
|
|
81f9caa9aa | ||
|
|
684b5e9237 | ||
|
|
7d4a8200eb | ||
|
|
060f670232 | ||
|
|
1b3e2d9423 | ||
|
|
3b84f527cc | ||
|
|
06cd37b892 | ||
|
|
ec8a6d978d | ||
|
|
d234077780 | ||
|
|
b8acd7657a | ||
|
|
8da6e42d88 | ||
|
|
1a755d471a | ||
|
|
c05f9dffc5 | ||
|
|
7c5b913e09 | ||
|
|
c0d2d41f23 | ||
|
|
08cf87eb21 | ||
|
|
d3a9f512c4 | ||
|
|
73baa74ea8 | ||
|
|
39aa24690c | ||
|
|
dc6b7c703b | ||
|
|
b044b56c6e | ||
|
|
46cdf8c41a | ||
|
|
22ee5eb64f | ||
|
|
a7f3398a68 | ||
|
|
72fe259745 | ||
|
|
3b27d91026 | ||
|
|
d3997eceb2 |
268
.common-ci.yml
268
.common-ci.yml
@@ -34,86 +34,49 @@ stages:
|
||||
- release
|
||||
- build-all
|
||||
|
||||
build-dev-image:
|
||||
stage: image
|
||||
script:
|
||||
- apk --no-cache add make bash
|
||||
- make .build-image
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
- make .push-build-image
|
||||
|
||||
.requires-build-image:
|
||||
image: "${BUILDIMAGE}"
|
||||
|
||||
.go-check:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: go-checks
|
||||
|
||||
fmt:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make assert-fmt
|
||||
|
||||
vet:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make vet
|
||||
|
||||
lint:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make lint
|
||||
allow_failure: true
|
||||
|
||||
ineffassign:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make ineffassign
|
||||
allow_failure: true
|
||||
|
||||
misspell:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make misspell
|
||||
|
||||
go-build:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: go-build
|
||||
script:
|
||||
- make build
|
||||
|
||||
unit-tests:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: unit-tests
|
||||
script:
|
||||
- make coverage
|
||||
|
||||
|
||||
# Define the distribution targets
|
||||
.dist-amazonlinux2:
|
||||
variables:
|
||||
DIST: amazonlinux2
|
||||
|
||||
.dist-centos7:
|
||||
variables:
|
||||
DIST: centos7
|
||||
CVE_UPDATES: "nss"
|
||||
|
||||
.dist-centos8:
|
||||
variables:
|
||||
DIST: centos8
|
||||
|
||||
.dist-debian10:
|
||||
variables:
|
||||
DIST: debian10
|
||||
|
||||
.dist-debian9:
|
||||
variables:
|
||||
DIST: debian9
|
||||
|
||||
.dist-opensuse-leap15.1:
|
||||
variables:
|
||||
DIST: opensuse-leap15.1
|
||||
|
||||
.dist-ubi8:
|
||||
variables:
|
||||
DIST: ubi8
|
||||
|
||||
.dist-ubuntu16.04:
|
||||
variables:
|
||||
DIST: ubuntu16.04
|
||||
|
||||
.dist-ubuntu18.04:
|
||||
variables:
|
||||
DIST: ubuntu18.04
|
||||
|
||||
.dist-packaging:
|
||||
variables:
|
||||
DIST: packaging
|
||||
|
||||
# Define architecture targets
|
||||
.arch-aarch64:
|
||||
variables:
|
||||
ARCH: aarch64
|
||||
@@ -134,121 +97,6 @@ unit-tests:
|
||||
variables:
|
||||
ARCH: x86_64
|
||||
|
||||
# Define the package build helpers
|
||||
.multi-arch-build:
|
||||
before_script:
|
||||
- apk add --no-cache coreutils build-base sed git bash make
|
||||
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes -c yes'
|
||||
|
||||
.package-artifacts:
|
||||
variables:
|
||||
ARTIFACTS_NAME: "toolkit-container-${CI_PIPELINE_ID}"
|
||||
ARTIFACTS_ROOT: "toolkit-container-${CI_PIPELINE_ID}"
|
||||
DIST_DIR: ${CI_PROJECT_DIR}/${ARTIFACTS_ROOT}
|
||||
|
||||
.package-build:
|
||||
extends:
|
||||
- .multi-arch-build
|
||||
- .package-artifacts
|
||||
stage: package-build
|
||||
script:
|
||||
- ./scripts/release.sh ${DIST}-${ARCH}
|
||||
|
||||
artifacts:
|
||||
name: ${ARTIFACTS_NAME}
|
||||
paths:
|
||||
- ${ARTIFACTS_ROOT}
|
||||
|
||||
# Define the package build targets
|
||||
package-ubuntu18.04-amd64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-amd64
|
||||
|
||||
package-ubuntu18.04-arm64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-arm64
|
||||
|
||||
package-ubuntu18.04-ppc64le:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-ppc64le
|
||||
|
||||
package-centos7-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos7
|
||||
- .arch-x86_64
|
||||
|
||||
package-centos8-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos8
|
||||
- .arch-x86_64
|
||||
|
||||
# Define the image build targets
|
||||
.image-build:
|
||||
stage: image-build
|
||||
variables:
|
||||
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
before_script:
|
||||
- apk add --no-cache bash make
|
||||
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-centos7
|
||||
needs:
|
||||
- package-centos7-x86_64
|
||||
script:
|
||||
- make -f build/container/Makefile build-${DIST}
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
|
||||
image-centos8:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-centos8
|
||||
needs:
|
||||
- package-centos8-x86_64
|
||||
script:
|
||||
- make -f build/container/Makefile build-${DIST}
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-ubi8
|
||||
needs:
|
||||
# Note: The ubi8 image currently uses the centos7 packages
|
||||
- package-centos7-x86_64
|
||||
script:
|
||||
- make -f build/container/Makefile build-${DIST}
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
|
||||
image-ubuntu18.04:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- package-ubuntu18.04-amd64
|
||||
# TODO: These will be required once we generate multi-arch images
|
||||
# - package-ubuntu18.04-arm64
|
||||
# - package-ubuntu18.04-ppc64le
|
||||
script:
|
||||
- make -f build/container/Makefile build-${DIST}
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
|
||||
# Define test helpers
|
||||
.integration:
|
||||
stage: test
|
||||
@@ -262,62 +110,13 @@ image-ubuntu18.04:
|
||||
script:
|
||||
- make -f build/container/Makefile test-${DIST}
|
||||
|
||||
.test:toolkit:
|
||||
extends:
|
||||
- .integration
|
||||
variables:
|
||||
TEST_CASES: "toolkit"
|
||||
|
||||
.test:docker:
|
||||
extends:
|
||||
- .integration
|
||||
variables:
|
||||
TEST_CASES: "docker"
|
||||
|
||||
.test:containerd:
|
||||
# TODO: The containerd tests fail due to issues with SIGHUP.
|
||||
# Until this is resolved with retry up to twice and allow failure here.
|
||||
retry: 2
|
||||
allow_failure: true
|
||||
extends:
|
||||
- .integration
|
||||
variables:
|
||||
TEST_CASES: "containerd"
|
||||
|
||||
.test:crio:
|
||||
extends:
|
||||
- .integration
|
||||
variables:
|
||||
TEST_CASES: "crio"
|
||||
|
||||
# Define the test targets
|
||||
test-toolkit-ubuntu18.04:
|
||||
test-packaging:
|
||||
extends:
|
||||
- .test:toolkit
|
||||
- .dist-ubuntu18.04
|
||||
- .integration
|
||||
- .dist-packaging
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
test-containerd-ubuntu18.04:
|
||||
extends:
|
||||
- .test:containerd
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
test-crio-ubuntu18.04:
|
||||
extends:
|
||||
- .test:crio
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
test-docker-ubuntu18.04:
|
||||
extends:
|
||||
- .test:docker
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
- image-packaging
|
||||
|
||||
# .release forms the base of the deployment jobs which push images to the CI registry.
|
||||
# This is extended with the version to be deployed (e.g. the SHA or TAG) and the
|
||||
@@ -407,3 +206,10 @@ release:staging-ubuntu18.04:
|
||||
- test-containerd-ubuntu18.04
|
||||
- test-crio-ubuntu18.04
|
||||
- test-docker-ubuntu18.04
|
||||
|
||||
release:staging-packaging:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-packaging
|
||||
needs:
|
||||
- test-packaging
|
||||
|
||||
312
.gitlab-ci.yml
312
.gitlab-ci.yml
@@ -15,6 +15,318 @@
|
||||
include:
|
||||
- .common-ci.yml
|
||||
|
||||
build-dev-image:
|
||||
stage: image
|
||||
script:
|
||||
- apk --no-cache add make bash
|
||||
- make .build-image
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
- make .push-build-image
|
||||
|
||||
.requires-build-image:
|
||||
image: "${BUILDIMAGE}"
|
||||
|
||||
.go-check:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: go-checks
|
||||
|
||||
fmt:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make assert-fmt
|
||||
|
||||
vet:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make vet
|
||||
|
||||
lint:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make lint
|
||||
allow_failure: true
|
||||
|
||||
ineffassign:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make ineffassign
|
||||
allow_failure: true
|
||||
|
||||
misspell:
|
||||
extends:
|
||||
- .go-check
|
||||
script:
|
||||
- make misspell
|
||||
|
||||
go-build:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: go-build
|
||||
script:
|
||||
- make build
|
||||
|
||||
unit-tests:
|
||||
extends:
|
||||
- .requires-build-image
|
||||
stage: unit-tests
|
||||
script:
|
||||
- make coverage
|
||||
|
||||
# Define the package build helpers
|
||||
.multi-arch-build:
|
||||
before_script:
|
||||
- apk add --no-cache coreutils build-base sed git bash make
|
||||
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes -c yes'
|
||||
|
||||
.package-artifacts:
|
||||
variables:
|
||||
ARTIFACTS_NAME: "toolkit-container-${CI_PIPELINE_ID}"
|
||||
ARTIFACTS_ROOT: "toolkit-container-${CI_PIPELINE_ID}"
|
||||
DIST_DIR: ${CI_PROJECT_DIR}/${ARTIFACTS_ROOT}
|
||||
|
||||
.package-build:
|
||||
extends:
|
||||
- .multi-arch-build
|
||||
- .package-artifacts
|
||||
stage: package-build
|
||||
script:
|
||||
- ./scripts/release.sh ${DIST}-${ARCH}
|
||||
|
||||
artifacts:
|
||||
name: ${ARTIFACTS_NAME}
|
||||
paths:
|
||||
- ${ARTIFACTS_ROOT}
|
||||
|
||||
# Define the package build targets
|
||||
package-amazonlinux2-aarch64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-amazonlinux2
|
||||
- .arch-aarch64
|
||||
|
||||
package-amazonlinux2-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-amazonlinux2
|
||||
- .arch-x86_64
|
||||
|
||||
package-centos7-ppc64le:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos7
|
||||
- .arch-ppc64le
|
||||
|
||||
package-centos7-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos7
|
||||
- .arch-x86_64
|
||||
|
||||
package-centos8-aarch64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos8
|
||||
- .arch-aarch64
|
||||
|
||||
package-centos8-ppc64le:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos8
|
||||
- .arch-ppc64le
|
||||
|
||||
package-centos8-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-centos8
|
||||
- .arch-x86_64
|
||||
|
||||
package-debian10-amd64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-debian10
|
||||
- .arch-amd64
|
||||
|
||||
package-debian9-amd64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-debian9
|
||||
- .arch-amd64
|
||||
|
||||
package-opensuse-leap15.1-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-opensuse-leap15.1
|
||||
- .arch-x86_64
|
||||
|
||||
package-ubuntu16.04-amd64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-ubuntu16.04
|
||||
- .arch-amd64
|
||||
|
||||
package-ubuntu16.04-ppc64le:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-ubuntu16.04
|
||||
- .arch-ppc64le
|
||||
|
||||
package-ubuntu18.04-amd64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-amd64
|
||||
|
||||
package-ubuntu18.04-arm64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-arm64
|
||||
|
||||
package-ubuntu18.04-ppc64le:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-ppc64le
|
||||
|
||||
# Define the image build targets
|
||||
.image-build:
|
||||
stage: image-build
|
||||
variables:
|
||||
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
before_script:
|
||||
- apk add --no-cache bash make
|
||||
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
script:
|
||||
- make -f build/container/Makefile build-${DIST}
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-centos7
|
||||
needs:
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
|
||||
image-centos8:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-centos8
|
||||
needs:
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-x86_64
|
||||
- package-centos8-ppc64le
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-ubi8
|
||||
needs:
|
||||
# Note: The ubi8 image currently uses the centos7 packages
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
|
||||
image-ubuntu18.04:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
|
||||
# The DIST=packaging target creates an image containing all built packages
|
||||
image-packaging:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-packaging
|
||||
needs:
|
||||
- package-amazonlinux2-aarch64
|
||||
- package-amazonlinux2-x86_64
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-ppc64le
|
||||
- package-centos8-x86_64
|
||||
- package-debian10-amd64
|
||||
- package-debian9-amd64
|
||||
- package-opensuse-leap15.1-x86_64
|
||||
- package-ubuntu16.04-amd64
|
||||
- package-ubuntu16.04-ppc64le
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
|
||||
# Define publish test helpers
|
||||
.test:toolkit:
|
||||
extends:
|
||||
- .integration
|
||||
variables:
|
||||
TEST_CASES: "toolkit"
|
||||
|
||||
.test:docker:
|
||||
extends:
|
||||
- .integration
|
||||
variables:
|
||||
TEST_CASES: "docker"
|
||||
|
||||
.test:containerd:
|
||||
# TODO: The containerd tests fail due to issues with SIGHUP.
|
||||
# Until this is resolved with retry up to twice and allow failure here.
|
||||
retry: 2
|
||||
allow_failure: true
|
||||
extends:
|
||||
- .integration
|
||||
variables:
|
||||
TEST_CASES: "containerd"
|
||||
|
||||
.test:crio:
|
||||
extends:
|
||||
- .integration
|
||||
variables:
|
||||
TEST_CASES: "crio"
|
||||
|
||||
# Define the test targets
|
||||
test-toolkit-ubuntu18.04:
|
||||
extends:
|
||||
- .test:toolkit
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
test-containerd-ubuntu18.04:
|
||||
extends:
|
||||
- .test:containerd
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
test-crio-ubuntu18.04:
|
||||
extends:
|
||||
- .test:crio
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
test-docker-ubuntu18.04:
|
||||
extends:
|
||||
- .test:docker
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
# build-all jobs build packages for every OS / ARCH combination we support.
|
||||
#
|
||||
# They are run under two conditions:
|
||||
|
||||
@@ -32,6 +32,63 @@ variables:
|
||||
DEVEL_RELEASE_IMAGE_VERSION: "devel"
|
||||
# On the multi-arch builder we don't need the qemu setup.
|
||||
SKIP_QEMU_SETUP: "1"
|
||||
# Define the public staging registry
|
||||
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
|
||||
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
|
||||
|
||||
.image-pull:
|
||||
stage: image-build
|
||||
variables:
|
||||
IN_REGISTRY: "${STAGING_REGISTRY}"
|
||||
IN_IMAGE_NAME: container-toolkit
|
||||
IN_VERSION: "${STAGING_VERSION}"
|
||||
OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
|
||||
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
||||
OUT_REGISTRY: "${CI_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
# We delay the job start to allow the public pipeline to generate the required images.
|
||||
when: delayed
|
||||
start_in: 30 minutes
|
||||
timeout: 30 minutes
|
||||
retry:
|
||||
max: 2
|
||||
when:
|
||||
- job_execution_timeout
|
||||
- stuck_or_timeout_failure
|
||||
before_script:
|
||||
- >
|
||||
docker pull ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
|
||||
script:
|
||||
- docker pull ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}
|
||||
- docker tag ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} ${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST}
|
||||
- docker login -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" "${OUT_REGISTRY}"
|
||||
- docker push ${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST}
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-centos7
|
||||
|
||||
image-centos8:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-centos8
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubi8
|
||||
|
||||
image-ubuntu18.04:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubuntu18.04
|
||||
|
||||
# The DIST=packaging target creates an image containing all built packages
|
||||
image-packaging:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-packaging
|
||||
|
||||
# We skip the integration tests for the internal CI:
|
||||
.integration:
|
||||
@@ -49,13 +106,9 @@ variables:
|
||||
variables:
|
||||
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}-${DIST}"
|
||||
IMAGE_ARCHIVE: "container-toolkit.tar"
|
||||
rules:
|
||||
- if: $CI_COMMIT_MESSAGE =~ /\[skip[ _-]scans?\]/i
|
||||
when: never
|
||||
- if: $SKIP_SCANS
|
||||
when: never
|
||||
- if: $CI_COMMIT_TAG == null && $CI_COMMIT_BRANCH != $RELEASE_DEVEL_BRANCH
|
||||
allow_failure: true
|
||||
except:
|
||||
variables:
|
||||
- $SKIP_SCANS && $SKIP_SCANS == "yes"
|
||||
before_script:
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
# TODO: We should specify the architecture here and scan all architectures
|
||||
@@ -66,7 +119,7 @@ variables:
|
||||
export SSA_TOKEN=$(curl --request POST --header "Authorization: Basic $AuthHeader" --header "Content-Type: application/x-www-form-urlencoded" ${SSA_ISSUER_URL} | jq ".access_token" | tr -d '"')
|
||||
- if [ -z "$SSA_TOKEN" ]; then exit 1; else echo "SSA_TOKEN set!"; fi
|
||||
script:
|
||||
- pulse-cli -n $NSPECT_ID --pss $PSS_URL --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
|
||||
- pulse-cli -n $NSPECT_ID --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
|
||||
artifacts:
|
||||
when: always
|
||||
expire_in: 1 week
|
||||
@@ -115,8 +168,6 @@ scan-ubi8:
|
||||
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
|
||||
OUT_REGISTRY: "${NGC_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
||||
# TODO: For now we disable external releases
|
||||
DOCKER: echo
|
||||
|
||||
.release:dockerhub:
|
||||
extends:
|
||||
@@ -127,8 +178,12 @@ scan-ubi8:
|
||||
OUT_REGISTRY: "${DOCKERHUB_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${REGISTRY_IMAGE}"
|
||||
|
||||
# TODO: For now we disable external releases
|
||||
DOCKER: echo
|
||||
release:staging-ubuntu18.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
# Define the external release targets
|
||||
# Release to NGC
|
||||
|
||||
5
Makefile
5
Makefile
@@ -16,11 +16,8 @@ DOCKER ?= docker
|
||||
MKDIR ?= mkdir
|
||||
DIST_DIR ?= $(CURDIR)/dist
|
||||
|
||||
LIB_NAME := nvidia-container-toolkit
|
||||
LIB_VERSION := 1.6.0
|
||||
LIB_TAG ?= rc.2
|
||||
include $(CURDIR)/versions.mk
|
||||
|
||||
GOLANG_VERSION := 1.16.3
|
||||
MODULE := github.com/NVIDIA/nvidia-container-toolkit
|
||||
|
||||
# By default run all native docker-based targets
|
||||
|
||||
@@ -46,16 +46,18 @@ ENV NVIDIA_DISABLE_REQUIRE="true"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=utility
|
||||
|
||||
ARG ARTIFACTS_ROOT
|
||||
ARG PACKAGE_DIST
|
||||
COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
|
||||
|
||||
WORKDIR /artifacts/packages
|
||||
|
||||
ARG ARTIFACTS_DIR
|
||||
COPY ${ARTIFACTS_DIR}/* /artifacts/packages/
|
||||
|
||||
ARG PACKAGE_VERSION
|
||||
ARG PACKAGE_ARCH
|
||||
RUN yum localinstall -y \
|
||||
libnvidia-container1-${PACKAGE_VERSION}*.rpm \
|
||||
libnvidia-container-tools-${PACKAGE_VERSION}*.rpm \
|
||||
nvidia-container-toolkit-${PACKAGE_VERSION}*.rpm
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-${PACKAGE_VERSION}*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-${PACKAGE_VERSION}*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit-${PACKAGE_VERSION}*.rpm
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
@@ -73,4 +75,11 @@ LABEL description="See summary"
|
||||
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
|
||||
ENTRYPOINT ["/work/nvidia-toolkit"]
|
||||
# Install / upgrade packages here that are required to resolve CVEs
|
||||
ARG CVE_UPDATES
|
||||
RUN if [ -n "${CVE_UPDATES}" ]; then \
|
||||
yum update -y ${CVE_UPDATES} && \
|
||||
rm -rf /var/cache/yum/*; \
|
||||
fi
|
||||
|
||||
ENTRYPOINT ["/work/nvidia-toolkit"]
|
||||
|
||||
29
build/container/Dockerfile.packaging
Normal file
29
build/container/Dockerfile.packaging
Normal file
@@ -0,0 +1,29 @@
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG BASE_DIST
|
||||
ARG CUDA_VERSION
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
ARG VERSION="N/A"
|
||||
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
ENV NVIDIA_CONTAINER_TOOLKIT_VERSION="${VERSION}"
|
||||
|
||||
ARG ARTIFACTS_ROOT
|
||||
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||
|
||||
WORKDIR /artifacts/packages
|
||||
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
@@ -52,16 +52,18 @@ ENV NVIDIA_DISABLE_REQUIRE="true"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=utility
|
||||
|
||||
ARG ARTIFACTS_ROOT
|
||||
ARG PACKAGE_DIST
|
||||
COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
|
||||
|
||||
WORKDIR /artifacts/packages
|
||||
|
||||
ARG ARTIFACTS_DIR
|
||||
COPY ${ARTIFACTS_DIR}/* /artifacts/packages/
|
||||
|
||||
ARG PACKAGE_VERSION
|
||||
ARG PACKAGE_ARCH
|
||||
RUN dpkg -i \
|
||||
libnvidia-container1_${PACKAGE_VERSION}*.deb \
|
||||
libnvidia-container-tools_${PACKAGE_VERSION}*.deb \
|
||||
nvidia-container-toolkit_${PACKAGE_VERSION}*.deb
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_${PACKAGE_VERSION}*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_${PACKAGE_VERSION}*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit_${PACKAGE_VERSION}*.deb
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
|
||||
@@ -17,21 +17,15 @@ MKDIR ?= mkdir
|
||||
DIST_DIR ?= $(CURDIR)/dist
|
||||
|
||||
##### Global variables #####
|
||||
include $(CURDIR)/versions.mk
|
||||
|
||||
# TODO: These should be defined ONCE and currently duplicate the version in the
|
||||
# toolkit makefile.
|
||||
LIB_VERSION := 1.6.0
|
||||
LIB_TAG := rc.2
|
||||
|
||||
VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
|
||||
CUDA_VERSION ?= 11.4.2
|
||||
GOLANG_VERSION ?= 1.16.4
|
||||
ifeq ($(IMAGE_NAME),)
|
||||
REGISTRY ?= nvidia
|
||||
IMAGE_NAME := $(REGISTRY)/container-toolkit
|
||||
endif
|
||||
|
||||
VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
|
||||
IMAGE_TAG ?= $(VERSION)-$(DIST)
|
||||
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
|
||||
|
||||
@@ -39,12 +33,15 @@ IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
|
||||
DEFAULT_PUSH_TARGET := ubuntu18.04
|
||||
TARGETS := ubuntu20.04 ubuntu18.04 ubi8 centos7 centos8
|
||||
|
||||
BUILD_TARGETS := $(patsubst %, build-%, $(TARGETS))
|
||||
PUSH_TARGETS := $(patsubst %, push-%, $(TARGETS))
|
||||
TEST_TARGETS := $(patsubst %, test-%, $(TARGETS))
|
||||
META_TARGETS := packaging
|
||||
|
||||
BUILD_TARGETS := $(patsubst %,build-%,$(TARGETS) $(META_TARGETS))
|
||||
PUSH_TARGETS := $(patsubst %,push-%,$(TARGETS) $(META_TARGETS))
|
||||
TEST_TARGETS := $(patsubst %,test-%, $(TARGETS))
|
||||
|
||||
.PHONY: $(TARGETS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
||||
|
||||
push-%: DIST = $(*)
|
||||
$(PUSH_TARGETS): push-%:
|
||||
$(DOCKER) push "$(IMAGE_NAME):$(IMAGE_TAG)"
|
||||
|
||||
@@ -62,41 +59,51 @@ push-short:
|
||||
build-%: DIST = $(*)
|
||||
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
|
||||
|
||||
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
|
||||
|
||||
# Use a generic build target to build the relevant images
|
||||
$(BUILD_TARGETS): build-%: $(ARTIFACTS_DIR)
|
||||
$(DOCKER) build --pull \
|
||||
$(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
DOCKER_BUILDKIT=1 \
|
||||
$(DOCKER) build --pull \
|
||||
--platform=linux/amd64 \
|
||||
--tag $(IMAGE) \
|
||||
--build-arg ARTIFACTS_DIR="$(ARTIFACTS_DIR)" \
|
||||
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
|
||||
--build-arg BASE_DIST="$(BASE_DIST)" \
|
||||
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
|
||||
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
|
||||
--build-arg PACKAGE_ARCH="$(PACKAGE_ARCH)" \
|
||||
--build-arg VERSION="$(VERSION)" \
|
||||
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
|
||||
-f $(DOCKERFILE) \
|
||||
$(CURDIR)
|
||||
|
||||
|
||||
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
|
||||
|
||||
build-ubuntu%: BASE_DIST = $(*)
|
||||
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
|
||||
build-ubuntu%: ARTIFACTS_DIR = $(ARTIFACTS_ROOT)/$(*)/amd64
|
||||
build-ubuntu%: PACKAGE_ARCH := amd64
|
||||
build-ubuntu%: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-ubuntu%: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
|
||||
build-ubuntu18.04: BASE_DIST := ubuntu18.04
|
||||
build-ubuntu20.04: BASE_DIST := ubuntu20.04
|
||||
|
||||
build-ubi8: DOCKERFILE_SUFFIX := centos
|
||||
# TODO: Update this to use the centos8 packages
|
||||
build-ubi8: ARTIFACTS_DIR = $(ARTIFACTS_ROOT)/centos7/x86_64
|
||||
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
build-ubi8: BASE_DIST := ubi8
|
||||
build-ubi8: DOCKERFILE_SUFFIX := centos
|
||||
build-ubi8: PACKAGE_ARCH := x86_64
|
||||
build-ubi8: PACKAGE_DIST = centos7
|
||||
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-centos%: BASE_DIST = $(*)
|
||||
build-centos%: DOCKERFILE_SUFFIX := centos
|
||||
build-centos%: ARTIFACTS_DIR = $(ARTIFACTS_ROOT)/$(*)/x86_64
|
||||
build-centos%: PACKAGE_ARCH := x86_64
|
||||
build-centos%: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-centos%: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-centos7: BASE_DIST := centos7
|
||||
build-centos8: BASE_DIST := centos8
|
||||
build-packaging: BASE_DIST := ubuntu20.04
|
||||
build-packaging: DOCKERFILE_SUFFIX := packaging
|
||||
build-packaging: PACKAGE_ARCH := amd64
|
||||
build-packaging: PACKAGE_DIST = all
|
||||
build-packaging: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
|
||||
# Test targets
|
||||
test-%: DIST = $(*)
|
||||
@@ -108,3 +115,22 @@ $(TEST_TARGETS): test-%:
|
||||
$(IMAGE) \
|
||||
--no-cleanup-on-error
|
||||
|
||||
.PHONY: test-packaging
|
||||
test-packaging: DIST = packaging
|
||||
test-packaging:
|
||||
@echo "Testing package image contents"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/amazonlinux2/aarch64" || echo "Missing amazonlinux2/aarch64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/amazonlinux2/x86_64" || echo "Missing amazonlinux2/x86_64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/ppc64le" || echo "Missing centos7/ppc64le"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/x86_64" || echo "Missing centos7/x86_64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/aarch64" || echo "Missing centos8/aarch64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/ppc64le" || echo "Missing centos8/ppc64le"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/x86_64" || echo "Missing centos8/x86_64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/debian10/amd64" || echo "Missing debian10/amd64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/debian9/amd64" || echo "Missing debian9/amd64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/opensuse-leap15.1/x86_64" || echo "Missing opensuse-leap15.1/x86_64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu16.04/amd64" || echo "Missing ubuntu16.04/amd64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu16.04/ppc64le" || echo "Missing ubuntu16.04/ppc64le"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/amd64" || echo "Missing ubuntu18.04/amd64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/arm64" || echo "Missing ubuntu18.04/arm64"
|
||||
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/ppc64le" || echo "Missing ubuntu18.04/ppc64le"
|
||||
|
||||
170
cmd/nvidia-container-runtime.experimental/README.md
Normal file
170
cmd/nvidia-container-runtime.experimental/README.md
Normal file
@@ -0,0 +1,170 @@
|
||||
# The Experimental NVIDIA Container Runtime
|
||||
|
||||
## Introduction
|
||||
|
||||
The experimental NVIDIA Container Runtime is a proof-of-concept runtime that
|
||||
approaches the problem of making GPUs (or other NVIDIA devices) available in
|
||||
containerized environments in a different manner to the existing
|
||||
[NVIDIA Container Runtime](../nvidia-container-runtime). Wherease the current
|
||||
runtime relies on the [NVIDIA Container Library](https://github.com/NVIDIA/libnvidia-container)
|
||||
to perform the modifications to a container, the experiemental runtime aims to
|
||||
express the required modifications in terms of changes to a container's [OCI
|
||||
runtime specification](https://github.com/opencontainers/runtime-spec). This
|
||||
also aligns with open initiatives such as the [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface).
|
||||
|
||||
## Known Limitations
|
||||
|
||||
* The path of NVIDIA CUDA libraries / binaries injected into the container currently match that of the host system. This means that on an Ubuntu-based host systems these would be at `/usr/lib/x86_64-linux-gnu`
|
||||
even if the container distribution would normally expect these at another location (e.g. `/usr/lib64`)
|
||||
* Tools such as `nvidia-smi` may create additional device nodes in the container when run. This is
|
||||
prevented in the "classic" runtime (and the NVIDIA Container Library) by modifying
|
||||
the `/proc/driver/nvidia/params` file in the container.
|
||||
* Other `NVIDIA_*` environment variables (e.g. `NVIDIA_DRIVER_CAPABILITIES`) are
|
||||
not considered to filter mounted libraries or binaries. This is equivalent to
|
||||
always using `NVIDIA_DRIVER_CAPABILITIES=all`.
|
||||
|
||||
## Building / Installing
|
||||
|
||||
The experimental NVIDIA Container Runtime is a self-contained golang binary and
|
||||
can thus be built from source, or installed directly using `go install`.
|
||||
|
||||
### From source
|
||||
|
||||
After cloning the `nvidia-container-toolkit` repository from [GitLab](https://gitlab.com/nvidia/container-toolkit/container-toolkit)
|
||||
or from the read-only mirror on [GitHub](https://github.com/NVIDIA/nvidia-container-toolkit)
|
||||
running the following make command in the repository root:
|
||||
```bash
|
||||
make cmd-nvidia-container-runtime.experimental
|
||||
```
|
||||
will create an executable file `nvidia-container-runtime.experimental` in the
|
||||
root.
|
||||
|
||||
A dockerized target:
|
||||
```bash
|
||||
make docker-cmd-nvidia-container-runtime.experimental
|
||||
```
|
||||
will also create the executable file `nvidia-container-runtime.experimental`
|
||||
without requiring the setup of a development environment (with the exception)
|
||||
of having `make` and `docker` installed.
|
||||
|
||||
### Go install
|
||||
|
||||
The experimental NVIDIA Container Runtime can also be `go installed` by running
|
||||
the following command:
|
||||
|
||||
```bash
|
||||
go install github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental@experimental
|
||||
```
|
||||
which will build and install the `nvidia-container-runtime.experimental`
|
||||
executable in the `${GOPATH}/bin` folder.
|
||||
|
||||
## Using the Runtime
|
||||
|
||||
The experimental NVIDIA Container Runtime is intended as a drop-in replacement
|
||||
for the "classic" NVIDIA Container Runtime. As such it is used in the same
|
||||
way (with the exception of the known limitiations noted above).
|
||||
|
||||
In general terms, to use the experimental NVIDIA Container Runtime to launch a
|
||||
container with GPU support, it should be inserted as a shim for the desired
|
||||
low-level OCI-compliant runtime (e.g. `runc` or `crun`). How this is achieved
|
||||
depends on how containers are being launched.
|
||||
|
||||
### Docker
|
||||
In the case of `docker` for example, the runtime must be registered with the
|
||||
Docker daemon. This can be done by modifying the `/etc/docker/daemon.json` file
|
||||
to contain the following:
|
||||
```json
|
||||
{
|
||||
"runtimes": {
|
||||
"nvidia-experimental": {
|
||||
"path": "nvidia-container-runtime.experimental",
|
||||
"runtimeArgs": []
|
||||
}
|
||||
},
|
||||
}
|
||||
```
|
||||
This can then be invoked from docker by including the `--runtime=nvidia-experimental`
|
||||
option when executing a `docker run` command.
|
||||
|
||||
### Runc
|
||||
|
||||
If `runc` is being used to run a container directly substituting the `runc`
|
||||
command for `nvidia-container-runtime.experimental` should be sufficient as
|
||||
the latter will `exec` to `runc` once the required (in-place) modifications have
|
||||
been made to the container's OCI spec (`config.json` file).
|
||||
|
||||
## Configuration
|
||||
|
||||
### Runtime Path
|
||||
The experimental NVIDIA Container Runtime allows for the path to the low-level
|
||||
runtime to be specified. This is done by setting the following option in the
|
||||
`/etc/nvidia-container-runtime/config.toml` file or setting the
|
||||
`NVIDIA_CONTAINER_RUNTIME_PATH` environment variable.
|
||||
|
||||
```toml
|
||||
[nvidia-container-runtime.experimental]
|
||||
|
||||
runtime-path = "/path/to/low-level-runtime"
|
||||
```
|
||||
This path can be set to the path for `runc` or `crun` on a system and if it is
|
||||
a relative path, the `PATH` is searched for a matching executable.
|
||||
|
||||
### Device Selection
|
||||
In order to select a specific device, the experimental NVIDIA Container Runtime
|
||||
mimics the behaviour of the "classic" runtime. That is to say that the values of
|
||||
certain [environment variables](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#environment-variables-oci-spec)
|
||||
**in the container's OCI specification** control the behaviour of the runtime.
|
||||
|
||||
#### `NVIDIA_VISIBLE_DEVICES`
|
||||
This variable controls which GPUs will be made accessible inside the container.
|
||||
|
||||
##### Possible values
|
||||
* `0,1,2`, `GPU-fef8089b` …: a comma-separated list of GPU UUID(s) or index(es).
|
||||
* `all`: all GPUs will be accessible, this is the default value in our container images.
|
||||
* `none`: no GPU will be accessible, but driver capabilities will be enabled.
|
||||
* `void` or *empty* or *unset*: `nvidia-container-runtime` will have the same behavior as `runc`.
|
||||
|
||||
**Note**: When running on a MIG capable device, the following values will also be available:
|
||||
* `0:0,0:1,1:0`, `MIG-GPU-fef8089b/0/1` …: a comma-separated list of MIG Device UUID(s) or index(es).
|
||||
|
||||
Where the MIG device indices have the form `<GPU Device Index>:<MIG Device Index>` as seen in the example output:
|
||||
```
|
||||
$ nvidia-smi -L
|
||||
GPU 0: Graphics Device (UUID: GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5)
|
||||
MIG Device 0: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/0)
|
||||
MIG Device 1: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/1)
|
||||
MIG Device 2: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/11/0)
|
||||
```
|
||||
|
||||
#### `NVIDIA_MIG_CONFIG_DEVICES`
|
||||
This variable controls which of the visible GPUs can have their MIG
|
||||
configuration managed from within the container. This includes enabling and
|
||||
disabling MIG mode, creating and destroying GPU Instances and Compute
|
||||
Instances, etc.
|
||||
|
||||
##### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG configurations managed.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/config` file on the host.
|
||||
|
||||
#### `NVIDIA_MIG_MONITOR_DEVICES`
|
||||
This variable controls which of the visible GPUs can have aggregate information
|
||||
about all of their MIG devices monitored from within the container. This
|
||||
includes inspecting the aggregate memory usage, listing the aggregate running
|
||||
processes, etc.
|
||||
|
||||
##### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG devices monitored.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/monitor` file on the host.
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// list represents a list of config updaters that are applied in order, with later
|
||||
// configs having preference.
|
||||
type list struct {
|
||||
logger *log.Logger
|
||||
configs []configUpdater
|
||||
}
|
||||
|
||||
var _ configUpdater = (*list)(nil)
|
||||
|
||||
func newListWithLogger(logger *log.Logger, ci ...configUpdater) configUpdater {
|
||||
c := list{
|
||||
logger: logger,
|
||||
configs: ci,
|
||||
}
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c list) Update(cfg *Config) error {
|
||||
for i, u := range c.configs {
|
||||
c.logger.Debugf("Applying config %v: %v", i, u)
|
||||
err := u.Update(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error applying config %v: %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestComposite(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
// Empty list
|
||||
c := newListWithLogger(logger)
|
||||
|
||||
cfg := &Config{}
|
||||
err := c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &Config{}, cfg)
|
||||
|
||||
// Add a single mock config
|
||||
mockConfigUpdater := &configUpdaterMock{
|
||||
UpdateFunc: func(cfg *Config) error {
|
||||
cfg.DebugFilePath = "updated"
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
c = newListWithLogger(logger, mockConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
|
||||
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 1)
|
||||
|
||||
// Reset the calls
|
||||
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
|
||||
// define an error config
|
||||
errorConfigUpdater := &configUpdaterMock{
|
||||
UpdateFunc: func(cfg *Config) error {
|
||||
return fmt.Errorf("mock error")
|
||||
},
|
||||
}
|
||||
|
||||
c = newListWithLogger(logger, errorConfigUpdater, mockConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.Error(t, err)
|
||||
require.EqualValues(t, &Config{}, cfg)
|
||||
|
||||
require.Len(t, errorConfigUpdater.UpdateCalls(), 1)
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 0)
|
||||
|
||||
// Reset the calls
|
||||
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
errorConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
|
||||
// Change the order of the config and error
|
||||
c = newListWithLogger(logger, mockConfigUpdater, errorConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.Error(t, err)
|
||||
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
|
||||
|
||||
require.Len(t, errorConfigUpdater.UpdateCalls(), 1)
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 1)
|
||||
|
||||
// Reset the calls
|
||||
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
errorConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
|
||||
// Call the mock twice
|
||||
c = newListWithLogger(logger, mockConfigUpdater, mockConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
|
||||
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 2)
|
||||
}
|
||||
63
cmd/nvidia-container-runtime.experimental/config/config.go
Normal file
63
cmd/nvidia-container-runtime.experimental/config/config.go
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Config defines the configuration options for the NVIDIA Container Runtime
|
||||
type Config struct {
|
||||
// Root defines the root of the file system to be used for locating mounts
|
||||
Root string
|
||||
// DebugFilePath defines a log file to print debug output to
|
||||
DebugFilePath string
|
||||
// RuntimePath defines the path to an OCI compliant runtime
|
||||
RuntimePath string
|
||||
// LogLevel defines the logging level for the application
|
||||
LogLevel string
|
||||
}
|
||||
|
||||
//go:generate moq -stub -out config_mock.go . configUpdater
|
||||
|
||||
// configUpdate represents an interface for applying updates to a config.
|
||||
type configUpdater interface {
|
||||
Update(*Config) error
|
||||
}
|
||||
|
||||
// GetConfig returns a config struct with the values resolved. The values are defined in order of
|
||||
// priority:
|
||||
// 1. From the associated environment variables
|
||||
// 2. From the loaded config file
|
||||
// 3. From the default values defined in the `defaultConfig` function
|
||||
func GetConfig(logger *log.Logger) (*Config, error) {
|
||||
cfg := &Config{}
|
||||
|
||||
configs := newListWithLogger(logger,
|
||||
newDefaultConfig(),
|
||||
newDefaultConfigFileWithLogger(logger),
|
||||
newConfigFromEnvironment(),
|
||||
)
|
||||
|
||||
err := configs.Update(cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting config: %v", err)
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that configUpdaterMock does implement configUpdater.
|
||||
// If this is not the case, regenerate this file with moq.
|
||||
var _ configUpdater = &configUpdaterMock{}
|
||||
|
||||
// configUpdaterMock is a mock implementation of configUpdater.
|
||||
//
|
||||
// func TestSomethingThatUsesconfigUpdater(t *testing.T) {
|
||||
//
|
||||
// // make and configure a mocked configUpdater
|
||||
// mockedconfigUpdater := &configUpdaterMock{
|
||||
// UpdateFunc: func(config *Config) error {
|
||||
// panic("mock out the Update method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedconfigUpdater in code that requires configUpdater
|
||||
// // and then make assertions.
|
||||
//
|
||||
// }
|
||||
type configUpdaterMock struct {
|
||||
// UpdateFunc mocks the Update method.
|
||||
UpdateFunc func(config *Config) error
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// Update holds details about calls to the Update method.
|
||||
Update []struct {
|
||||
// Config is the config argument value.
|
||||
Config *Config
|
||||
}
|
||||
}
|
||||
lockUpdate sync.RWMutex
|
||||
}
|
||||
|
||||
// Update calls UpdateFunc.
|
||||
func (mock *configUpdaterMock) Update(config *Config) error {
|
||||
callInfo := struct {
|
||||
Config *Config
|
||||
}{
|
||||
Config: config,
|
||||
}
|
||||
mock.lockUpdate.Lock()
|
||||
mock.calls.Update = append(mock.calls.Update, callInfo)
|
||||
mock.lockUpdate.Unlock()
|
||||
if mock.UpdateFunc == nil {
|
||||
var (
|
||||
errOut error
|
||||
)
|
||||
return errOut
|
||||
}
|
||||
return mock.UpdateFunc(config)
|
||||
}
|
||||
|
||||
// UpdateCalls gets all the calls that were made to Update.
|
||||
// Check the length with:
|
||||
// len(mockedconfigUpdater.UpdateCalls())
|
||||
func (mock *configUpdaterMock) UpdateCalls() []struct {
|
||||
Config *Config
|
||||
} {
|
||||
var calls []struct {
|
||||
Config *Config
|
||||
}
|
||||
mock.lockUpdate.RLock()
|
||||
calls = mock.calls.Update
|
||||
mock.lockUpdate.RUnlock()
|
||||
return calls
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
// By default debug is disabled
|
||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
||||
testDir := path.Join(wd, "test")
|
||||
filename := path.Join(testDir, configFileRelativePath)
|
||||
|
||||
previousConfig, present := os.LookupEnv(configOverride)
|
||||
os.Setenv(configOverride, testDir)
|
||||
defer func() {
|
||||
if present {
|
||||
os.Setenv(configOverride, previousConfig)
|
||||
} else {
|
||||
os.Unsetenv(configOverride)
|
||||
}
|
||||
}()
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
|
||||
|
||||
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
cfg, err := GetConfig(logger)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "/nvidia-container-toolkit.log", cfg.DebugFilePath)
|
||||
}
|
||||
47
cmd/nvidia-container-runtime.experimental/config/default.go
Normal file
47
cmd/nvidia-container-runtime.experimental/config/default.go
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type defaultConfig struct{}
|
||||
|
||||
var _ configUpdater = (*defaultConfig)(nil)
|
||||
|
||||
func newDefaultConfig() configUpdater {
|
||||
c := defaultConfig{}
|
||||
return &c
|
||||
}
|
||||
|
||||
// Update defines the the default values for the config options
|
||||
func (c defaultConfig) Update(cfg *Config) error {
|
||||
*cfg = Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: log.InfoLevel.String(),
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getDefaultConfig() *Config {
|
||||
cfg := &Config{}
|
||||
|
||||
defaultConfig{}.Update(cfg)
|
||||
|
||||
return cfg
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDefaultConfigUpdate(t *testing.T) {
|
||||
cfg := &Config{}
|
||||
|
||||
require.Empty(t, cfg.DebugFilePath)
|
||||
|
||||
c := defaultConfig{}
|
||||
|
||||
err := c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "/dev/null", cfg.DebugFilePath)
|
||||
require.Equal(t, "", cfg.Root)
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
debugFilePathEnvvarName = "NVIDIA_CONTAINER_RUNTIME_DEBUG"
|
||||
runtimePathEnvvarName = "NVIDIA_CONTAINER_RUNTIME_PATH"
|
||||
rootEnvvarName = "NVIDIA_CONTAINER_RUNTIME_ROOT"
|
||||
logLevelEnvvarName = "NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"
|
||||
)
|
||||
|
||||
type envConfig struct{}
|
||||
|
||||
func newConfigFromEnvironment() configUpdater {
|
||||
c := envConfig{}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c envConfig) Update(cfg *Config) error {
|
||||
debugFilePathEnvvar, exists := os.LookupEnv(debugFilePathEnvvarName)
|
||||
if exists && strings.TrimSpace(debugFilePathEnvvar) != "" {
|
||||
cfg.DebugFilePath = debugFilePathEnvvar
|
||||
}
|
||||
|
||||
runtimePathEnvvar, exists := os.LookupEnv(runtimePathEnvvarName)
|
||||
if exists && strings.TrimSpace(runtimePathEnvvar) != "" {
|
||||
cfg.RuntimePath = runtimePathEnvvar
|
||||
}
|
||||
|
||||
rootEnvvar, exists := os.LookupEnv(rootEnvvarName)
|
||||
if exists && strings.TrimSpace(rootEnvvar) != "" {
|
||||
cfg.Root = rootEnvvar
|
||||
}
|
||||
|
||||
logLevelEnvvar, exists := os.LookupEnv(logLevelEnvvarName)
|
||||
if exists && strings.TrimSpace(logLevelEnvvar) != "" {
|
||||
cfg.LogLevel = logLevelEnvvar
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
29
cmd/nvidia-container-runtime.experimental/config/noop.go
Normal file
29
cmd/nvidia-container-runtime.experimental/config/noop.go
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
// noop implements a configUpdater that does not update a config.
|
||||
type noop struct{}
|
||||
|
||||
func newNoopConfigUpdater() configUpdater {
|
||||
c := noop{}
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c noop) Update(cfg *Config) error {
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNoopDoesNotModifyConfig(t *testing.T) {
|
||||
cfg := &Config{
|
||||
DebugFilePath: "test-path",
|
||||
}
|
||||
|
||||
c := newNoopConfigUpdater()
|
||||
|
||||
err := c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "test-path", cfg.DebugFilePath)
|
||||
}
|
||||
158
cmd/nvidia-container-runtime.experimental/config/toml.go
Normal file
158
cmd/nvidia-container-runtime.experimental/config/toml.go
Normal file
@@ -0,0 +1,158 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
configFileRelativePath = "nvidia-container-runtime/config.toml"
|
||||
configOverride = "XDG_CONFIG_HOME"
|
||||
defaultConfigRoot = "/etc"
|
||||
|
||||
nvidiaContainerCliSection = "nvidia-container-cli"
|
||||
nvidiaContainerRuntimeConfigSection = "nvidia-container-runtime"
|
||||
nvidiaContainerRuntimeExperimentalConfigSection = "nvidia-container-runtime.experimental"
|
||||
)
|
||||
|
||||
type tomlConfig struct {
|
||||
logger *log.Logger
|
||||
path string
|
||||
sections []tomlSection
|
||||
}
|
||||
|
||||
type tomlSection struct {
|
||||
section string
|
||||
keys map[string]struct{}
|
||||
}
|
||||
|
||||
func newDefaultConfigFileWithLogger(logger *log.Logger) configUpdater {
|
||||
configDir := defaultConfigRoot
|
||||
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
|
||||
configDir = XDGConfigDir
|
||||
}
|
||||
configFilePath := filepath.Join(configDir, configFileRelativePath)
|
||||
|
||||
return newConfigFromFileWithLogger(logger, configFilePath)
|
||||
}
|
||||
|
||||
func newConfigFromFileWithLogger(logger *log.Logger, filepath string) configUpdater {
|
||||
if _, err := os.Stat(filepath); os.IsNotExist(err) {
|
||||
logger.Warnf("The config file '%v' does not exist", filepath)
|
||||
return newNoopConfigUpdater()
|
||||
}
|
||||
|
||||
sections := []tomlSection{
|
||||
{
|
||||
section: nvidiaContainerRuntimeConfigSection,
|
||||
},
|
||||
{
|
||||
section: nvidiaContainerRuntimeExperimentalConfigSection,
|
||||
},
|
||||
{
|
||||
section: nvidiaContainerCliSection,
|
||||
keys: map[string]struct{}{
|
||||
"root": {},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
c := tomlConfig{
|
||||
logger: logger,
|
||||
path: filepath,
|
||||
sections: sections,
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c tomlConfig) Update(cfg *Config) error {
|
||||
configFile, err := os.Open(c.path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening config file %v: %v", c.path, err)
|
||||
}
|
||||
defer configFile.Close()
|
||||
|
||||
return c.updateFromReader(cfg, configFile)
|
||||
}
|
||||
|
||||
func (c tomlConfig) updateFromReader(cfg *Config, reader io.Reader) error {
|
||||
toml, err := toml.LoadReader(reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading TOML contents: %v", err)
|
||||
}
|
||||
|
||||
for _, section := range c.sections {
|
||||
if v, ok := section.GetStringFrom(toml, "debug"); ok {
|
||||
cfg.DebugFilePath = v
|
||||
}
|
||||
|
||||
if v, ok := section.GetStringFrom(toml, "runtime-path"); ok {
|
||||
cfg.RuntimePath = v
|
||||
}
|
||||
|
||||
if v, ok := section.GetStringFrom(toml, "root"); ok {
|
||||
cfg.Root = v
|
||||
}
|
||||
|
||||
if v, ok := section.GetStringFrom(toml, "log-level"); ok {
|
||||
cfg.LogLevel = v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c tomlSection) GetStringFrom(toml *toml.Tree, key string) (string, bool) {
|
||||
value := c.GetFrom(toml, key)
|
||||
if value != nil {
|
||||
if v, ok := value.(string); ok {
|
||||
return v, ok
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (c tomlSection) GetFrom(toml *toml.Tree, key string) interface{} {
|
||||
if !c.validKey(key) {
|
||||
return nil
|
||||
}
|
||||
return toml.Get(c.configKey(key))
|
||||
}
|
||||
|
||||
func (c tomlSection) validKey(key string) bool {
|
||||
if c.keys == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
_, exists := c.keys[key]
|
||||
|
||||
return exists
|
||||
}
|
||||
|
||||
func (c tomlSection) configKey(key string) string {
|
||||
if c.section == "" {
|
||||
return key
|
||||
}
|
||||
return c.section + "." + key
|
||||
}
|
||||
259
cmd/nvidia-container-runtime.experimental/config/toml_test.go
Normal file
259
cmd/nvidia-container-runtime.experimental/config/toml_test.go
Normal file
@@ -0,0 +1,259 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"testing/iotest"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestUpdateFromReader(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
readerError bool
|
||||
lines []string
|
||||
expected *Config
|
||||
expectedError bool
|
||||
}{
|
||||
{
|
||||
description: "reader error returns error",
|
||||
readerError: true,
|
||||
expectedError: true,
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
description: "empty config returns defaults",
|
||||
lines: []string{},
|
||||
expected: &Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "debug output is set",
|
||||
lines: []string{"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "debug output is set in section",
|
||||
lines: []string{"[nvidia-container-runtime]", "debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "blank debug is set",
|
||||
lines: []string{"nvidia-container-runtime.debug=\"\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "non-string debug is ignored",
|
||||
lines: []string{"nvidia-container-runtime.debug=2"},
|
||||
expected: &Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "log-level is set",
|
||||
lines: []string{"nvidia-container-runtime.log-level=\"trace\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "trace",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
c := tomlConfig{
|
||||
logger: logger,
|
||||
sections: []tomlSection{
|
||||
{section: nvidiaContainerRuntimeConfigSection},
|
||||
},
|
||||
}
|
||||
var reader io.Reader
|
||||
if tc.readerError {
|
||||
reader = iotest.ErrReader(fmt.Errorf("error"))
|
||||
} else {
|
||||
reader = strings.NewReader(strings.Join(tc.lines, "\n"))
|
||||
}
|
||||
|
||||
err := c.updateFromReader(cfg, reader)
|
||||
|
||||
if tc.expectedError {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.EqualValues(t, tc.expected, cfg)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateFromReaderExperimental(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
readerError bool
|
||||
lines []string
|
||||
expected *Config
|
||||
expectedError bool
|
||||
}{
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
lines: []string{"[nvidia-container-runtime]", "debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.experimental.debug=\"\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.experimental.debug=2"},
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.experimental.debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
lines: []string{"[nvidia-container-runtime.experimental]", "debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
lines: []string{
|
||||
"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\"",
|
||||
"nvidia-container-runtime.experimental.debug=\"nvidia-container-exp-toolkit.log\"",
|
||||
},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-exp-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
c := tomlConfig{
|
||||
logger: logger,
|
||||
sections: []tomlSection{
|
||||
{section: nvidiaContainerRuntimeExperimentalConfigSection},
|
||||
},
|
||||
}
|
||||
var reader io.Reader
|
||||
if tc.readerError {
|
||||
reader = iotest.ErrReader(fmt.Errorf("error"))
|
||||
} else {
|
||||
reader = strings.NewReader(strings.Join(tc.lines, "\n"))
|
||||
}
|
||||
|
||||
err := c.updateFromReader(cfg, reader)
|
||||
|
||||
if tc.expectedError {
|
||||
require.Error(t, err, "%d: %v", i, tc)
|
||||
} else {
|
||||
require.NoError(t, err, "%d: %v", i, tc)
|
||||
}
|
||||
require.EqualValues(t, tc.expected, cfg, "%d: %v", i, tc)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfigFromFile(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
// By default debug is disabled
|
||||
lines := []string{
|
||||
"[nvidia-container-cli]",
|
||||
"root = \"/run/nvidia/driver\"",
|
||||
"[nvidia-container-runtime]",
|
||||
"#debug = \"/nvidia-container-toolkit.log\"",
|
||||
"",
|
||||
"[nvidia-container-runtime.experimental]",
|
||||
"debug = \"/nvidia-container-toolkit.experimental.log\"",
|
||||
}
|
||||
|
||||
contents := []byte(strings.Join(lines, "\n"))
|
||||
testDir := path.Join(wd, "test")
|
||||
filename := path.Join(testDir, configFileRelativePath)
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
|
||||
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
c := newConfigFromFileWithLogger(logger, filename)
|
||||
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
err = c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "/nvidia-container-toolkit.experimental.log", cfg.DebugFilePath)
|
||||
|
||||
require.Equal(t, "/run/nvidia/driver", cfg.Root)
|
||||
}
|
||||
|
||||
func TestConfigFromNonexistentFileReturnsNoop(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
c := newConfigFromFileWithLogger(logger, "/does/not/exist")
|
||||
|
||||
n, ok := c.(*noop)
|
||||
|
||||
require.True(t, ok)
|
||||
require.NotNil(t, n)
|
||||
}
|
||||
|
||||
func TestGetConfigKey(t *testing.T) {
|
||||
require.Equal(t, "key", tomlSection{}.configKey("key"))
|
||||
require.Equal(t, "section.key", tomlSection{section: "section"}.configKey("key"))
|
||||
}
|
||||
144
cmd/nvidia-container-runtime.experimental/main.go
Normal file
144
cmd/nvidia-container-runtime.experimental/main.go
Normal file
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ensure"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/filter"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modify"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental/config"
|
||||
)
|
||||
|
||||
const (
|
||||
visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES"
|
||||
visibleDevicesVoid = "void"
|
||||
)
|
||||
|
||||
var logger = log.New()
|
||||
|
||||
func main() {
|
||||
cfg, err := config.GetConfig(logger)
|
||||
if err != nil {
|
||||
logger.Errorf("Error loading config: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if cfg.DebugFilePath != "" && cfg.DebugFilePath != "/dev/null" {
|
||||
logFile, err := os.OpenFile(cfg.DebugFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
logger.Errorf("Error opening debug log file: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer logFile.Close()
|
||||
|
||||
logger.SetOutput(logFile)
|
||||
}
|
||||
|
||||
logLevel, err := log.ParseLevel(cfg.LogLevel)
|
||||
if err == nil {
|
||||
logger.SetLevel(logLevel)
|
||||
} else {
|
||||
logger.Warnf("Invalid log-level '%v'; using '%v'", cfg.LogLevel, logger.Level.String())
|
||||
}
|
||||
|
||||
logger.Infof("Starting nvidia-container-runtime: %v", os.Args)
|
||||
logger.Debugf("Using config=%+v", cfg)
|
||||
|
||||
if err := run(cfg, os.Args); err != nil {
|
||||
logger.Errorf("Error running runtime: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func run(cfg *config.Config, args []string) error {
|
||||
logger.Debugf("running with args=%v", args)
|
||||
|
||||
// We create a low-level runtime
|
||||
lowLevelRuntime, err := createLowLevelRuntime(logger, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error constructing low-level runtime: %v", err)
|
||||
}
|
||||
|
||||
if !oci.HasCreateSubcommand(args) {
|
||||
logger.Infof("No modification of OCI specification required")
|
||||
logger.Infof("Forwarding command to runtime")
|
||||
return lowLevelRuntime.Exec(args)
|
||||
}
|
||||
|
||||
// We create the OCI spec that is to be modified
|
||||
ociSpec, bundleDir, err := oci.NewSpecFromArgs(args)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error constructing OCI spec: %v", err)
|
||||
}
|
||||
|
||||
err = ociSpec.Load()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading OCI specification: %v", err)
|
||||
}
|
||||
|
||||
visibleDevices, exists := ociSpec.LookupEnv(visibleDevicesEnvvar)
|
||||
if !exists || visibleDevices == "" || visibleDevices == visibleDevicesVoid {
|
||||
logger.Infof("Using low-level runtime: %v=%v (exists=%v)", visibleDevicesEnvvar, visibleDevices, exists)
|
||||
return lowLevelRuntime.Exec(os.Args)
|
||||
}
|
||||
|
||||
// We create the modifier that will be applied by the Modifying Runtime Wrapper
|
||||
modifier, err := createModifier(cfg.Root, bundleDir, visibleDevices, ociSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error constructing modifer: %v", err)
|
||||
}
|
||||
|
||||
// We construct the Modifying runtime
|
||||
r := runtime.NewModifyingRuntimeWrapperWithLogger(logger, lowLevelRuntime, ociSpec, modifier)
|
||||
|
||||
return r.Exec(os.Args)
|
||||
}
|
||||
|
||||
func createLowLevelRuntime(logger *log.Logger, cfg *config.Config) (oci.Runtime, error) {
|
||||
if cfg.RuntimePath == "" {
|
||||
return oci.NewLowLevelRuntimeWithLogger(logger, "docker-runc", "runc")
|
||||
}
|
||||
logger.Infof("Creating runtime with path %v", cfg.RuntimePath)
|
||||
return oci.NewRuntimeForPathWithLogger(logger, cfg.RuntimePath)
|
||||
}
|
||||
|
||||
func createModifier(root string, bundleDir string, visibleDevices string, env filter.EnvLookup) (modify.Modifier, error) {
|
||||
// We set up the modifier using discovery
|
||||
discovered, err := discover.NewNVMLServerWithLogger(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering devices: %v", err)
|
||||
}
|
||||
|
||||
// We apply a filter to the discovered devices
|
||||
selected := filter.NewSelectDevicesFromWithLogger(logger, discovered, visibleDevices, env)
|
||||
|
||||
// We ensure that the selected devices are available
|
||||
available := ensure.NewEnsureDevicesWithLogger(logger, selected, root)
|
||||
|
||||
// We construct the modifer for the OCI spec
|
||||
modifier := modify.NewModifierWithLoggerFor(logger, available, root, bundleDir)
|
||||
|
||||
return modifier, nil
|
||||
}
|
||||
@@ -66,22 +66,9 @@ func (r nvidiaContainerRuntime) Exec(args []string) error {
|
||||
// modificationRequired checks the intput arguments to determine whether a modification
|
||||
// to the OCI spec is required.
|
||||
func (r nvidiaContainerRuntime) modificationRequired(args []string) bool {
|
||||
var previousWasBundle bool
|
||||
for _, a := range args {
|
||||
// We check for '--bundle create' explicitly to ensure that we
|
||||
// don't inadvertently trigger a modification if the bundle directory
|
||||
// is specified as `create`
|
||||
if !previousWasBundle && isBundleFlag(a) {
|
||||
previousWasBundle = true
|
||||
continue
|
||||
}
|
||||
|
||||
if !previousWasBundle && a == "create" {
|
||||
r.logger.Infof("'create' command detected; modification required")
|
||||
return true
|
||||
}
|
||||
|
||||
previousWasBundle = false
|
||||
if oci.HasCreateSubcommand(args) {
|
||||
r.logger.Infof("'create' command detected; modification required")
|
||||
return true
|
||||
}
|
||||
|
||||
r.logger.Infof("No modification required")
|
||||
|
||||
@@ -27,32 +27,6 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestArgsGetConfigFilePath(t *testing.T) {
|
||||
testCases := []struct {
|
||||
bundleDir string
|
||||
ociSpecPath string
|
||||
}{
|
||||
{
|
||||
ociSpecPath: "config.json",
|
||||
},
|
||||
{
|
||||
bundleDir: "/foo/bar",
|
||||
ociSpecPath: "/foo/bar/config.json",
|
||||
},
|
||||
{
|
||||
bundleDir: "/foo/bar/",
|
||||
ociSpecPath: "/foo/bar/config.json",
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
cp, err := getOCISpecFilePath(tc.bundleDir)
|
||||
|
||||
require.NoErrorf(t, err, "%d: %v", i, tc)
|
||||
require.Equalf(t, tc.ociSpecPath, cp, "%d: %v", i, tc)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAddNvidiaHook(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
shim := nvidiaContainerRuntime{
|
||||
@@ -181,9 +155,14 @@ func TestNvidiaContainerRuntime(t *testing.T) {
|
||||
tc.shim.logger = logger
|
||||
hook.Reset()
|
||||
|
||||
spec := &specs.Spec{}
|
||||
ociMock := oci.NewMockSpec(spec, tc.writeError, tc.modifyError)
|
||||
|
||||
ociMock := &oci.SpecMock{
|
||||
ModifyFunc: func(specModifier oci.SpecModifier) error {
|
||||
return tc.modifyError
|
||||
},
|
||||
FlushFunc: func() error {
|
||||
return tc.writeError
|
||||
},
|
||||
}
|
||||
require.Equal(t, tc.shouldModify, tc.shim.modificationRequired(tc.args), "%d: %v", i, tc)
|
||||
|
||||
tc.shim.ociSpec = ociMock
|
||||
@@ -197,18 +176,16 @@ func TestNvidiaContainerRuntime(t *testing.T) {
|
||||
}
|
||||
|
||||
if tc.shouldModify {
|
||||
require.Equal(t, 1, ociMock.MockModify.Callcount, "%d: %v", i, tc)
|
||||
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "%d: %v", i, tc)
|
||||
require.Equal(t, 1, len(ociMock.ModifyCalls()), "%d: %v", i, tc)
|
||||
} else {
|
||||
require.Equal(t, 0, ociMock.MockModify.Callcount, "%d: %v", i, tc)
|
||||
require.Nil(t, spec.Hooks, "%d: %v", i, tc)
|
||||
require.Equal(t, 0, len(ociMock.ModifyCalls()), "%d: %v", i, tc)
|
||||
}
|
||||
|
||||
writeExpected := tc.shouldModify && tc.modifyError == nil
|
||||
if writeExpected {
|
||||
require.Equal(t, 1, ociMock.MockFlush.Callcount, "%d: %v", i, tc)
|
||||
require.Equal(t, 1, len(ociMock.FlushCalls()), "%d: %v", i, tc)
|
||||
} else {
|
||||
require.Equal(t, 0, ociMock.MockFlush.Callcount, "%d: %v", i, tc)
|
||||
require.Equal(t, 0, len(ociMock.FlushCalls()), "%d: %v", i, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,9 +18,6 @@ package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
@@ -53,15 +50,15 @@ func newRuntime(argv []string) (oci.Runtime, error) {
|
||||
|
||||
// newOCISpec constructs an OCI spec for the provided arguments
|
||||
func newOCISpec(argv []string) (oci.Spec, error) {
|
||||
bundlePath, err := getBundlePath(argv)
|
||||
bundleDir, err := oci.GetBundleDir(argv)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing command line arguments: %v", err)
|
||||
}
|
||||
logger.Infof("Using bundle directory: %v", bundleDir)
|
||||
|
||||
ociSpecPath := oci.GetSpecFilePath(bundleDir)
|
||||
logger.Infof("Using OCI specification file path: %v", ociSpecPath)
|
||||
|
||||
ociSpecPath, err := getOCISpecFilePath(bundlePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting OCI specification file path: %v", err)
|
||||
}
|
||||
ociSpec := oci.NewSpecFromFile(ociSpecPath)
|
||||
|
||||
return ociSpec, nil
|
||||
@@ -69,98 +66,9 @@ func newOCISpec(argv []string) (oci.Spec, error) {
|
||||
|
||||
// newRuncRuntime locates the runc binary and wraps it in a SyscallExecRuntime
|
||||
func newRuncRuntime() (oci.Runtime, error) {
|
||||
runtimePath, err := findRunc()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error locating runtime: %v", err)
|
||||
}
|
||||
|
||||
runc, err := oci.NewSyscallExecRuntimeWithLogger(logger.Logger, runtimePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing runtime: %v", err)
|
||||
}
|
||||
|
||||
return runc, nil
|
||||
}
|
||||
|
||||
// getBundlePath checks the specified slice of strings (argv) for a 'bundle' flag as allowed by runc.
|
||||
// The following are supported:
|
||||
// --bundle{{SEP}}BUNDLE_PATH
|
||||
// -bundle{{SEP}}BUNDLE_PATH
|
||||
// -b{{SEP}}BUNDLE_PATH
|
||||
// where {{SEP}} is either ' ' or '='
|
||||
func getBundlePath(argv []string) (string, error) {
|
||||
var bundlePath string
|
||||
|
||||
for i := 0; i < len(argv); i++ {
|
||||
param := argv[i]
|
||||
|
||||
parts := strings.SplitN(param, "=", 2)
|
||||
if !isBundleFlag(parts[0]) {
|
||||
continue
|
||||
}
|
||||
|
||||
// The flag has the format --bundle=/path
|
||||
if len(parts) == 2 {
|
||||
bundlePath = parts[1]
|
||||
continue
|
||||
}
|
||||
|
||||
// The flag has the format --bundle /path
|
||||
if i+1 < len(argv) {
|
||||
bundlePath = argv[i+1]
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
// --bundle / -b was the last element of argv
|
||||
return "", fmt.Errorf("bundle option requires an argument")
|
||||
}
|
||||
|
||||
return bundlePath, nil
|
||||
}
|
||||
|
||||
// findRunc locates runc in the path, returning the full path to the
|
||||
// binary or an error.
|
||||
func findRunc() (string, error) {
|
||||
runtimeCandidates := []string{
|
||||
return oci.NewLowLevelRuntimeWithLogger(
|
||||
logger.Logger,
|
||||
dockerRuncExecutableName,
|
||||
runcExecutableName,
|
||||
}
|
||||
|
||||
return findRuntime(runtimeCandidates)
|
||||
}
|
||||
|
||||
func findRuntime(runtimeCandidates []string) (string, error) {
|
||||
for _, candidate := range runtimeCandidates {
|
||||
logger.Infof("Looking for runtime binary '%v'", candidate)
|
||||
runcPath, err := exec.LookPath(candidate)
|
||||
if err == nil {
|
||||
logger.Infof("Found runtime binary '%v'", runcPath)
|
||||
return runcPath, nil
|
||||
}
|
||||
logger.Warnf("Runtime binary '%v' not found: %v", candidate, err)
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("no runtime binary found from candidate list: %v", runtimeCandidates)
|
||||
}
|
||||
|
||||
func isBundleFlag(arg string) bool {
|
||||
if !strings.HasPrefix(arg, "-") {
|
||||
return false
|
||||
}
|
||||
|
||||
trimmed := strings.TrimLeft(arg, "-")
|
||||
return trimmed == "b" || trimmed == "bundle"
|
||||
}
|
||||
|
||||
// getOCISpecFilePath returns the expected path to the OCI specification file for the given
|
||||
// bundle directory. If the bundle directory is empty, only `config.json` is returned.
|
||||
func getOCISpecFilePath(bundleDir string) (string, error) {
|
||||
logger.Infof("Using bundle directory: %v", bundleDir)
|
||||
|
||||
OCISpecFilePath := filepath.Join(bundleDir, ociSpecFileName)
|
||||
|
||||
logger.Infof("Using OCI specification file path: %v", OCISpecFilePath)
|
||||
|
||||
return OCISpecFilePath, nil
|
||||
)
|
||||
}
|
||||
|
||||
@@ -17,10 +17,8 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
@@ -30,163 +28,3 @@ func TestConstructor(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, shim)
|
||||
}
|
||||
|
||||
func TestGetBundlePath(t *testing.T) {
|
||||
type expected struct {
|
||||
bundle string
|
||||
isError bool
|
||||
}
|
||||
testCases := []struct {
|
||||
argv []string
|
||||
expected expected
|
||||
}{
|
||||
{
|
||||
argv: []string{},
|
||||
},
|
||||
{
|
||||
argv: []string{"create"},
|
||||
},
|
||||
{
|
||||
argv: []string{"--bundle"},
|
||||
expected: expected{
|
||||
isError: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"-b"},
|
||||
expected: expected{
|
||||
isError: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"--bundle", "/foo/bar"},
|
||||
expected: expected{
|
||||
bundle: "/foo/bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"--not-bundle", "/foo/bar"},
|
||||
},
|
||||
{
|
||||
argv: []string{"--"},
|
||||
},
|
||||
{
|
||||
argv: []string{"-bundle", "/foo/bar"},
|
||||
expected: expected{
|
||||
bundle: "/foo/bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"--bundle=/foo/bar"},
|
||||
expected: expected{
|
||||
bundle: "/foo/bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"-b=/foo/bar"},
|
||||
expected: expected{
|
||||
bundle: "/foo/bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"-b=/foo/=bar"},
|
||||
expected: expected{
|
||||
bundle: "/foo/=bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"-b", "/foo/bar"},
|
||||
expected: expected{
|
||||
bundle: "/foo/bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"create", "-b", "/foo/bar"},
|
||||
expected: expected{
|
||||
bundle: "/foo/bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"-b", "create", "create"},
|
||||
expected: expected{
|
||||
bundle: "create",
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"-b=create", "create"},
|
||||
expected: expected{
|
||||
bundle: "create",
|
||||
},
|
||||
},
|
||||
{
|
||||
argv: []string{"-b", "create"},
|
||||
expected: expected{
|
||||
bundle: "create",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
bundle, err := getBundlePath(tc.argv)
|
||||
|
||||
if tc.expected.isError {
|
||||
require.Errorf(t, err, "%d: %v", i, tc)
|
||||
} else {
|
||||
require.NoErrorf(t, err, "%d: %v", i, tc)
|
||||
}
|
||||
|
||||
require.Equalf(t, tc.expected.bundle, bundle, "%d: %v", i, tc)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindRunc(t *testing.T) {
|
||||
testLogger, _ := testlog.NewNullLogger()
|
||||
logger.Logger = testLogger
|
||||
|
||||
runcPath, err := findRunc()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, filepath.Join(cfg.binPath, runcExecutableName), runcPath)
|
||||
}
|
||||
|
||||
func TestFindRuntime(t *testing.T) {
|
||||
testLogger, _ := testlog.NewNullLogger()
|
||||
logger.Logger = testLogger
|
||||
|
||||
testCases := []struct {
|
||||
candidates []string
|
||||
expectedPath string
|
||||
}{
|
||||
{
|
||||
candidates: []string{},
|
||||
},
|
||||
{
|
||||
candidates: []string{"not-runc"},
|
||||
},
|
||||
{
|
||||
candidates: []string{"not-runc", "also-not-runc"},
|
||||
},
|
||||
{
|
||||
candidates: []string{runcExecutableName},
|
||||
expectedPath: filepath.Join(cfg.binPath, runcExecutableName),
|
||||
},
|
||||
{
|
||||
candidates: []string{runcExecutableName, "not-runc"},
|
||||
expectedPath: filepath.Join(cfg.binPath, runcExecutableName),
|
||||
},
|
||||
{
|
||||
candidates: []string{"not-runc", runcExecutableName},
|
||||
expectedPath: filepath.Join(cfg.binPath, runcExecutableName),
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
runcPath, err := findRuntime(tc.candidates)
|
||||
if tc.expectedPath == "" {
|
||||
require.Error(t, err, "%d: %v", i, tc)
|
||||
} else {
|
||||
require.NoError(t, err, "%d: %v", i, tc)
|
||||
}
|
||||
require.Equal(t, tc.expectedPath, runcPath, "%d: %v", i, tc)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -2,6 +2,15 @@ package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
allDriverCapabilities = DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx")
|
||||
defaultDriverCapabilities = DriverCapabilities("utility,compute")
|
||||
|
||||
none = DriverCapabilities("")
|
||||
all = DriverCapabilities("all")
|
||||
)
|
||||
|
||||
func capabilityToCLI(cap string) string {
|
||||
@@ -25,3 +34,50 @@ func capabilityToCLI(cap string) string {
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// DriverCapabilities is used to process the NVIDIA_DRIVER_CAPABILITIES environment
|
||||
// variable. Operations include default values, filtering, and handling meta values such as "all"
|
||||
type DriverCapabilities string
|
||||
|
||||
// Intersection returns intersection between two sets of capabilities.
|
||||
func (d DriverCapabilities) Intersection(capabilities DriverCapabilities) DriverCapabilities {
|
||||
if capabilities == all {
|
||||
return d
|
||||
}
|
||||
if d == all {
|
||||
return capabilities
|
||||
}
|
||||
|
||||
lookup := make(map[string]bool)
|
||||
for _, c := range d.list() {
|
||||
lookup[c] = true
|
||||
}
|
||||
var found []string
|
||||
for _, c := range capabilities.list() {
|
||||
if lookup[c] {
|
||||
found = append(found, c)
|
||||
}
|
||||
}
|
||||
|
||||
intersection := DriverCapabilities(strings.Join(found, ","))
|
||||
return intersection
|
||||
}
|
||||
|
||||
// String returns the string representation of the driver capabilities
|
||||
func (d DriverCapabilities) String() string {
|
||||
return string(d)
|
||||
}
|
||||
|
||||
// list returns the driver capabilities as a list
|
||||
func (d DriverCapabilities) list() []string {
|
||||
var caps []string
|
||||
for _, c := range strings.Split(string(d), ",") {
|
||||
trimmed := strings.TrimSpace(c)
|
||||
if len(trimmed) == 0 {
|
||||
continue
|
||||
}
|
||||
caps = append(caps, trimmed)
|
||||
}
|
||||
|
||||
return caps
|
||||
}
|
||||
|
||||
134
cmd/nvidia-container-toolkit/capabilities_test.go
Normal file
134
cmd/nvidia-container-toolkit/capabilities_test.go
Normal file
@@ -0,0 +1,134 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDriverCapabilitiesIntersection(t *testing.T) {
|
||||
testCases := []struct {
|
||||
capabilities DriverCapabilities
|
||||
supportedCapabilities DriverCapabilities
|
||||
expectedIntersection DriverCapabilities
|
||||
}{
|
||||
{
|
||||
capabilities: none,
|
||||
supportedCapabilities: none,
|
||||
expectedIntersection: none,
|
||||
},
|
||||
{
|
||||
capabilities: all,
|
||||
supportedCapabilities: none,
|
||||
expectedIntersection: none,
|
||||
},
|
||||
{
|
||||
capabilities: all,
|
||||
supportedCapabilities: allDriverCapabilities,
|
||||
expectedIntersection: allDriverCapabilities,
|
||||
},
|
||||
{
|
||||
capabilities: allDriverCapabilities,
|
||||
supportedCapabilities: all,
|
||||
expectedIntersection: allDriverCapabilities,
|
||||
},
|
||||
{
|
||||
capabilities: none,
|
||||
supportedCapabilities: all,
|
||||
expectedIntersection: none,
|
||||
},
|
||||
{
|
||||
capabilities: none,
|
||||
supportedCapabilities: DriverCapabilities("cap1"),
|
||||
expectedIntersection: none,
|
||||
},
|
||||
{
|
||||
capabilities: DriverCapabilities("cap0,cap1"),
|
||||
supportedCapabilities: DriverCapabilities("cap1,cap0"),
|
||||
expectedIntersection: DriverCapabilities("cap0,cap1"),
|
||||
},
|
||||
{
|
||||
capabilities: defaultDriverCapabilities,
|
||||
supportedCapabilities: allDriverCapabilities,
|
||||
expectedIntersection: defaultDriverCapabilities,
|
||||
},
|
||||
{
|
||||
capabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
|
||||
supportedCapabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
|
||||
expectedIntersection: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
|
||||
},
|
||||
{
|
||||
capabilities: DriverCapabilities("cap1"),
|
||||
supportedCapabilities: none,
|
||||
expectedIntersection: none,
|
||||
},
|
||||
{
|
||||
capabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
|
||||
supportedCapabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
|
||||
expectedIntersection: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
intersection := tc.supportedCapabilities.Intersection(tc.capabilities)
|
||||
require.EqualValues(t, tc.expectedIntersection, intersection)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDriverCapabilitiesList(t *testing.T) {
|
||||
testCases := []struct {
|
||||
capabilities DriverCapabilities
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
capabilities: DriverCapabilities(""),
|
||||
},
|
||||
{
|
||||
capabilities: DriverCapabilities(" "),
|
||||
},
|
||||
{
|
||||
capabilities: DriverCapabilities(","),
|
||||
},
|
||||
{
|
||||
capabilities: DriverCapabilities(",cap"),
|
||||
expected: []string{"cap"},
|
||||
},
|
||||
{
|
||||
capabilities: DriverCapabilities("cap,"),
|
||||
expected: []string{"cap"},
|
||||
},
|
||||
{
|
||||
capabilities: DriverCapabilities("cap0,,cap1"),
|
||||
expected: []string{"cap0", "cap1"},
|
||||
},
|
||||
{
|
||||
capabilities: DriverCapabilities("cap1,cap0,cap3"),
|
||||
expected: []string{"cap1", "cap0", "cap3"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
require.EqualValues(t, tc.expected, tc.capabilities.list())
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -26,11 +26,6 @@ const (
|
||||
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
||||
)
|
||||
|
||||
const (
|
||||
allDriverCapabilities = "compute,compat32,graphics,utility,video,display,ngx"
|
||||
defaultDriverCapabilities = "utility,compute"
|
||||
)
|
||||
|
||||
const (
|
||||
capSysAdmin = "CAP_SYS_ADMIN"
|
||||
)
|
||||
@@ -316,33 +311,27 @@ func getMigMonitorDevices(env map[string]string) *string {
|
||||
return nil
|
||||
}
|
||||
|
||||
func getDriverCapabilities(env map[string]string, legacyImage bool) *string {
|
||||
// Grab a reference to the capabilities from the envvar
|
||||
// if it actually exists in the environment.
|
||||
var capabilities *string
|
||||
if caps, ok := env[envNVDriverCapabilities]; ok {
|
||||
capabilities = &caps
|
||||
func getDriverCapabilities(env map[string]string, supportedDriverCapabilities DriverCapabilities, legacyImage bool) DriverCapabilities {
|
||||
// We use the default driver capabilities by default. This is filtered to only include the
|
||||
// supported capabilities
|
||||
capabilities := supportedDriverCapabilities.Intersection(defaultDriverCapabilities)
|
||||
|
||||
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
|
||||
|
||||
if !capsEnvSpecified && legacyImage {
|
||||
// Environment variable unset with legacy image: set all capabilities.
|
||||
return supportedDriverCapabilities
|
||||
}
|
||||
|
||||
// Environment variable unset with legacy image: set all capabilities.
|
||||
if capabilities == nil && legacyImage {
|
||||
allCaps := allDriverCapabilities
|
||||
return &allCaps
|
||||
if capsEnvSpecified && len(capsEnv) > 0 {
|
||||
// If the envvironment variable is specified and is non-empty, use the capabilities value
|
||||
envCapabilities := DriverCapabilities(capsEnv)
|
||||
capabilities = supportedDriverCapabilities.Intersection(envCapabilities)
|
||||
if envCapabilities != all && capabilities != envCapabilities {
|
||||
log.Panicln(fmt.Errorf("unsupported capabilities found in '%v' (allowed '%v')", envCapabilities, capabilities))
|
||||
}
|
||||
}
|
||||
|
||||
// Environment variable unset or set but empty: set default capabilities.
|
||||
if capabilities == nil || len(*capabilities) == 0 {
|
||||
defaultCaps := defaultDriverCapabilities
|
||||
return &defaultCaps
|
||||
}
|
||||
|
||||
// Environment variable set to "all": set all capabilities.
|
||||
if *capabilities == "all" {
|
||||
allCaps := allDriverCapabilities
|
||||
return &allCaps
|
||||
}
|
||||
|
||||
// Any other value
|
||||
return capabilities
|
||||
}
|
||||
|
||||
@@ -389,10 +378,7 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
var driverCapabilities string
|
||||
if c := getDriverCapabilities(env, legacyImage); c != nil {
|
||||
driverCapabilities = *c
|
||||
}
|
||||
driverCapabilities := getDriverCapabilities(env, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
||||
|
||||
requirements := getRequirements(env, legacyImage)
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
description string
|
||||
env map[string]string
|
||||
privileged bool
|
||||
hookConfig *HookConfig
|
||||
expectedConfig *nvidiaConfig
|
||||
expectedPanic bool
|
||||
}{
|
||||
@@ -35,7 +36,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
DriverCapabilities: allDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -49,7 +50,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
DriverCapabilities: allDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -81,7 +82,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
DriverCapabilities: allDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -95,7 +96,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
DriverCapabilities: allDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -110,7 +111,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -125,7 +126,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
DriverCapabilities: allDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -135,12 +136,12 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
envNVDriverCapabilities: "video,display",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
DriverCapabilities: "video,display",
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -150,14 +151,14 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
envNVDriverCapabilities: "video,display",
|
||||
envNVRequirePrefix + "REQ0": "req0=true",
|
||||
envNVRequirePrefix + "REQ1": "req1=false",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
DriverCapabilities: "video,display",
|
||||
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -167,7 +168,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
envNVDriverCapabilities: "video,display",
|
||||
envNVRequirePrefix + "REQ0": "req0=true",
|
||||
envNVRequirePrefix + "REQ1": "req1=false",
|
||||
envNVDisableRequire: "true",
|
||||
@@ -175,7 +176,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
DriverCapabilities: "video,display",
|
||||
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
|
||||
DisableRequire: true,
|
||||
},
|
||||
@@ -206,7 +207,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -238,7 +239,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -252,7 +253,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -267,7 +268,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -282,7 +283,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
DriverCapabilities: allDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -292,12 +293,12 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
envNVDriverCapabilities: "video,display",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
DriverCapabilities: "video,display",
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -307,14 +308,14 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
envNVDriverCapabilities: "video,display",
|
||||
envNVRequirePrefix + "REQ0": "req0=true",
|
||||
envNVRequirePrefix + "REQ1": "req1=false",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
DriverCapabilities: "video,display",
|
||||
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -324,7 +325,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
envNVDriverCapabilities: "video,display",
|
||||
envNVRequirePrefix + "REQ0": "req0=true",
|
||||
envNVRequirePrefix + "REQ1": "req1=false",
|
||||
envNVDisableRequire: "true",
|
||||
@@ -332,7 +333,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
DriverCapabilities: "video,display",
|
||||
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
|
||||
DisableRequire: true,
|
||||
},
|
||||
@@ -346,7 +347,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
Requirements: []string{},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -362,7 +363,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
MigConfigDevices: "mig0,mig1",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -388,7 +389,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
MigMonitorDevices: "mig0,mig1",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
@@ -403,14 +404,62 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
privileged: false,
|
||||
expectedPanic: true,
|
||||
},
|
||||
{
|
||||
description: "Hook config set as driver-capabilities-all",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
envNVDriverCapabilities: "all",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SupportedDriverCapabilities: "video,display",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: "video,display",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Hook config set, envvar sets driver-capabilities",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
envNVDriverCapabilities: "video,display",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SupportedDriverCapabilities: "video,display,compute,utility",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: "video,display",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Hook config set, envvar unset sets default driver-capabilities",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
// Wrap the call to getNvidiaConfig() in a closure.
|
||||
var config *nvidiaConfig
|
||||
getConfig := func() {
|
||||
hookConfig := getDefaultHookConfig()
|
||||
config = getNvidiaConfig(&hookConfig, tc.env, nil, tc.privileged)
|
||||
hookConfig := tc.hookConfig
|
||||
if hookConfig == nil {
|
||||
defaultConfig := getDefaultHookConfig()
|
||||
hookConfig = &defaultConfig
|
||||
}
|
||||
config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged)
|
||||
}
|
||||
|
||||
// For any tests that are expected to panic, make sure they do.
|
||||
@@ -822,3 +871,119 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDriverCapabilities(t *testing.T) {
|
||||
|
||||
supportedCapabilities := "compute,utility,display,video"
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
env map[string]string
|
||||
legacyImage bool
|
||||
supportedCapabilities string
|
||||
expectedPanic bool
|
||||
expectedCapabilities string
|
||||
}{
|
||||
{
|
||||
description: "Env is set for legacy image",
|
||||
env: map[string]string{
|
||||
envNVDriverCapabilities: "display,video",
|
||||
},
|
||||
legacyImage: true,
|
||||
supportedCapabilities: supportedCapabilities,
|
||||
expectedCapabilities: "display,video",
|
||||
},
|
||||
{
|
||||
description: "Env is all for legacy image",
|
||||
env: map[string]string{
|
||||
envNVDriverCapabilities: "all",
|
||||
},
|
||||
legacyImage: true,
|
||||
supportedCapabilities: supportedCapabilities,
|
||||
expectedCapabilities: supportedCapabilities,
|
||||
},
|
||||
{
|
||||
description: "Env is empty for legacy image",
|
||||
env: map[string]string{
|
||||
envNVDriverCapabilities: "",
|
||||
},
|
||||
legacyImage: true,
|
||||
supportedCapabilities: supportedCapabilities,
|
||||
expectedCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
{
|
||||
description: "Env unset for legacy image is 'all'",
|
||||
env: map[string]string{},
|
||||
legacyImage: true,
|
||||
supportedCapabilities: supportedCapabilities,
|
||||
expectedCapabilities: supportedCapabilities,
|
||||
},
|
||||
{
|
||||
description: "Env is set for modern image",
|
||||
env: map[string]string{
|
||||
envNVDriverCapabilities: "display,video",
|
||||
},
|
||||
legacyImage: false,
|
||||
supportedCapabilities: supportedCapabilities,
|
||||
expectedCapabilities: "display,video",
|
||||
},
|
||||
{
|
||||
description: "Env unset for modern image is default",
|
||||
env: map[string]string{},
|
||||
legacyImage: false,
|
||||
supportedCapabilities: supportedCapabilities,
|
||||
expectedCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
{
|
||||
description: "Env is all for modern image",
|
||||
env: map[string]string{
|
||||
envNVDriverCapabilities: "all",
|
||||
},
|
||||
legacyImage: false,
|
||||
supportedCapabilities: supportedCapabilities,
|
||||
expectedCapabilities: supportedCapabilities,
|
||||
},
|
||||
{
|
||||
description: "Env is empty for modern image",
|
||||
env: map[string]string{
|
||||
envNVDriverCapabilities: "",
|
||||
},
|
||||
legacyImage: false,
|
||||
supportedCapabilities: supportedCapabilities,
|
||||
expectedCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
{
|
||||
description: "Invalid capabilities panic",
|
||||
env: map[string]string{
|
||||
envNVDriverCapabilities: "compute,utility",
|
||||
},
|
||||
supportedCapabilities: "not-compute,not-utility",
|
||||
expectedPanic: true,
|
||||
},
|
||||
{
|
||||
description: "Default is restricted for modern image",
|
||||
legacyImage: false,
|
||||
supportedCapabilities: "compute",
|
||||
expectedCapabilities: "compute",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
var capabilites DriverCapabilities
|
||||
|
||||
getDriverCapabilities := func() {
|
||||
supportedCapabilities := DriverCapabilities(tc.supportedCapabilities)
|
||||
capabilites = getDriverCapabilities(tc.env, supportedCapabilities, tc.legacyImage)
|
||||
}
|
||||
|
||||
if tc.expectedPanic {
|
||||
require.Panics(t, getDriverCapabilities)
|
||||
return
|
||||
}
|
||||
|
||||
getDriverCapabilities()
|
||||
require.EqualValues(t, tc.expectedCapabilities, capabilites)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -35,10 +35,11 @@ type CLIConfig struct {
|
||||
|
||||
// HookConfig : options for the nvidia-container-toolkit.
|
||||
type HookConfig struct {
|
||||
DisableRequire bool `toml:"disable-require"`
|
||||
SwarmResource *string `toml:"swarm-resource"`
|
||||
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
|
||||
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
|
||||
DisableRequire bool `toml:"disable-require"`
|
||||
SwarmResource *string `toml:"swarm-resource"`
|
||||
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
|
||||
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
|
||||
SupportedDriverCapabilities DriverCapabilities `toml:"supported-driver-capabilities"`
|
||||
|
||||
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
|
||||
}
|
||||
@@ -49,6 +50,7 @@ func getDefaultHookConfig() (config HookConfig) {
|
||||
SwarmResource: nil,
|
||||
AcceptEnvvarUnprivileged: true,
|
||||
AcceptDeviceListAsVolumeMounts: false,
|
||||
SupportedDriverCapabilities: allDriverCapabilities,
|
||||
NvidiaContainerCLI: CLIConfig{
|
||||
Root: nil,
|
||||
Path: nil,
|
||||
@@ -85,6 +87,15 @@ func getHookConfig() (config HookConfig) {
|
||||
}
|
||||
}
|
||||
|
||||
if config.SupportedDriverCapabilities == all {
|
||||
config.SupportedDriverCapabilities = allDriverCapabilities
|
||||
}
|
||||
// We ensure that the supported-driver-capabilites option is a subset of allDriverCapabilities
|
||||
if intersection := allDriverCapabilities.Intersection(config.SupportedDriverCapabilities); intersection != config.SupportedDriverCapabilities {
|
||||
configName := config.getConfigOption("SupportedDriverCapabilities")
|
||||
log.Panicf("Invalid value for config option '%v'; %v (supported: %v)\n", configName, config.SupportedDriverCapabilities, allDriverCapabilities)
|
||||
}
|
||||
|
||||
return config
|
||||
}
|
||||
|
||||
|
||||
105
cmd/nvidia-container-toolkit/hook_config_test.go
Normal file
105
cmd/nvidia-container-toolkit/hook_config_test.go
Normal file
@@ -0,0 +1,105 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetHookConfig(t *testing.T) {
|
||||
testCases := []struct {
|
||||
lines []string
|
||||
expectedPanic bool
|
||||
expectedDriverCapabilities DriverCapabilities
|
||||
}{
|
||||
{
|
||||
expectedDriverCapabilities: allDriverCapabilities,
|
||||
},
|
||||
{
|
||||
lines: []string{
|
||||
"supported-driver-capabilities = \"all\"",
|
||||
},
|
||||
expectedDriverCapabilities: allDriverCapabilities,
|
||||
},
|
||||
{
|
||||
lines: []string{
|
||||
"supported-driver-capabilities = \"compute,utility,not-compute\"",
|
||||
},
|
||||
expectedPanic: true,
|
||||
},
|
||||
{
|
||||
lines: []string{},
|
||||
expectedDriverCapabilities: allDriverCapabilities,
|
||||
},
|
||||
{
|
||||
lines: []string{
|
||||
"supported-driver-capabilities = \"\"",
|
||||
},
|
||||
expectedDriverCapabilities: none,
|
||||
},
|
||||
{
|
||||
lines: []string{
|
||||
"supported-driver-capabilities = \"utility,compute\"",
|
||||
},
|
||||
expectedDriverCapabilities: DriverCapabilities("utility,compute"),
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
var filename string
|
||||
defer func() {
|
||||
if len(filename) > 0 {
|
||||
os.Remove(filename)
|
||||
}
|
||||
configflag = nil
|
||||
}()
|
||||
|
||||
if tc.lines != nil {
|
||||
configFile, err := os.CreateTemp("", "*.toml")
|
||||
require.NoError(t, err)
|
||||
defer configFile.Close()
|
||||
|
||||
filename = configFile.Name()
|
||||
configflag = &filename
|
||||
|
||||
for _, line := range tc.lines {
|
||||
_, err := configFile.WriteString(fmt.Sprintf("%s\n", line))
|
||||
require.NoError(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
var config HookConfig
|
||||
getHookConfig := func() {
|
||||
config = getHookConfig()
|
||||
}
|
||||
|
||||
if tc.expectedPanic {
|
||||
require.Panics(t, getHookConfig)
|
||||
return
|
||||
}
|
||||
|
||||
getHookConfig()
|
||||
|
||||
require.EqualValues(t, tc.expectedDriverCapabilities, config.SupportedDriverCapabilities)
|
||||
})
|
||||
}
|
||||
}
|
||||
19
config/config.toml.ubuntu+jetpack
Normal file
19
config/config.toml.ubuntu+jetpack
Normal file
@@ -0,0 +1,19 @@
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compute,compat32,graphics,utility,video,display"
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
#path = "/usr/bin/nvidia-container-cli"
|
||||
environment = []
|
||||
#debug = "/var/log/nvidia-container-toolkit.log"
|
||||
#ldcache = "/etc/ld.so.cache"
|
||||
load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig.real"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
@@ -3,10 +3,12 @@ FROM ${BASEIMAGE}
|
||||
|
||||
RUN yum install -y \
|
||||
ca-certificates \
|
||||
wget \
|
||||
gcc \
|
||||
git \
|
||||
make \
|
||||
rpm-build \
|
||||
make && \
|
||||
wget \
|
||||
&& \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
ARG GOLANG_VERSION=0.0.0
|
||||
@@ -42,7 +44,9 @@ COPY . .
|
||||
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
COPY config/config.toml.amzn $DIST_DIR/config.toml
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
|
||||
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
|
||||
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
|
||||
# This might not be useful on Amazon Linux, but it's simpler to keep the RHEL
|
||||
|
||||
@@ -3,10 +3,12 @@ FROM ${BASEIMAGE}
|
||||
|
||||
RUN yum install -y \
|
||||
ca-certificates \
|
||||
wget \
|
||||
gcc \
|
||||
git \
|
||||
make \
|
||||
rpm-build && \
|
||||
rpm-build \
|
||||
wget \
|
||||
&& \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
ARG GOLANG_VERSION=0.0.0
|
||||
@@ -42,7 +44,9 @@ COPY . .
|
||||
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
COPY config/config.toml.centos $DIST_DIR/config.toml
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
|
||||
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
|
||||
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
|
||||
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
|
||||
|
||||
@@ -50,7 +50,9 @@ COPY . .
|
||||
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
COPY config/config.toml.debian $DIST_DIR/config.toml
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
|
||||
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
|
||||
# Debian Jessie still had ldconfig.real
|
||||
RUN if [ "$(lsb_release -cs)" = "jessie" ]; then \
|
||||
|
||||
@@ -47,7 +47,9 @@ COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
|
||||
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
|
||||
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
|
||||
|
||||
COPY config/config.toml.opensuse-leap $DIST_DIR/config.toml
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
|
||||
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
@@ -48,7 +48,9 @@ COPY . .
|
||||
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
COPY config/config.toml.ubuntu $DIST_DIR/config.toml
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
|
||||
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
|
||||
WORKDIR $DIST_DIR
|
||||
COPY packaging/debian ./debian
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# Supported OSs by architecture
|
||||
AMD64_TARGETS := ubuntu20.04 ubuntu18.04 ubuntu16.04 debian10 debian9
|
||||
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux1 amazonlinux2 opensuse-leap15.1
|
||||
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
PPC64LE_TARGETS := ubuntu18.04 ubuntu16.04 centos7 centos8 rhel7 rhel8
|
||||
ARM64_TARGETS := ubuntu20.04 ubuntu18.04
|
||||
AARCH64_TARGETS := centos8 rhel8 amazonlinux2
|
||||
@@ -114,6 +114,9 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
|
||||
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
|
||||
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
|
||||
|
||||
# We allow the CONFIG_TOML_SUFFIX to be overridden.
|
||||
CONFIG_TOML_SUFFIX ?= $(OS)
|
||||
|
||||
docker-build-%:
|
||||
@echo "Building for $(TARGET_PLATFORM)"
|
||||
docker pull --platform=linux/$(ARCH) $(BASEIMAGE)
|
||||
@@ -124,6 +127,7 @@ docker-build-%:
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg PKG_VERS="$(LIB_VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
|
||||
--tag $(BUILDIMAGE) \
|
||||
--file $(DOCKERFILE) .
|
||||
$(DOCKER) run \
|
||||
|
||||
56
examples/discover/main.go
Normal file
56
examples/discover/main.go
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func main() {
|
||||
log.Infof("Starting device discovery with NVML")
|
||||
|
||||
d, err := discover.NewNVMLServer("")
|
||||
if err != nil {
|
||||
log.Errorf("Error creating NVML Server: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
devices, err := d.Devices()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering devices: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering mounts: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
|
||||
log.Infof("Discovered devices:")
|
||||
enc.Encode(devices)
|
||||
|
||||
log.Infof("Discovered libraries:")
|
||||
enc.Encode(mounts)
|
||||
}
|
||||
65
examples/filter/main.go
Normal file
65
examples/filter/main.go
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/filter"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func main() {
|
||||
d, err := discover.NewNVMLServer("")
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering devices: %v", err)
|
||||
}
|
||||
|
||||
selected := filter.NewSelectDevicesFrom(d, "all", nil)
|
||||
|
||||
devices, err := selected.Devices()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering devices: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
mounts, err := selected.Mounts()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering mounts: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
hooks, err := selected.Hooks()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering hooks: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
|
||||
log.Infof("Discovered devices:")
|
||||
enc.Encode(devices)
|
||||
|
||||
log.Infof("Discovered libraries:")
|
||||
enc.Encode(mounts)
|
||||
|
||||
log.Infof("Discovered hook:")
|
||||
enc.Encode(hooks)
|
||||
}
|
||||
26
examples/print-ldcache/main.go
Normal file
26
examples/print-ldcache/main.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var logger = log.StandardLogger()
|
||||
|
||||
func main() {
|
||||
logger.SetLevel(log.DebugLevel)
|
||||
logger.Infof("Starting device discovery with NVML")
|
||||
|
||||
cache, err := ldcache.NewLDCacheWithLogger(logger, "/run/nvidia/driver")
|
||||
if err != nil {
|
||||
logger.Errorf("Error loading ldcache: %v", err)
|
||||
return
|
||||
}
|
||||
defer cache.Close()
|
||||
|
||||
libs32, libs64 := cache.Lookup("lib")
|
||||
|
||||
logger.Infof("32-bit: %v", libs32)
|
||||
logger.Infof("64-bit: %v", libs64)
|
||||
|
||||
}
|
||||
2
go.mod
2
go.mod
@@ -4,7 +4,7 @@ go 1.14
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v0.3.1
|
||||
github.com/containerd/containerd v1.5.7
|
||||
github.com/NVIDIA/go-nvml v0.11.1-0
|
||||
github.com/containers/podman/v2 v2.2.1
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20211101234015-a3c33d663ebc
|
||||
github.com/pelletier/go-toml v1.9.3
|
||||
|
||||
51
internal/discover/binaries.go
Normal file
51
internal/discover/binaries.go
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewBinaryMounts creates a discoverer for binaries using the specified root
|
||||
func NewBinaryMounts(root string) Discover {
|
||||
return NewBinaryMountsWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
// NewBinaryMountsWithLogger creates a Mounts discoverer as with NewBinaryMounts
|
||||
// with the specified logger
|
||||
func NewBinaryMountsWithLogger(logger *log.Logger, root string) Discover {
|
||||
d := mounts{
|
||||
logger: logger,
|
||||
lookup: lookup.NewPathLocatorWithLogger(logger, root),
|
||||
required: requiredBinaries,
|
||||
}
|
||||
return &d
|
||||
}
|
||||
|
||||
// requiredBinaries defines a set of binaries and their labels
|
||||
var requiredBinaries = map[string][]string{
|
||||
"utility": {
|
||||
"nvidia-smi", /* System management interface */
|
||||
"nvidia-debugdump", /* GPU coredump utility */
|
||||
"nvidia-persistenced", /* Persistence mode utility */
|
||||
},
|
||||
"compute": {
|
||||
"nvidia-cuda-mps-control", /* Multi process service CLI */
|
||||
"nvidia-cuda-mps-server", /* Multi process service server */
|
||||
},
|
||||
}
|
||||
73
internal/discover/binaries_test.go
Normal file
73
internal/discover/binaries_test.go
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestBinaries(t *testing.T) {
|
||||
binaryLookup := map[string]string{
|
||||
"nvidia-smi": "/usr/bin/nvidia-smi",
|
||||
"nvidia-persistenced": "/usr/bin/nvidia-persistenced",
|
||||
"nvidia-debugdump": "test-duplicates",
|
||||
"nvidia-cuda-mps-control": "test-duplicates",
|
||||
}
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
d := NewBinaryMountsWithLogger(logger, "")
|
||||
|
||||
// Override lookup for testing
|
||||
mockLocator := NewLocatorMockFromMap(binaryLookup)
|
||||
d.(*mounts).lookup = mockLocator
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
|
||||
expected := []Mount{
|
||||
{
|
||||
Path: "/usr/bin/nvidia-smi",
|
||||
},
|
||||
{
|
||||
Path: "/usr/bin/nvidia-persistenced",
|
||||
},
|
||||
{
|
||||
Path: "test-duplicates",
|
||||
},
|
||||
}
|
||||
|
||||
require.Equal(t, len(expected), len(mounts))
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
|
||||
func TestNewBinariesConstructor(t *testing.T) {
|
||||
b := NewBinaryMounts("").(*mounts)
|
||||
|
||||
require.NotNil(t, b.logger)
|
||||
require.NotNil(t, b.lookup)
|
||||
}
|
||||
74
internal/discover/composite.go
Normal file
74
internal/discover/composite.go
Normal file
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import "fmt"
|
||||
|
||||
// composite is a discoverer that contains a list of Discoverers. The output of the
|
||||
// Devices, Mounts, and Hooks functions is the concatenation of the output for each of the
|
||||
// elements in the list.
|
||||
type composite struct {
|
||||
discoverers []Discover
|
||||
}
|
||||
|
||||
var _ Discover = (*composite)(nil)
|
||||
|
||||
func (d composite) Devices() ([]Device, error) {
|
||||
var allDevices []Device
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
devices, err := di.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering devices for discoverer %v: %v", i, err)
|
||||
}
|
||||
allDevices = append(allDevices, devices...)
|
||||
}
|
||||
|
||||
return allDevices, nil
|
||||
}
|
||||
|
||||
func (d composite) Mounts() ([]Mount, error) {
|
||||
var allMounts []Mount
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
mounts, err := di.Mounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering mounts for discoverer %v: %v", i, err)
|
||||
}
|
||||
allMounts = append(allMounts, mounts...)
|
||||
}
|
||||
|
||||
return allMounts, nil
|
||||
}
|
||||
|
||||
func (d composite) Hooks() ([]Hook, error) {
|
||||
var allHooks []Hook
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
hooks, err := di.Hooks()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering hooks for discoverer %v: %v", i, err)
|
||||
}
|
||||
allHooks = append(allHooks, hooks...)
|
||||
}
|
||||
|
||||
return allHooks, nil
|
||||
}
|
||||
|
||||
func (d *composite) add(di ...Discover) {
|
||||
d.discoverers = append(d.discoverers, di...)
|
||||
}
|
||||
65
internal/discover/discover.go
Normal file
65
internal/discover/discover.go
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
// DevicePath is a path in /dev associated with a device
|
||||
type DevicePath string
|
||||
|
||||
// ProcPath is a path in /proc associated with a devices
|
||||
type ProcPath string
|
||||
|
||||
// PCIBusID is the ID on the PCI bus of a device
|
||||
type PCIBusID string
|
||||
|
||||
// DeviceNode represents a device on the file system
|
||||
type DeviceNode struct {
|
||||
Path DevicePath
|
||||
Major int
|
||||
Minor int
|
||||
}
|
||||
|
||||
// Device represents a discovered device including identifiers (Index, UUID, PCI bus ID)
|
||||
// for selection and paths in /dev and /proc associated with the device
|
||||
type Device struct {
|
||||
Index string
|
||||
UUID string
|
||||
PCIBusID PCIBusID
|
||||
DeviceNodes []DeviceNode
|
||||
ProcPaths []ProcPath
|
||||
}
|
||||
|
||||
// Mount represents a discovered mount. This includes a set of labels
|
||||
// for selection and the mount path
|
||||
type Mount struct {
|
||||
Path string
|
||||
Labels map[string]string
|
||||
}
|
||||
|
||||
// Hook represents a discovered hook
|
||||
type Hook struct {
|
||||
Path string
|
||||
Args []string
|
||||
HookName string
|
||||
Labels map[string]string
|
||||
}
|
||||
|
||||
// Discover defines an interface for discovering the devices and mounts available on a system
|
||||
type Discover interface {
|
||||
Devices() ([]Device, error)
|
||||
Mounts() ([]Mount, error)
|
||||
Hooks() ([]Hook, error)
|
||||
}
|
||||
424
internal/discover/discover_nvml.go
Normal file
424
internal/discover/discover_nvml.go
Normal file
@@ -0,0 +1,424 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
// ControlDeviceUUID is used as the UUID for control devices such as nvidiactl or nvidia-modeset
|
||||
ControlDeviceUUID = "CONTROL"
|
||||
|
||||
// MIGConfigDeviceUUID is used to indicate the MIG config control device
|
||||
MIGConfigDeviceUUID = "CONFIG"
|
||||
|
||||
// MIGMonitorDeviceUUID is used to indicate the MIG monitor control device
|
||||
MIGMonitorDeviceUUID = "MONITOR"
|
||||
|
||||
nvidiaGPUDeviceName = "nvidia-frontend"
|
||||
nvidiaCapsDeviceName = "nvidia-caps"
|
||||
nvidiaUVMDeviceName = "nvidia-uvm"
|
||||
)
|
||||
|
||||
type nvmlDiscover struct {
|
||||
None
|
||||
logger *log.Logger
|
||||
nvml nvml.Interface
|
||||
migCaps map[ProcPath]DeviceNode
|
||||
nvidiaDevices proc.NvidiaDevices
|
||||
}
|
||||
|
||||
var _ Discover = (*nvmlDiscover)(nil)
|
||||
|
||||
// NewNVMLDiscover constructs a discoverer that uses NVML to find the devices
|
||||
// available on a system.
|
||||
func NewNVMLDiscover(nvml nvml.Interface) (Discover, error) {
|
||||
return NewNVMLDiscoverWithLogger(log.StandardLogger(), nvml)
|
||||
}
|
||||
|
||||
// NewNVMLDiscoverWithLogger constructs a discovered as with NewNVMLDiscover with the specified
|
||||
// logger
|
||||
func NewNVMLDiscoverWithLogger(logger *log.Logger, nvml nvml.Interface) (Discover, error) {
|
||||
nvidiaDevices, err := proc.GetNvidiaDevices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading NVIDIA devices: %v", err)
|
||||
}
|
||||
|
||||
var migCaps map[ProcPath]DeviceNode
|
||||
nvcapsDevice, exists := nvidiaDevices.Get(nvidiaCapsDeviceName)
|
||||
if !exists {
|
||||
logger.Warnf("%v nvcaps device could not be found", nvidiaCapsDeviceName)
|
||||
} else if migCaps, err = getMigCaps(nvcapsDevice.Major); err != nil {
|
||||
logger.Warnf("Could not load MIG capability devices: %v", err)
|
||||
migCaps = nil
|
||||
}
|
||||
|
||||
discover := &nvmlDiscover{
|
||||
logger: logger,
|
||||
nvml: nvml,
|
||||
migCaps: migCaps,
|
||||
nvidiaDevices: nvidiaDevices,
|
||||
}
|
||||
|
||||
return discover, nil
|
||||
}
|
||||
|
||||
// hasMigSupport checks if MIG device discovery is supported.
|
||||
// Cases where this will be disabled include where no MIG minors file is
|
||||
// present.
|
||||
func (d nvmlDiscover) hasMigSupport() bool {
|
||||
return len(d.migCaps) > 0
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) Devices() ([]Device, error) {
|
||||
ret := d.nvml.Init()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error initalizing NVML: %v", ret.Error())
|
||||
}
|
||||
defer d.tryShutdownNVML()
|
||||
|
||||
c, ret := d.nvml.DeviceGetCount()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting device count: %v", ret.Error())
|
||||
}
|
||||
|
||||
var handles []nvml.Device
|
||||
for i := 0; i < c; i++ {
|
||||
handle, ret := d.nvml.DeviceGetHandleByIndex(i)
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting device handle for device %v: %v", i, ret.Error())
|
||||
}
|
||||
|
||||
if !d.hasMigSupport() {
|
||||
handles = append(handles, handle)
|
||||
continue
|
||||
}
|
||||
|
||||
migHandles, err := getMIGHandlesForDevice(handle)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG handles for device: %v", err)
|
||||
}
|
||||
if len(migHandles) == 0 {
|
||||
handles = append(handles, handle)
|
||||
}
|
||||
handles = append(handles, migHandles...)
|
||||
}
|
||||
|
||||
return d.devicesByHandle(handles)
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) devicesByHandle(handles []nvml.Device) ([]Device, error) {
|
||||
var devices []Device
|
||||
var largestMinorNumber int
|
||||
for _, h := range handles {
|
||||
device, err := d.deviceFromNVMLHandle(h)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing device from handle %v: %v", h, err)
|
||||
}
|
||||
devices = append(devices, device)
|
||||
|
||||
if largestMinorNumber < device.DeviceNodes[0].Minor {
|
||||
largestMinorNumber = device.DeviceNodes[0].Minor
|
||||
}
|
||||
}
|
||||
|
||||
controlDevices, err := d.getControlDevices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting control devices: %v", err)
|
||||
}
|
||||
devices = append(devices, controlDevices...)
|
||||
|
||||
if d.hasMigSupport() {
|
||||
migControlDevices, err := d.getMigControlDevices(largestMinorNumber)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG control devices: %v", err)
|
||||
}
|
||||
devices = append(devices, migControlDevices...)
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) deviceFromNVMLHandle(handle nvml.Device) (Device, error) {
|
||||
if d.hasMigSupport() {
|
||||
isMigDevice, ret := handle.IsMigDeviceHandle()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error checking device handle: %v", ret.Error())
|
||||
}
|
||||
|
||||
if isMigDevice {
|
||||
return d.deviceFromMIGDeviceHandle(handle)
|
||||
}
|
||||
}
|
||||
|
||||
return d.deviceFromFullDeviceHandle(handle)
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) deviceFromFullDeviceHandle(handle nvml.Device) (Device, error) {
|
||||
index, ret := handle.GetIndex()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device index: %v", ret.Error())
|
||||
}
|
||||
|
||||
uuid, ret := handle.GetUUID()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device UUID: %v", ret.Error())
|
||||
}
|
||||
|
||||
pciInfo, ret := handle.GetPciInfo()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting PCI info: %v", ret.Error())
|
||||
}
|
||||
busID := NewPCIBusID(pciInfo)
|
||||
|
||||
minor, ret := handle.GetMinorNumber()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting minor number: %v", ret.Error())
|
||||
}
|
||||
|
||||
nvidiaGPUDevice, exists := d.nvidiaDevices.Get(nvidiaGPUDeviceName)
|
||||
if !exists {
|
||||
return Device{}, fmt.Errorf("device for '%v' does not exist", nvidiaGPUDeviceName)
|
||||
}
|
||||
|
||||
deviceNode := DeviceNode{
|
||||
Path: DevicePath(fmt.Sprintf("/dev/nvidia%d", minor)),
|
||||
Major: nvidiaGPUDevice.Major,
|
||||
Minor: minor,
|
||||
}
|
||||
|
||||
device := Device{
|
||||
Index: fmt.Sprintf("%d", index),
|
||||
PCIBusID: busID,
|
||||
UUID: uuid,
|
||||
DeviceNodes: []DeviceNode{deviceNode},
|
||||
ProcPaths: []ProcPath{busID.GetProcPath()},
|
||||
}
|
||||
|
||||
return device, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) deviceFromMIGDeviceHandle(handle nvml.Device) (Device, error) {
|
||||
parent, ret := handle.GetDeviceHandleFromMigDeviceHandle()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting parent device handle: %v", ret.Error())
|
||||
}
|
||||
|
||||
gpu, ret := parent.GetMinorNumber()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting GPU minor number: %v", ret.Error())
|
||||
}
|
||||
|
||||
parentDevice, err := d.deviceFromFullDeviceHandle(parent)
|
||||
if err != nil {
|
||||
return Device{}, fmt.Errorf("error getting parent device: %v", err)
|
||||
}
|
||||
|
||||
index, ret := handle.GetIndex()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device index: %v", ret.Error())
|
||||
}
|
||||
|
||||
uuid, ret := handle.GetUUID()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device UUID: %v", ret.Error())
|
||||
}
|
||||
|
||||
capDeviceNodes := []DeviceNode{}
|
||||
procPaths, err := getProcPathsForMigDevice(gpu, handle)
|
||||
if err != nil {
|
||||
return Device{}, fmt.Errorf("error getting proc paths for MIG device: %v", err)
|
||||
}
|
||||
|
||||
for _, p := range procPaths {
|
||||
capDeviceNode, ok := d.migCaps[p]
|
||||
if !ok {
|
||||
return Device{}, fmt.Errorf("could not determine cap device path for %v", p)
|
||||
}
|
||||
capDeviceNodes = append(capDeviceNodes, capDeviceNode)
|
||||
}
|
||||
|
||||
device := Device{
|
||||
Index: fmt.Sprintf("%s:%d", parentDevice.Index, index),
|
||||
UUID: uuid,
|
||||
DeviceNodes: append(parentDevice.DeviceNodes, capDeviceNodes...),
|
||||
ProcPaths: append(parentDevice.ProcPaths, procPaths...),
|
||||
}
|
||||
|
||||
return device, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) getControlDevices() ([]Device, error) {
|
||||
devices := []struct {
|
||||
name string
|
||||
path string
|
||||
minor int
|
||||
}{
|
||||
// TODO: Where is the best place to find these device Minors programatically?
|
||||
{nvidiaGPUDeviceName, "/dev/nvidia-modeset", 254},
|
||||
{nvidiaGPUDeviceName, "/dev/nvidiactl", 255},
|
||||
{nvidiaUVMDeviceName, "/dev/nvidia-uvm", 0},
|
||||
{nvidiaUVMDeviceName, "/dev/nvidia-uvm-tools", 1},
|
||||
}
|
||||
|
||||
var controlDevices []Device
|
||||
for _, info := range devices {
|
||||
device, exists := d.nvidiaDevices.Get(info.name)
|
||||
if !exists {
|
||||
d.logger.Warnf("device name %v not defined; skipping control devices %v", info.name, info.path)
|
||||
continue
|
||||
}
|
||||
|
||||
deviceNode := DeviceNode{
|
||||
Path: DevicePath(info.path),
|
||||
Major: device.Major,
|
||||
Minor: info.minor,
|
||||
}
|
||||
|
||||
controlDevices = append(controlDevices, Device{
|
||||
UUID: ControlDeviceUUID,
|
||||
DeviceNodes: []DeviceNode{deviceNode},
|
||||
ProcPaths: []ProcPath{},
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
return controlDevices, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) getMigControlDevices(numGpus int) ([]Device, error) {
|
||||
targets := map[string]ProcPath{
|
||||
MIGConfigDeviceUUID: ProcPath("/proc/driver/nvidia/capabilities/mig/config"),
|
||||
MIGMonitorDeviceUUID: ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"),
|
||||
}
|
||||
|
||||
var devices []Device
|
||||
for id, procPath := range targets {
|
||||
deviceNode, exists := d.migCaps[procPath]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("device node for '%v' is undefined", procPath)
|
||||
}
|
||||
|
||||
var procPaths []ProcPath
|
||||
for gpu := 0; gpu <= numGpus; gpu++ {
|
||||
procPaths = append(procPaths, ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig", gpu)))
|
||||
}
|
||||
procPaths = append(procPaths, procPath)
|
||||
|
||||
devices = append(devices, Device{
|
||||
UUID: id,
|
||||
DeviceNodes: []DeviceNode{deviceNode},
|
||||
ProcPaths: procPaths,
|
||||
})
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
func getProcPathsForMigDevice(gpu int, handle nvml.Device) ([]ProcPath, error) {
|
||||
gi, ret := handle.GetGPUInstanceId()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU instance ID: %v", ret.Error())
|
||||
}
|
||||
|
||||
ci, ret := handle.GetComputeInstanceId()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting comput instance ID: %v", ret.Error())
|
||||
}
|
||||
|
||||
procPaths := []ProcPath{
|
||||
ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig/gi%d/access", gpu, gi)),
|
||||
ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci)),
|
||||
}
|
||||
|
||||
return procPaths, nil
|
||||
}
|
||||
|
||||
func getMIGHandlesForDevice(handle nvml.Device) ([]nvml.Device, error) {
|
||||
currentMigMode, _, ret := handle.GetMigMode()
|
||||
if ret.Value() == nvml.ERROR_NOT_SUPPORTED {
|
||||
return nil, nil
|
||||
}
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting MIG mode for device: %v", ret.Error())
|
||||
}
|
||||
if currentMigMode == nvml.DEVICE_MIG_DISABLE {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
maxMigDeviceCount, ret := handle.GetMaxMigDeviceCount()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting number of MIG devices: %v", ret.Error())
|
||||
}
|
||||
|
||||
var migHandles []nvml.Device
|
||||
for mi := 0; mi < maxMigDeviceCount; mi++ {
|
||||
migHandle, ret := handle.GetMigDeviceHandleByIndex(mi)
|
||||
if ret.Value() == nvml.ERROR_NOT_FOUND {
|
||||
continue
|
||||
}
|
||||
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting MIG device %v: %v", mi, ret.Error())
|
||||
}
|
||||
|
||||
migHandles = append(migHandles, migHandle)
|
||||
}
|
||||
|
||||
return migHandles, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) tryShutdownNVML() {
|
||||
ret := d.nvml.Shutdown()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
d.logger.Warnf("Could not shut down NVML: %v", ret.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// NewPCIBusID provides a utility function that returns the string representation
|
||||
// of the bus ID.
|
||||
func NewPCIBusID(p nvml.PciInfo) PCIBusID {
|
||||
var bytes []byte
|
||||
for _, b := range p.BusId {
|
||||
if byte(b) == '\x00' {
|
||||
break
|
||||
}
|
||||
bytes = append(bytes, byte(b))
|
||||
}
|
||||
return PCIBusID(string(bytes))
|
||||
}
|
||||
|
||||
// GetProcPath returns the path in /proc associated with the PCI bus ID
|
||||
func (p PCIBusID) GetProcPath() ProcPath {
|
||||
id := strings.ToLower(p.String())
|
||||
|
||||
if strings.HasPrefix(id, "0000") {
|
||||
id = id[4:]
|
||||
}
|
||||
return ProcPath(filepath.Join("/proc/driver/nvidia/gpus", id))
|
||||
}
|
||||
|
||||
func (p PCIBusID) String() string {
|
||||
return string(p)
|
||||
}
|
||||
44
internal/discover/discover_nvml_mig.go
Normal file
44
internal/discover/discover_nvml_mig.go
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
)
|
||||
|
||||
// getMigCaps returns a mapping of MIG capability path to device nodes
|
||||
func getMigCaps(capDeviceMajor int) (map[ProcPath]DeviceNode, error) {
|
||||
migCaps, err := nvcaps.LoadMigMinors()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading MIG minors: %v", err)
|
||||
}
|
||||
return getMigCapsFromMigMinors(migCaps, capDeviceMajor), nil
|
||||
}
|
||||
|
||||
func getMigCapsFromMigMinors(migCaps map[nvcaps.MigCap]nvcaps.MigMinor, capDeviceMajor int) map[ProcPath]DeviceNode {
|
||||
capsDevicePaths := make(map[ProcPath]DeviceNode)
|
||||
for cap, minor := range migCaps {
|
||||
capsDevicePaths[ProcPath(cap.ProcPath())] = DeviceNode{
|
||||
Path: DevicePath(minor.DevicePath()),
|
||||
Major: capDeviceMajor,
|
||||
Minor: int(minor),
|
||||
}
|
||||
}
|
||||
return capsDevicePaths
|
||||
}
|
||||
41
internal/discover/discover_nvml_mig_test.go
Normal file
41
internal/discover/discover_nvml_mig_test.go
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetMigCaps(t *testing.T) {
|
||||
migMinors := map[nvcaps.MigCap]nvcaps.MigMinor{
|
||||
"config": 1,
|
||||
"monitor": 2,
|
||||
"gpu0/gi0/access": 3,
|
||||
"gpu0/gi0/ci0/access": 4,
|
||||
}
|
||||
|
||||
migCapMajor := 999
|
||||
migCaps := getMigCapsFromMigMinors(migMinors, migCapMajor)
|
||||
|
||||
require.Len(t, migCaps, len(migMinors))
|
||||
for _, c := range migCaps {
|
||||
require.Equal(t, migCapMajor, c.Major)
|
||||
}
|
||||
}
|
||||
219
internal/discover/discover_nvml_test.go
Normal file
219
internal/discover/discover_nvml_test.go
Normal file
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaGPUDeviceMajorDefault = 195
|
||||
nvidiaCapsDeviceMajorDefault = 235
|
||||
)
|
||||
|
||||
func newTestDiscover() nvmlDiscover {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
nvml := nvml.NewMockNVMLServer(nvml.NewMockA100Device(0))
|
||||
|
||||
return nvmlDiscover{
|
||||
logger: logger,
|
||||
nvml: nvml,
|
||||
nvidiaDevices: proc.NewMockNvidiaDevices(
|
||||
proc.Device{Name: nvidiaGPUDeviceName, Major: nvidiaGPUDeviceMajorDefault},
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func newTestDiscoverWithMIG() nvmlDiscover {
|
||||
device := &nvml.MockA100Device{
|
||||
Index: 0,
|
||||
MigMode: nvml.DEVICE_MIG_ENABLE,
|
||||
GpuInstances: make(map[*nvml.MockA100GpuInstance]struct{}),
|
||||
GpuInstanceCounter: 0,
|
||||
}
|
||||
// Create a single gi and ci on the device
|
||||
gpuInstanceProfileInfo := &nvml.GpuInstanceProfileInfo{
|
||||
Id: nvml.GPU_INSTANCE_PROFILE_7_SLICE,
|
||||
}
|
||||
gi, _ := device.CreateGpuInstance(gpuInstanceProfileInfo)
|
||||
|
||||
computeInstanceProfileInfo := &nvml.ComputeInstanceProfileInfo{
|
||||
Id: nvml.COMPUTE_INSTANCE_PROFILE_1_SLICE,
|
||||
}
|
||||
_, _ = gi.CreateComputeInstance(computeInstanceProfileInfo)
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
return nvmlDiscover{
|
||||
logger: logger,
|
||||
nvml: nvml.NewMockNVMLServer(device),
|
||||
migCaps: map[ProcPath]DeviceNode{
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap3"),
|
||||
Minor: 3,
|
||||
Major: 235,
|
||||
},
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap4"),
|
||||
Minor: 4,
|
||||
Major: 235,
|
||||
},
|
||||
ProcPath("/proc/driver/nvidia/capabilities/mig/config"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap1"),
|
||||
Minor: 1,
|
||||
Major: 235,
|
||||
},
|
||||
ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap2"),
|
||||
Minor: 2,
|
||||
Major: 235,
|
||||
},
|
||||
},
|
||||
nvidiaDevices: proc.NewMockNvidiaDevices(
|
||||
proc.Device{Name: nvidiaGPUDeviceName, Major: nvidiaGPUDeviceMajorDefault},
|
||||
proc.Device{Name: nvidiaCapsDeviceName, Major: nvidiaCapsDeviceMajorDefault},
|
||||
),
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestDiscoverNvmlDevices(t *testing.T) {
|
||||
d := newTestDiscover()
|
||||
devices, err := d.Devices()
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, devices, 3)
|
||||
|
||||
device := devices[0]
|
||||
|
||||
require.Equal(t, "0", device.Index)
|
||||
require.Equal(t, "GPU-0", device.UUID)
|
||||
require.Equal(t, "0000FFFF:FF:FF.F", device.PCIBusID.String())
|
||||
|
||||
expectedDeviceNodes := []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia0"),
|
||||
Minor: 0,
|
||||
Major: nvidiaGPUDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, device.DeviceNodes)
|
||||
|
||||
expectedProcPaths := []ProcPath{ProcPath("/proc/driver/nvidia/gpus/ffff:ff:ff.f")}
|
||||
require.Equal(t, expectedProcPaths, device.ProcPaths)
|
||||
}
|
||||
|
||||
func TestDiscoverNvmlMigDevices(t *testing.T) {
|
||||
d := newTestDiscoverWithMIG()
|
||||
|
||||
devices, err := d.Devices()
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, devices, 5)
|
||||
|
||||
mig := devices[0]
|
||||
|
||||
require.Equal(t, "0:0", mig.Index)
|
||||
require.Equal(t, "MIG-0", mig.UUID)
|
||||
require.Empty(t, mig.PCIBusID)
|
||||
|
||||
expectedDeviceNodes := []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia0"),
|
||||
Minor: 0,
|
||||
Major: nvidiaGPUDeviceMajorDefault,
|
||||
},
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap3"),
|
||||
Minor: 3,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap4"),
|
||||
Minor: 4,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, mig.DeviceNodes)
|
||||
|
||||
expectedProcPaths := []ProcPath{
|
||||
ProcPath("/proc/driver/nvidia/gpus/ffff:ff:ff.f"),
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"),
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"),
|
||||
}
|
||||
require.Equal(t, expectedProcPaths, mig.ProcPaths)
|
||||
|
||||
var config *Device
|
||||
var monitor *Device
|
||||
for i, d := range devices {
|
||||
if d.UUID == "CONFIG" {
|
||||
config = &devices[i]
|
||||
}
|
||||
if d.UUID == "MONITOR" {
|
||||
monitor = &devices[i]
|
||||
}
|
||||
}
|
||||
|
||||
require.NotNil(t, config)
|
||||
require.NotNil(t, monitor)
|
||||
|
||||
require.Equal(t, "CONFIG", config.UUID)
|
||||
|
||||
expectedDeviceNodes = []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap1"),
|
||||
Minor: 1,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, config.DeviceNodes)
|
||||
|
||||
require.Contains(t, config.ProcPaths, ProcPath("/proc/driver/nvidia/capabilities/mig/config"))
|
||||
require.Len(t, config.ProcPaths, 2)
|
||||
|
||||
require.Equal(t, "MONITOR", monitor.UUID)
|
||||
|
||||
expectedDeviceNodes = []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap2"),
|
||||
Minor: 2,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, monitor.DeviceNodes)
|
||||
|
||||
require.Contains(t, monitor.ProcPaths, ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"))
|
||||
require.Len(t, monitor.ProcPaths, 2)
|
||||
|
||||
}
|
||||
|
||||
func TestPCIBusID(t *testing.T) {
|
||||
testCases := map[string]ProcPath{
|
||||
"0000FFFF:FF:FF.F": "/proc/driver/nvidia/gpus/ffff:ff:ff.f",
|
||||
"FFFFFFFF:FF:FF.F": "/proc/driver/nvidia/gpus/ffffffff:ff:ff.f",
|
||||
}
|
||||
|
||||
for busID, procPath := range testCases {
|
||||
p := PCIBusID(busID)
|
||||
require.Equal(t, busID, p.String())
|
||||
require.Equal(t, procPath, p.GetProcPath())
|
||||
}
|
||||
}
|
||||
71
internal/discover/hooks.go
Normal file
71
internal/discover/hooks.go
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type hooks struct {
|
||||
None
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
var _ Discover = (*hooks)(nil)
|
||||
|
||||
// NewHooks creates a discoverer for linux containers
|
||||
func NewHooks() Discover {
|
||||
return NewHooksWithLogger(log.StandardLogger())
|
||||
}
|
||||
|
||||
// NewHooksWithLogger creates a discoverer as with NewHooks with the specified logger
|
||||
func NewHooksWithLogger(logger *log.Logger) Discover {
|
||||
h := hooks{
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
return &h
|
||||
}
|
||||
|
||||
func (h hooks) Hooks() ([]Hook, error) {
|
||||
var hooks []Hook
|
||||
|
||||
hooks = append(hooks, newLdconfigHook())
|
||||
|
||||
return hooks, nil
|
||||
}
|
||||
|
||||
func newLdconfigHook() Hook {
|
||||
const rootPattern = "@Root.Path@"
|
||||
|
||||
h := Hook{
|
||||
Path: "/sbin/ldconfig",
|
||||
Args: []string{
|
||||
// TODO: Testing seems to indicate that this is -v flag is required
|
||||
"-v",
|
||||
"-r", rootPattern,
|
||||
},
|
||||
// TODO: CreateContainer hooks were only added to a later OCI spec version
|
||||
// We will have to find a way to deal with OCI versions before 1.0.2
|
||||
HookName: "create-container",
|
||||
Labels: map[string]string{
|
||||
"min-oci-version": "1.0.2",
|
||||
},
|
||||
}
|
||||
|
||||
return h
|
||||
}
|
||||
52
internal/discover/ipc.go
Normal file
52
internal/discover/ipc.go
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewIPCMounts creates a discoverer for IPC sockets
|
||||
func NewIPCMounts(root string) Discover {
|
||||
return NewIPCMountsWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
// NewIPCMountsWithLogger creates a discovered as with NewIPCMounts with the
|
||||
// specified logger.
|
||||
func NewIPCMountsWithLogger(logger *log.Logger, root string) Discover {
|
||||
d := mounts{
|
||||
logger: logger,
|
||||
lookup: lookup.NewFileLocatorWithLogger(logger, root),
|
||||
required: requiredIPCs,
|
||||
}
|
||||
|
||||
return &d
|
||||
}
|
||||
|
||||
var requiredIPCs = map[string][]string{
|
||||
"nvidia-persistenced": {
|
||||
"/var/run/nvidia-persistenced/socket",
|
||||
},
|
||||
"nvidia-fabricmanager": {
|
||||
"/var/run/nvidia-fabricmanager/socket",
|
||||
},
|
||||
// TODO: This can be controlled by the NV_MPS_PIPE_DIR envvar
|
||||
"nvidia-mps": {
|
||||
"/tmp/nvidia-mps",
|
||||
},
|
||||
}
|
||||
56
internal/discover/ipc_test.go
Normal file
56
internal/discover/ipc_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestIPCDiscover(t *testing.T) {
|
||||
|
||||
ipcLookup := map[string]string{
|
||||
"/var/run/nvidia-persistenced/socket": "/var/run/nvidia-persistenced/socket",
|
||||
"/var/run/nvidia-fabricmanager/socket": "fm-socket",
|
||||
}
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
d := NewIPCMountsWithLogger(logger, "")
|
||||
|
||||
// Override lookup for testing
|
||||
mockLocator := NewLocatorMockFromMap(ipcLookup)
|
||||
d.(*mounts).lookup = mockLocator
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, []Mount{
|
||||
{Path: "/var/run/nvidia-persistenced/socket", Labels: map[string]string{}},
|
||||
{Path: "fm-socket", Labels: map[string]string{}}}, mounts)
|
||||
|
||||
require.Len(t, mockLocator.LocateCalls(), 3)
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
100
internal/discover/libraries.go
Normal file
100
internal/discover/libraries.go
Normal file
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewLibraries constructs discoverer for libraries
|
||||
func NewLibraries(root string) (Discover, error) {
|
||||
return NewLibrariesWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
// NewLibrariesWithLogger constructs discoverer for libraries with the specified logger
|
||||
func NewLibrariesWithLogger(logger *log.Logger, root string) (Discover, error) {
|
||||
lookup, err := lookup.NewLibraryLocatorWithLogger(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing locator: %v", err)
|
||||
}
|
||||
|
||||
d := mounts{
|
||||
logger: logger,
|
||||
lookup: lookup,
|
||||
required: requiredLibraries,
|
||||
}
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
// requiredLibraries defines a set of libraries and their labels
|
||||
var requiredLibraries = map[string][]string{
|
||||
"utility": {
|
||||
"libnvidia-ml.so", /* Management library */
|
||||
"libnvidia-cfg.so", /* GPU configuration */
|
||||
},
|
||||
"compute": {
|
||||
"libcuda.so", /* CUDA driver library */
|
||||
"libnvidia-opencl.so", /* NVIDIA OpenCL ICD */
|
||||
"libnvidia-ptxjitcompiler.so", /* PTX-SASS JIT compiler (used by libcuda) */
|
||||
"libnvidia-fatbinaryloader.so", /* fatbin loader (used by libcuda) */
|
||||
"libnvidia-allocator.so", /* NVIDIA allocator runtime library */
|
||||
"libnvidia-compiler.so", /* NVVM-PTX compiler for OpenCL (used by libnvidia-opencl) */
|
||||
},
|
||||
"video": {
|
||||
"libvdpau_nvidia.so", /* NVIDIA VDPAU ICD */
|
||||
"libnvidia-encode.so", /* Video encoder */
|
||||
"libnvidia-opticalflow.so", /* NVIDIA Opticalflow library */
|
||||
"libnvcuvid.so", /* Video decoder */
|
||||
},
|
||||
"graphics": {
|
||||
//"libnvidia-egl-wayland.so", /* EGL wayland platform extension (used by libEGL_nvidia) */
|
||||
"libnvidia-eglcore.so", /* EGL core (used by libGLES*[_nvidia] and libEGL_nvidia) */
|
||||
"libnvidia-glcore.so", /* OpenGL core (used by libGL or libGLX_nvidia) */
|
||||
"libnvidia-tls.so", /* Thread local storage (used by libGL or libGLX_nvidia) */
|
||||
"libnvidia-glsi.so", /* OpenGL system interaction (used by libEGL_nvidia) */
|
||||
"libnvidia-fbc.so", /* Framebuffer capture */
|
||||
"libnvidia-ifr.so", /* OpenGL framebuffer capture */
|
||||
"libnvidia-rtcore.so", /* Optix */
|
||||
"libnvoptix.so", /* Optix */
|
||||
},
|
||||
"glvnd": {
|
||||
//"libGLX.so", /* GLX ICD loader */
|
||||
//"libOpenGL.so", /* OpenGL ICD loader */
|
||||
//"libGLdispatch.so", /* OpenGL dispatch (used by libOpenGL, libEGL and libGLES*) */
|
||||
"libGLX_nvidia.so", /* OpenGL/GLX ICD */
|
||||
"libEGL_nvidia.so", /* EGL ICD */
|
||||
"libGLESv2_nvidia.so", /* OpenGL ES v2 ICD */
|
||||
"libGLESv1_CM_nvidia.so", /* OpenGL ES v1 common profile ICD */
|
||||
"libnvidia-glvkspirv.so", /* SPIR-V Lib for Vulkan */
|
||||
"libnvidia-cbl.so", /* VK_NV_ray_tracing */
|
||||
},
|
||||
"compat32": {
|
||||
"libGL.so", /* OpenGL/GLX legacy _or_ compatibility wrapper (GLVND) */
|
||||
"libEGL.so", /* EGL legacy _or_ ICD loader (GLVND) */
|
||||
"libGLESv1_CM.so", /* OpenGL ES v1 common profile legacy _or_ ICD loader (GLVND) */
|
||||
"libGLESv2.so", /* OpenGL ES v2 legacy _or_ ICD loader (GLVND) */
|
||||
},
|
||||
"ngx": {
|
||||
"libnvidia-ngx.so", /* NGX library */
|
||||
},
|
||||
"dxcore": {
|
||||
"libdxcore.so", /* Core library for dxcore support */
|
||||
},
|
||||
}
|
||||
56
internal/discover/libraries_test.go
Normal file
56
internal/discover/libraries_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestLibraries(t *testing.T) {
|
||||
|
||||
libraryLookup := map[string]string{
|
||||
"libcuda.so": "/lib/libcuda.so.999.99",
|
||||
"libversion.so": "/lib/libversion.so.111.11",
|
||||
"libother.so": "/lib/libother.so.999.99",
|
||||
}
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
d, err := NewLibrariesWithLogger(logger, "")
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
// Override lookup for testing
|
||||
mockLocator := NewLocatorMockFromMap(libraryLookup)
|
||||
d.(*mounts).lookup = mockLocator
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
|
||||
require.ElementsMatch(t, []Mount{{Path: "/lib/libcuda.so.999.99", Labels: map[string]string{}}}, mounts)
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
125
internal/discover/mounts.go
Normal file
125
internal/discover/mounts.go
Normal file
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
capabilityLabel = "capability"
|
||||
versionLabel = "version"
|
||||
)
|
||||
|
||||
// mounts is a generic discoverer for Mounts. It is customized by specifying the
|
||||
// required entities as a key-value pair as well as a Locator that is used to
|
||||
// identify the mounts that are to be included.
|
||||
type mounts struct {
|
||||
None
|
||||
logger *log.Logger
|
||||
lookup lookup.Locator
|
||||
required map[string][]string
|
||||
}
|
||||
|
||||
var _ Discover = (*mounts)(nil)
|
||||
|
||||
func (d mounts) Mounts() ([]Mount, error) {
|
||||
mounts, err := d.uniqueMounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering mounts: %v", err)
|
||||
}
|
||||
|
||||
return mounts.Slice(), nil
|
||||
}
|
||||
|
||||
func (d mounts) uniqueMounts() (mountsByPath, error) {
|
||||
if d.lookup == nil {
|
||||
return nil, fmt.Errorf("no lookup defined")
|
||||
}
|
||||
|
||||
mounts := make(mountsByPath)
|
||||
|
||||
for id, keys := range d.required {
|
||||
for _, key := range keys {
|
||||
d.logger.Debugf("Locating %v [%v]", key, id)
|
||||
located, err := d.lookup.Locate(key)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Could not locate %v [%v]: %v", key, id, err)
|
||||
continue
|
||||
}
|
||||
d.logger.Infof("Located %v [%v]: %v", key, id, located)
|
||||
for _, p := range located {
|
||||
// TODO: We need to add labels
|
||||
mount := newMount(p)
|
||||
mounts.Put(mount)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mounts, nil
|
||||
}
|
||||
|
||||
type mountsByPath map[string]Mount
|
||||
|
||||
func (m mountsByPath) Slice() []Mount {
|
||||
var mounts []Mount
|
||||
for _, mount := range m {
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
|
||||
return mounts
|
||||
}
|
||||
|
||||
func (m *mountsByPath) Put(value Mount) {
|
||||
key := value.Path
|
||||
mount, exists := (*m)[key]
|
||||
if !exists {
|
||||
(*m)[key] = value
|
||||
return
|
||||
}
|
||||
|
||||
for k, v := range value.Labels {
|
||||
mount.Labels[k] = v
|
||||
}
|
||||
(*m)[key] = mount
|
||||
}
|
||||
|
||||
// NewMountForCapability creates a mount with the specified capability label
|
||||
func NewMountForCapability(path string, capability string) Mount {
|
||||
return newMount(path, capabilityLabel, capability)
|
||||
}
|
||||
|
||||
// NewMountForVersion creates a mount with the specified version label
|
||||
func NewMountForVersion(path string, version string) Mount {
|
||||
return newMount(path, versionLabel, version)
|
||||
}
|
||||
|
||||
func newMount(path string, labels ...string) Mount {
|
||||
l := make(map[string]string)
|
||||
|
||||
for i := 0; i < len(labels)-1; i += 2 {
|
||||
l[labels[i]] = labels[i+1]
|
||||
}
|
||||
|
||||
return Mount{
|
||||
Path: path,
|
||||
Labels: l,
|
||||
}
|
||||
}
|
||||
53
internal/discover/mounts_test.go
Normal file
53
internal/discover/mounts_test.go
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestMountsReturnsErrorForNoLookup(t *testing.T) {
|
||||
d := mounts{}
|
||||
mounts, err := d.Mounts()
|
||||
|
||||
require.Error(t, err)
|
||||
require.Len(t, mounts, 0)
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
|
||||
func NewLocatorMockFromMap(lookupMap map[string]string) *lookup.LocatorMock {
|
||||
return &lookup.LocatorMock{
|
||||
LocateFunc: func(key string) ([]string, error) {
|
||||
value, exists := lookupMap[key]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("key %v not found", key)
|
||||
}
|
||||
return []string{value}, nil
|
||||
},
|
||||
}
|
||||
}
|
||||
38
internal/discover/none.go
Normal file
38
internal/discover/none.go
Normal file
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
// None is a null discoverer that returns an empty list of devices and
|
||||
// mounts.
|
||||
type None struct{}
|
||||
|
||||
var _ Discover = (*None)(nil)
|
||||
|
||||
// Devices returns an empty list of devices
|
||||
func (e None) Devices() ([]Device, error) {
|
||||
return []Device{}, nil
|
||||
}
|
||||
|
||||
// Mounts returns an empty list of mounts
|
||||
func (e None) Mounts() ([]Mount, error) {
|
||||
return []Mount{}, nil
|
||||
}
|
||||
|
||||
// Hooks returns an empty list of hooks
|
||||
func (e None) Hooks() ([]Hook, error) {
|
||||
return []Hook{}, nil
|
||||
}
|
||||
39
internal/discover/none_test.go
Normal file
39
internal/discover/none_test.go
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNone(t *testing.T) {
|
||||
d := None{}
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, mounts)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
71
internal/discover/nvml_server.go
Normal file
71
internal/discover/nvml_server.go
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type nvmlServer struct {
|
||||
logger *log.Logger
|
||||
composite
|
||||
}
|
||||
|
||||
var _ Discover = (*nvmlServer)(nil)
|
||||
|
||||
// NewNVMLServer constructs a discoverer for server systems using NVML to discover devices
|
||||
func NewNVMLServer(root string) (Discover, error) {
|
||||
return NewNVMLServerWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
// NewNVMLServerWithLogger constructs a discoverer for server systems using NVML to discover devices with
|
||||
// the specified logger
|
||||
func NewNVMLServerWithLogger(logger *log.Logger, root string) (Discover, error) {
|
||||
return createNVMLServer(logger, nvml.New(), root)
|
||||
}
|
||||
|
||||
func createNVMLServer(logger *log.Logger, nvml nvml.Interface, root string) (Discover, error) {
|
||||
d := nvmlServer{
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
devices, err := NewNVMLDiscoverWithLogger(logger, nvml)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing NVML device discoverer: %v", err)
|
||||
}
|
||||
|
||||
libraries, err := NewLibrariesWithLogger(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing library discoverer: %v", err)
|
||||
}
|
||||
|
||||
d.add(
|
||||
// Device discovery
|
||||
devices,
|
||||
// Mounts discovery
|
||||
libraries,
|
||||
NewBinaryMountsWithLogger(logger, root),
|
||||
NewIPCMountsWithLogger(logger, root),
|
||||
// Hook discovery
|
||||
NewHooksWithLogger(logger),
|
||||
)
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
66
internal/discover/nvml_server_test.go
Normal file
66
internal/discover/nvml_server_test.go
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const (
|
||||
testMajor = 999
|
||||
)
|
||||
|
||||
func TestNVMLServerConstructor(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
nvml := nvml.NewMockNVMLOnLunaServer()
|
||||
|
||||
d, err := createNVMLServer(logger, nvml, "")
|
||||
require.NoError(t, err)
|
||||
|
||||
instance := d.(*nvmlServer)
|
||||
require.Len(t, instance.discoverers, 1+3+1)
|
||||
|
||||
// We need to override the nvidiaDevices member of the nvmlDiscovery
|
||||
// TODO: Use a mock instead, or allow for injection into a constructor
|
||||
instance.discoverers[0].(*nvmlDiscover).nvidiaDevices = &mockNvidiaDevices{}
|
||||
|
||||
devices, err := d.Devices()
|
||||
|
||||
require.NoError(t, err)
|
||||
require.NotEmpty(t, devices)
|
||||
|
||||
_, err = d.Mounts()
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
type mockNvidiaDevices struct{}
|
||||
|
||||
var _ proc.NvidiaDevices = (*mockNvidiaDevices)(nil)
|
||||
|
||||
func (d mockNvidiaDevices) Get(name string) (proc.Device, bool) {
|
||||
return proc.Device{Name: name, Major: testMajor}, true
|
||||
}
|
||||
|
||||
func (d mockNvidiaDevices) Exists(string) bool {
|
||||
return false
|
||||
}
|
||||
129
internal/ensure/devices.go
Normal file
129
internal/ensure/devices.go
Normal file
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package ensure
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type ensureDevices struct {
|
||||
logger *log.Logger
|
||||
discover.Discover
|
||||
lookup lookup.Locator
|
||||
root string
|
||||
}
|
||||
|
||||
// NewEnsureDevices creates a discoverer that wraps the specified discoverer and ensures that the
|
||||
// device nodes for the discoverer are created. If a root is specified, the device nodes
|
||||
// rooted there are also created.
|
||||
func NewEnsureDevices(d discover.Discover, root string) discover.Discover {
|
||||
return NewEnsureDevicesWithLogger(log.StandardLogger(), d, root)
|
||||
}
|
||||
|
||||
// NewEnsureDevicesWithLogger creates a discoverer that wraps the specified discoverer and ensures that the
|
||||
// device nodes for the discoverer are created. If a root is specified, the device nodes
|
||||
// rooted there are also created. The specified logger is used.
|
||||
func NewEnsureDevicesWithLogger(logger *log.Logger, d discover.Discover, root string) discover.Discover {
|
||||
e := ensureDevices{
|
||||
Discover: d,
|
||||
logger: logger,
|
||||
lookup: lookup.NewPathLocatorWithLogger(logger, root),
|
||||
root: root,
|
||||
}
|
||||
return &e
|
||||
}
|
||||
|
||||
func (d ensureDevices) Devices() ([]discover.Device, error) {
|
||||
devices, err := d.Discover.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering devices: %v", err)
|
||||
}
|
||||
for _, di := range devices {
|
||||
for _, dn := range di.DeviceNodes {
|
||||
d.deviceNode(dn)
|
||||
}
|
||||
}
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
func (d ensureDevices) deviceNode(dn discover.DeviceNode) error {
|
||||
err := d.device(dn.Path, dn.Major, dn.Minor)
|
||||
if err != nil {
|
||||
d.logger.Errorf("Error creating device node %+v: %v", dn, err)
|
||||
}
|
||||
|
||||
if d.root != "" && d.root != "/" {
|
||||
rootedPath := discover.DevicePath(filepath.Join(d.root, string(dn.Path)))
|
||||
err = d.device(rootedPath, dn.Major, dn.Minor)
|
||||
if err != nil {
|
||||
d.logger.Errorf("Error creating device node %+v: %v", dn, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d ensureDevices) device(path discover.DevicePath, major int, minor int) error {
|
||||
// TODO: We may want to check that the device node has the required permissions
|
||||
_, err := os.Stat(string(path))
|
||||
if err == nil {
|
||||
d.logger.Infof("Device node %v already exists", path)
|
||||
return nil
|
||||
}
|
||||
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
d.logger.Errorf("Error getting info for device node %v: %v", path, err)
|
||||
return fmt.Errorf("error getting device node info: %w", err)
|
||||
}
|
||||
// See: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#runfile-verifications
|
||||
|
||||
// TODO: We should use nvidia-modprobe or a tool based off that instead
|
||||
args := []string{
|
||||
"-m", "666",
|
||||
string(path), "c",
|
||||
fmt.Sprint(major),
|
||||
fmt.Sprint(minor),
|
||||
}
|
||||
|
||||
return d.run("mknod", args...)
|
||||
}
|
||||
|
||||
func (d ensureDevices) run(cmd string, args ...string) error {
|
||||
paths, err := d.lookup.Locate(cmd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error finding command %v: %v", cmd, err)
|
||||
}
|
||||
if len(paths) == 0 {
|
||||
return fmt.Errorf("command %v not found in path", cmd)
|
||||
}
|
||||
path := paths[0]
|
||||
|
||||
d.logger.Debugf("Running %v", append([]string{path}, args...))
|
||||
err = exec.Command(path, args...).Run()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error running %v: %v", append([]string{path}, args...), err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
22
internal/ensure/ensure.go
Normal file
22
internal/ensure/ensure.go
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package ensure
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
|
||||
// Ensure is an alias for Discover
|
||||
type Ensure discover.Discover
|
||||
41
internal/filter/all.go
Normal file
41
internal/filter/all.go
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
|
||||
type all struct {
|
||||
selectors []Selector
|
||||
}
|
||||
|
||||
// All returns a selector that evaluates true if EACH of the specified selectors
|
||||
// are selected.
|
||||
func All(selectors ...Selector) Selector {
|
||||
s := all{
|
||||
selectors: selectors,
|
||||
}
|
||||
return &s
|
||||
}
|
||||
|
||||
func (s all) Selected(device discover.Device) bool {
|
||||
for _, si := range s.selectors {
|
||||
if !si.Selected(device) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
76
internal/filter/all_test.go
Normal file
76
internal/filter/all_test.go
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestAll(t *testing.T) {
|
||||
True := &SelectorMock{
|
||||
SelectedFunc: func(discover.Device) bool {
|
||||
return true
|
||||
},
|
||||
}
|
||||
False := &SelectorMock{
|
||||
SelectedFunc: func(discover.Device) bool {
|
||||
return false
|
||||
},
|
||||
}
|
||||
|
||||
d := discover.Device{}
|
||||
|
||||
// Ensure that the mocks are set up correctly:
|
||||
require.True(t, True.Selected(d))
|
||||
require.False(t, False.Selected(d))
|
||||
|
||||
emtpy := All()
|
||||
require.True(t, emtpy.Selected(d))
|
||||
|
||||
s00 := All(False, False)
|
||||
require.False(t, s00.Selected(d))
|
||||
|
||||
s01 := All(False, True)
|
||||
require.False(t, s01.Selected(d))
|
||||
|
||||
s10 := All(True, False)
|
||||
require.False(t, s10.Selected(d))
|
||||
|
||||
s11 := All(True, True)
|
||||
require.True(t, s11.Selected(d))
|
||||
}
|
||||
|
||||
type discoverMock struct {
|
||||
discover.None
|
||||
devices []discover.Device
|
||||
devicesError error
|
||||
mounts []discover.Mount
|
||||
mountsError error
|
||||
}
|
||||
|
||||
var _ discover.Discover = (*discoverMock)(nil)
|
||||
|
||||
func (m discoverMock) Devices() ([]discover.Device, error) {
|
||||
return m.devices, m.devicesError
|
||||
}
|
||||
|
||||
func (m discoverMock) Mounts() ([]discover.Mount, error) {
|
||||
return m.mounts, m.mountsError
|
||||
}
|
||||
41
internal/filter/any.go
Normal file
41
internal/filter/any.go
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
|
||||
type any struct {
|
||||
selectors []Selector
|
||||
}
|
||||
|
||||
// Any returns a selector that evaluates true if ANY of the specified selectors
|
||||
// are selected
|
||||
func Any(selectors ...Selector) Selector {
|
||||
s := any{
|
||||
selectors: selectors,
|
||||
}
|
||||
return &s
|
||||
}
|
||||
|
||||
func (s any) Selected(device discover.Device) bool {
|
||||
for _, si := range s.selectors {
|
||||
if si.Selected(device) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
58
internal/filter/any_test.go
Normal file
58
internal/filter/any_test.go
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. Any rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestAny(t *testing.T) {
|
||||
True := &SelectorMock{
|
||||
SelectedFunc: func(discover.Device) bool {
|
||||
return true
|
||||
},
|
||||
}
|
||||
False := &SelectorMock{
|
||||
SelectedFunc: func(discover.Device) bool {
|
||||
return false
|
||||
},
|
||||
}
|
||||
|
||||
d := discover.Device{}
|
||||
|
||||
// Ensure that the mocks are set up correctly:
|
||||
require.True(t, True.Selected(d))
|
||||
require.False(t, False.Selected(d))
|
||||
|
||||
emtpy := Any()
|
||||
require.False(t, emtpy.Selected(d))
|
||||
|
||||
s00 := Any(False, False)
|
||||
require.False(t, s00.Selected(d))
|
||||
|
||||
s01 := Any(False, True)
|
||||
require.True(t, s01.Selected(d))
|
||||
|
||||
s10 := Any(True, False)
|
||||
require.True(t, s10.Selected(d))
|
||||
|
||||
s11 := Any(True, True)
|
||||
require.True(t, s11.Selected(d))
|
||||
}
|
||||
60
internal/filter/by_id.go
Normal file
60
internal/filter/by_id.go
Normal file
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
)
|
||||
|
||||
type devicesByID map[string]struct{}
|
||||
|
||||
var _ Selector = (*devicesByID)(nil)
|
||||
|
||||
// NewDeviceSelector creates a selector for devices based on the specified IDs.
|
||||
func NewDeviceSelector(ids ...string) Selector {
|
||||
deviceIDs := make(devicesByID)
|
||||
|
||||
for _, id := range ids {
|
||||
deviceIDs[id] = struct{}{}
|
||||
}
|
||||
|
||||
return deviceIDs
|
||||
}
|
||||
|
||||
// Selected checks whether a specific device is included in the set of devicesIDs
|
||||
// The device is checked by UUID, Index, and PCIBusID and if any of these match
|
||||
// the device is considered selected.
|
||||
func (d devicesByID) Selected(device discover.Device) bool {
|
||||
var exists bool
|
||||
|
||||
_, exists = d[device.UUID]
|
||||
if exists {
|
||||
return true
|
||||
}
|
||||
|
||||
_, exists = d[device.Index]
|
||||
if exists {
|
||||
return true
|
||||
}
|
||||
|
||||
_, exists = d[device.PCIBusID.String()]
|
||||
if exists {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
47
internal/filter/by_id_test.go
Normal file
47
internal/filter/by_id_test.go
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDeviceByID(t *testing.T) {
|
||||
device := discover.Device{
|
||||
Index: "index",
|
||||
UUID: "uuid",
|
||||
PCIBusID: discover.PCIBusID("pcibusid"),
|
||||
}
|
||||
|
||||
require.False(t, NewDeviceSelector().Selected(device))
|
||||
require.False(t, NewDeviceSelector("notindex", "notuuid", "notpcibusid").Selected(device))
|
||||
|
||||
require.True(t, NewDeviceSelector("index").Selected(device))
|
||||
require.True(t, NewDeviceSelector("notindex", "index").Selected(device))
|
||||
|
||||
require.True(t, NewDeviceSelector("uuid").Selected(device))
|
||||
require.True(t, NewDeviceSelector("notuuid", "uuid").Selected(device))
|
||||
|
||||
require.True(t, NewDeviceSelector("pcibusid").Selected(device))
|
||||
require.True(t, NewDeviceSelector("notpcibusid", "pcibusid").Selected(device))
|
||||
|
||||
require.True(t, NewDeviceSelector("index", "uuid", "pcibusid").Selected(device))
|
||||
|
||||
}
|
||||
90
internal/filter/devices.go
Normal file
90
internal/filter/devices.go
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
visibleDevicesAll = "all"
|
||||
visibleDevicesNone = "none"
|
||||
visibleDevicesVoid = "void"
|
||||
)
|
||||
|
||||
type devices struct {
|
||||
discover.Discover
|
||||
logger *log.Logger
|
||||
selector Selector
|
||||
}
|
||||
|
||||
var _ discover.Discover = (*devices)(nil)
|
||||
|
||||
// NewSelectDevicesFrom creates a filter that selects devices based on the value of the
|
||||
// visible devices string.
|
||||
func NewSelectDevicesFrom(d discover.Discover, visibleDevices string, env EnvLookup) discover.Discover {
|
||||
return NewSelectDevicesFromWithLogger(log.StandardLogger(), d, visibleDevices, env)
|
||||
}
|
||||
|
||||
// NewSelectDevicesFromWithLogger creates a filter as for NewSelectDevicesFrom with the
|
||||
// specified logger.
|
||||
func NewSelectDevicesFromWithLogger(logger *log.Logger, d discover.Discover, visibleDevices string, env EnvLookup) discover.Discover {
|
||||
if visibleDevices == "" || visibleDevices == visibleDevicesNone || visibleDevices == visibleDevicesVoid {
|
||||
return &discover.None{}
|
||||
}
|
||||
|
||||
var visibleDeviceSelector Selector
|
||||
if visibleDevices == visibleDevicesAll {
|
||||
visibleDeviceSelector = StandardDevice()
|
||||
} else {
|
||||
visibleDeviceSelector = All(StandardDevice(), NewDeviceSelector(strings.Split(visibleDevices, ",")...))
|
||||
}
|
||||
|
||||
controlDeviceIds := getControlDeviceIDsFromEnvWithLogger(logger, env)
|
||||
controlDeviceSelector := All(ControlDevice(), NewDeviceSelector(controlDeviceIds...))
|
||||
|
||||
vd := devices{
|
||||
Discover: d,
|
||||
logger: logger,
|
||||
selector: Any(visibleDeviceSelector, controlDeviceSelector),
|
||||
}
|
||||
|
||||
return &vd
|
||||
}
|
||||
|
||||
// Devices returns the list of selected devices after filtering based on the
|
||||
// configured selector
|
||||
func (d devices) Devices() ([]discover.Device, error) {
|
||||
devices, err := d.Discover.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering devices: %v", err)
|
||||
}
|
||||
|
||||
var selected []discover.Device
|
||||
for _, di := range devices {
|
||||
if d.selector.Selected(di) {
|
||||
d.logger.Infof("selecting device=%v", di)
|
||||
selected = append(selected, di)
|
||||
}
|
||||
}
|
||||
|
||||
return selected, nil
|
||||
}
|
||||
104
internal/filter/devices_test.go
Normal file
104
internal/filter/devices_test.go
Normal file
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
log "github.com/sirupsen/logrus"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestConstructor(t *testing.T) {
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
device0 := discover.Device{
|
||||
Index: "0",
|
||||
UUID: "0",
|
||||
PCIBusID: discover.PCIBusID("0"),
|
||||
}
|
||||
device1 := discover.Device{
|
||||
Index: "1",
|
||||
UUID: "1",
|
||||
PCIBusID: discover.PCIBusID("1"),
|
||||
}
|
||||
device2 := discover.Device{
|
||||
Index: "2",
|
||||
UUID: "2",
|
||||
PCIBusID: discover.PCIBusID("2"),
|
||||
}
|
||||
device3 := discover.Device{
|
||||
Index: "3",
|
||||
UUID: "3",
|
||||
PCIBusID: discover.PCIBusID("3"),
|
||||
}
|
||||
controlDevice := discover.Device{
|
||||
UUID: "CONTROL",
|
||||
}
|
||||
mockDevices := []discover.Device{
|
||||
device0,
|
||||
device1,
|
||||
device2,
|
||||
device3,
|
||||
controlDevice,
|
||||
}
|
||||
d := discoverMock{
|
||||
devices: mockDevices,
|
||||
}
|
||||
|
||||
var ok bool
|
||||
|
||||
withDefaultLogger, ok := NewSelectDevicesFrom(d, "all", nil).(*devices)
|
||||
require.True(t, ok)
|
||||
require.Same(t, log.StandardLogger(), withDefaultLogger.logger)
|
||||
|
||||
_, ok = NewSelectDevicesFromWithLogger(logger, d, "", nil).(*discover.None)
|
||||
require.True(t, ok)
|
||||
|
||||
_, ok = NewSelectDevicesFromWithLogger(logger, d, "void", nil).(*discover.None)
|
||||
require.True(t, ok)
|
||||
|
||||
_, ok = NewSelectDevicesFromWithLogger(logger, d, "none", nil).(*discover.None)
|
||||
require.True(t, ok)
|
||||
|
||||
all, ok := NewSelectDevicesFromWithLogger(logger, d, "all", nil).(*devices)
|
||||
require.True(t, ok)
|
||||
|
||||
devs, err := all.Devices()
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, mockDevices, devs)
|
||||
|
||||
f, ok := NewSelectDevicesFromWithLogger(logger, d, "0", nil).(*devices)
|
||||
require.True(t, ok)
|
||||
|
||||
devs, err = f.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, devs, 2)
|
||||
require.ElementsMatch(t, devs, []discover.Device{device0, controlDevice})
|
||||
|
||||
f, ok = NewSelectDevicesFromWithLogger(logger, d, "0,2", nil).(*devices)
|
||||
require.True(t, ok)
|
||||
|
||||
devs, err = f.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, devs, 3)
|
||||
|
||||
require.ElementsMatch(t, devs, []discover.Device{device0, device2, controlDevice})
|
||||
}
|
||||
26
internal/filter/filter.go
Normal file
26
internal/filter/filter.go
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
//go:generate moq -stub -out filter_mock.go . EnvLookup
|
||||
|
||||
// EnvLookup defines an interface that supports the LookupEnv function for getting
|
||||
// environment variable values.
|
||||
// TODO: This belongs in a different package
|
||||
type EnvLookup interface {
|
||||
LookupEnv(string) (string, bool)
|
||||
}
|
||||
77
internal/filter/filter_mock.go
Normal file
77
internal/filter/filter_mock.go
Normal file
@@ -0,0 +1,77 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that EnvLookupMock does implement EnvLookup.
|
||||
// If this is not the case, regenerate this file with moq.
|
||||
var _ EnvLookup = &EnvLookupMock{}
|
||||
|
||||
// EnvLookupMock is a mock implementation of EnvLookup.
|
||||
//
|
||||
// func TestSomethingThatUsesEnvLookup(t *testing.T) {
|
||||
//
|
||||
// // make and configure a mocked EnvLookup
|
||||
// mockedEnvLookup := &EnvLookupMock{
|
||||
// LookupEnvFunc: func(s string) (string, bool) {
|
||||
// panic("mock out the LookupEnv method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedEnvLookup in code that requires EnvLookup
|
||||
// // and then make assertions.
|
||||
//
|
||||
// }
|
||||
type EnvLookupMock struct {
|
||||
// LookupEnvFunc mocks the LookupEnv method.
|
||||
LookupEnvFunc func(s string) (string, bool)
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// LookupEnv holds details about calls to the LookupEnv method.
|
||||
LookupEnv []struct {
|
||||
// S is the s argument value.
|
||||
S string
|
||||
}
|
||||
}
|
||||
lockLookupEnv sync.RWMutex
|
||||
}
|
||||
|
||||
// LookupEnv calls LookupEnvFunc.
|
||||
func (mock *EnvLookupMock) LookupEnv(s string) (string, bool) {
|
||||
callInfo := struct {
|
||||
S string
|
||||
}{
|
||||
S: s,
|
||||
}
|
||||
mock.lockLookupEnv.Lock()
|
||||
mock.calls.LookupEnv = append(mock.calls.LookupEnv, callInfo)
|
||||
mock.lockLookupEnv.Unlock()
|
||||
if mock.LookupEnvFunc == nil {
|
||||
var (
|
||||
sOut string
|
||||
bOut bool
|
||||
)
|
||||
return sOut, bOut
|
||||
}
|
||||
return mock.LookupEnvFunc(s)
|
||||
}
|
||||
|
||||
// LookupEnvCalls gets all the calls that were made to LookupEnv.
|
||||
// Check the length with:
|
||||
// len(mockedEnvLookup.LookupEnvCalls())
|
||||
func (mock *EnvLookupMock) LookupEnvCalls() []struct {
|
||||
S string
|
||||
} {
|
||||
var calls []struct {
|
||||
S string
|
||||
}
|
||||
mock.lockLookupEnv.RLock()
|
||||
calls = mock.calls.LookupEnv
|
||||
mock.lockLookupEnv.RUnlock()
|
||||
return calls
|
||||
}
|
||||
107
internal/filter/is_control_device.go
Normal file
107
internal/filter/is_control_device.go
Normal file
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
devicesAll = "all"
|
||||
)
|
||||
|
||||
type controlDevices struct {
|
||||
discover.Discover
|
||||
logger *log.Logger
|
||||
selector Selector
|
||||
}
|
||||
|
||||
var _ discover.Discover = (*controlDevices)(nil)
|
||||
|
||||
// NewControlDevicesFrom creates a filter that selects devices based on the value of the
|
||||
// visible devices string.
|
||||
func NewControlDevicesFrom(d discover.Discover, env EnvLookup) Selector {
|
||||
return NewControlDevicesFromWithLogger(log.StandardLogger(), d, env)
|
||||
}
|
||||
|
||||
// NewControlDevicesFromWithLogger creates a filter as for NewControlDevicesFrom with the
|
||||
// specified logger.
|
||||
func NewControlDevicesFromWithLogger(logger *log.Logger, d discover.Discover, env EnvLookup) Selector {
|
||||
controlDevices := getControlDeviceIDsFromEnvWithLogger(logger, env)
|
||||
return NewDeviceSelector(controlDevices...)
|
||||
}
|
||||
|
||||
type controlDevice struct{}
|
||||
|
||||
// ControlDevice returns a selector for control devices
|
||||
func ControlDevice() Selector {
|
||||
return &controlDevice{}
|
||||
}
|
||||
|
||||
// Selected returns true for a controll devices and false for standard devices. A control device
|
||||
// has an empty index and PCI bus ID and a non-empty UUID.
|
||||
func (s controlDevice) Selected(device discover.Device) bool {
|
||||
if device.Index != "" {
|
||||
return false
|
||||
}
|
||||
if device.PCIBusID != "" {
|
||||
return false
|
||||
}
|
||||
if device.UUID == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func getControlDeviceIDsFromEnvWithLogger(logger *log.Logger, env EnvLookup) []string {
|
||||
controlDevices := []string{discover.ControlDeviceUUID}
|
||||
|
||||
migControlDevices := getMIGControlDevicesFromEnvWithLogger(logger, env)
|
||||
|
||||
return append(controlDevices, migControlDevices...)
|
||||
}
|
||||
|
||||
func getMIGControlDevicesFromEnvWithLogger(logger *log.Logger, env EnvLookup) []string {
|
||||
if env == nil {
|
||||
logger.Debugf("Environment not specified; no MIG Control devices selected")
|
||||
return []string{}
|
||||
}
|
||||
|
||||
var controlDevices []string
|
||||
|
||||
// Add MIG control devices
|
||||
migEnvUUIDMap := map[string]string{
|
||||
discover.MIGConfigDeviceUUID: "NVIDIA_MIG_CONFIG_DEVICES",
|
||||
discover.MIGMonitorDeviceUUID: "NVIDIA_MIG_MONITOR_DEVICES",
|
||||
}
|
||||
for uuid, migEnv := range migEnvUUIDMap {
|
||||
config, exists := env.LookupEnv(migEnv)
|
||||
if !exists {
|
||||
logger.Debugf("Envvar %v not set", migEnv)
|
||||
continue
|
||||
}
|
||||
if config == devicesAll {
|
||||
logger.Infof("Found %v=%v; selecting MIG %v devices", migEnv, config, uuid)
|
||||
controlDevices = append(controlDevices, uuid)
|
||||
} else {
|
||||
logger.Debugf("Found %v=%v; Skipping MIG %v devices (%v != %v)", migEnv, config, uuid, config, devicesAll)
|
||||
}
|
||||
}
|
||||
return controlDevices
|
||||
}
|
||||
123
internal/filter/is_control_device_test.go
Normal file
123
internal/filter/is_control_device_test.go
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestControlDevice(t *testing.T) {
|
||||
control := ControlDevice()
|
||||
|
||||
pcibusID := discover.PCIBusID("pcibusid")
|
||||
device := discover.Device{
|
||||
Index: "index",
|
||||
UUID: "uuid",
|
||||
PCIBusID: pcibusID,
|
||||
}
|
||||
require.False(t, control.Selected(device))
|
||||
|
||||
require.False(t, control.Selected(
|
||||
discover.Device{UUID: "uuid", PCIBusID: pcibusID},
|
||||
))
|
||||
|
||||
require.False(t, control.Selected(
|
||||
discover.Device{Index: "index", PCIBusID: pcibusID},
|
||||
))
|
||||
|
||||
require.False(t, control.Selected(
|
||||
discover.Device{Index: "index", UUID: "uuid"},
|
||||
))
|
||||
|
||||
require.False(t, control.Selected(discover.Device{}))
|
||||
|
||||
require.True(t, control.Selected(discover.Device{UUID: "uuid"}))
|
||||
}
|
||||
|
||||
func TestGetControlDevicesFromEnv(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
env map[string]string
|
||||
expectedIds []string
|
||||
}{
|
||||
{
|
||||
name: "nil environment",
|
||||
env: nil,
|
||||
expectedIds: []string{"CONTROL"},
|
||||
},
|
||||
{
|
||||
name: "empty environment",
|
||||
env: map[string]string{},
|
||||
expectedIds: []string{"CONTROL"},
|
||||
},
|
||||
{
|
||||
name: "MIG monitor blank",
|
||||
env: map[string]string{"NVIDIA_MIG_MONITOR_DEVICES": ""},
|
||||
expectedIds: []string{"CONTROL"},
|
||||
},
|
||||
{
|
||||
name: "MIG config blank",
|
||||
env: map[string]string{"NVIDIA_MIG_CONFIG_DEVICES": ""},
|
||||
expectedIds: []string{"CONTROL"},
|
||||
},
|
||||
{
|
||||
name: "MIG monitor not all",
|
||||
env: map[string]string{"NVIDIA_MIG_MONITOR_DEVICES": "not-all"},
|
||||
expectedIds: []string{"CONTROL"},
|
||||
},
|
||||
{
|
||||
name: "MIG config not all",
|
||||
env: map[string]string{"NVIDIA_MIG_CONFIG_DEVICES": "not-all"},
|
||||
expectedIds: []string{"CONTROL"},
|
||||
},
|
||||
{
|
||||
name: "MIG monitor all",
|
||||
env: map[string]string{"NVIDIA_MIG_MONITOR_DEVICES": "all"},
|
||||
expectedIds: []string{"CONTROL", "MONITOR"},
|
||||
},
|
||||
{
|
||||
name: "MIG config all",
|
||||
env: map[string]string{"NVIDIA_MIG_CONFIG_DEVICES": "all"},
|
||||
expectedIds: []string{"CONTROL", "CONFIG"},
|
||||
},
|
||||
{
|
||||
name: "MIG config and monitor all",
|
||||
env: map[string]string{"NVIDIA_MIG_CONFIG_DEVICES": "all", "NVIDIA_MIG_MONITOR_DEVICES": "all"},
|
||||
expectedIds: []string{"CONTROL", "CONFIG", "MONITOR"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
t.Run(fmt.Sprintf("%d: %s", i, tc.name), func(t *testing.T) {
|
||||
deviceIDs := getControlDeviceIDsFromEnvWithLogger(logger, &EnvLookupMock{
|
||||
LookupEnvFunc: func(s string) (string, bool) {
|
||||
value, exists := tc.env[s]
|
||||
return value, exists
|
||||
},
|
||||
})
|
||||
require.ElementsMatch(t, tc.expectedIds, deviceIDs)
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
41
internal/filter/is_standard_device.go
Normal file
41
internal/filter/is_standard_device.go
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
|
||||
type standardDevice struct{}
|
||||
|
||||
// StandardDevice returns a selector for regular (non-control) devices
|
||||
func StandardDevice() Selector {
|
||||
return &standardDevice{}
|
||||
}
|
||||
|
||||
// Selected returns true for a standard device and false for controll devices. A regular device
|
||||
// is expected to have an index, uuid, and PCI bus ID.
|
||||
func (s standardDevice) Selected(device discover.Device) bool {
|
||||
if device.Index == "" {
|
||||
return false
|
||||
}
|
||||
if device.PCIBusID == "" {
|
||||
return false
|
||||
}
|
||||
if device.UUID == "" {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
52
internal/filter/is_standard_device_test.go
Normal file
52
internal/filter/is_standard_device_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestStandardDevice(t *testing.T) {
|
||||
standard := StandardDevice()
|
||||
|
||||
pcibusID := discover.PCIBusID("pcibusid")
|
||||
|
||||
device := discover.Device{
|
||||
Index: "index",
|
||||
UUID: "uuid",
|
||||
PCIBusID: pcibusID,
|
||||
}
|
||||
require.True(t, standard.Selected(device))
|
||||
|
||||
require.False(t, standard.Selected(
|
||||
discover.Device{UUID: "uuid", PCIBusID: pcibusID},
|
||||
))
|
||||
|
||||
require.False(t, standard.Selected(
|
||||
discover.Device{Index: "index", PCIBusID: pcibusID},
|
||||
))
|
||||
|
||||
require.False(t, standard.Selected(
|
||||
discover.Device{Index: "index", UUID: "uuid"},
|
||||
))
|
||||
|
||||
require.False(t, standard.Selected(discover.Device{}))
|
||||
|
||||
}
|
||||
27
internal/filter/selector.go
Normal file
27
internal/filter/selector.go
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package filter
|
||||
|
||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
|
||||
//go:generate moq -stub -out selector_mock.go . Selector
|
||||
|
||||
// Selector defines an interface for determining whether a specfied Device is selected
|
||||
// by a particular configuration.
|
||||
type Selector interface {
|
||||
Selected(discover.Device) bool
|
||||
}
|
||||
77
internal/filter/selector_mock.go
Normal file
77
internal/filter/selector_mock.go
Normal file
@@ -0,0 +1,77 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that SelectorMock does implement Selector.
|
||||
// If this is not the case, regenerate this file with moq.
|
||||
var _ Selector = &SelectorMock{}
|
||||
|
||||
// SelectorMock is a mock implementation of Selector.
|
||||
//
|
||||
// func TestSomethingThatUsesSelector(t *testing.T) {
|
||||
//
|
||||
// // make and configure a mocked Selector
|
||||
// mockedSelector := &SelectorMock{
|
||||
// SelectedFunc: func(device discover.Device) bool {
|
||||
// panic("mock out the Selected method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedSelector in code that requires Selector
|
||||
// // and then make assertions.
|
||||
//
|
||||
// }
|
||||
type SelectorMock struct {
|
||||
// SelectedFunc mocks the Selected method.
|
||||
SelectedFunc func(device discover.Device) bool
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// Selected holds details about calls to the Selected method.
|
||||
Selected []struct {
|
||||
// Device is the device argument value.
|
||||
Device discover.Device
|
||||
}
|
||||
}
|
||||
lockSelected sync.RWMutex
|
||||
}
|
||||
|
||||
// Selected calls SelectedFunc.
|
||||
func (mock *SelectorMock) Selected(device discover.Device) bool {
|
||||
callInfo := struct {
|
||||
Device discover.Device
|
||||
}{
|
||||
Device: device,
|
||||
}
|
||||
mock.lockSelected.Lock()
|
||||
mock.calls.Selected = append(mock.calls.Selected, callInfo)
|
||||
mock.lockSelected.Unlock()
|
||||
if mock.SelectedFunc == nil {
|
||||
var (
|
||||
bOut bool
|
||||
)
|
||||
return bOut
|
||||
}
|
||||
return mock.SelectedFunc(device)
|
||||
}
|
||||
|
||||
// SelectedCalls gets all the calls that were made to Selected.
|
||||
// Check the length with:
|
||||
// len(mockedSelector.SelectedCalls())
|
||||
func (mock *SelectorMock) SelectedCalls() []struct {
|
||||
Device discover.Device
|
||||
} {
|
||||
var calls []struct {
|
||||
Device discover.Device
|
||||
}
|
||||
mock.lockSelected.RLock()
|
||||
calls = mock.calls.Selected
|
||||
mock.lockSelected.RUnlock()
|
||||
return calls
|
||||
}
|
||||
239
internal/ldcache/ldcache.go
Normal file
239
internal/ldcache/ldcache.go
Normal file
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
// Adapted from https://github.com/rai-project/ldcache
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const ldcachePath = "/etc/ld.so.cache"
|
||||
|
||||
const (
|
||||
magicString1 = "ld.so-1.7.0"
|
||||
magicString2 = "glibc-ld.so.cache"
|
||||
magicVersion = "1.1"
|
||||
)
|
||||
|
||||
const (
|
||||
flagTypeMask = 0x00ff
|
||||
flagTypeELF = 0x0001
|
||||
|
||||
flagArchMask = 0xff00
|
||||
flagArchI386 = 0x0000
|
||||
flagArchX8664 = 0x0300
|
||||
flagArchX32 = 0x0800
|
||||
flagArchPpc64le = 0x0500
|
||||
)
|
||||
|
||||
var ErrInvalidCache = errors.New("invalid ld.so.cache file")
|
||||
|
||||
type Header1 struct {
|
||||
Magic [len(magicString1) + 1]byte // include null delimiter
|
||||
NLibs uint32
|
||||
}
|
||||
|
||||
type Entry1 struct {
|
||||
Flags int32
|
||||
Key, Value uint32
|
||||
}
|
||||
|
||||
type Header2 struct {
|
||||
Magic [len(magicString2)]byte
|
||||
Version [len(magicVersion)]byte
|
||||
NLibs uint32
|
||||
TableSize uint32
|
||||
_ [3]uint32 // unused
|
||||
_ uint64 // force 8 byte alignment
|
||||
}
|
||||
|
||||
type Entry2 struct {
|
||||
Flags int32
|
||||
Key, Value uint32
|
||||
OSVersion uint32
|
||||
HWCap uint64
|
||||
}
|
||||
|
||||
type LDCache struct {
|
||||
*bytes.Reader
|
||||
|
||||
data, libs []byte
|
||||
header Header2
|
||||
entries []Entry2
|
||||
|
||||
root string
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
func NewLDCacheWithLogger(logger *log.Logger, root string) (*LDCache, error) {
|
||||
return openWithRoot(logger, root)
|
||||
}
|
||||
|
||||
func Open() (*LDCache, error) {
|
||||
return openWithRoot(log.StandardLogger(), "")
|
||||
}
|
||||
|
||||
func openWithRoot(logger *log.Logger, root string) (*LDCache, error) {
|
||||
path := filepath.Join(root, ldcachePath)
|
||||
|
||||
logger.Debugf("Opening ld.conf at %v", path)
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d, err := syscall.Mmap(int(f.Fd()), 0, int(fi.Size()),
|
||||
syscall.PROT_READ, syscall.MAP_PRIVATE)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cache := &LDCache{
|
||||
data: d,
|
||||
Reader: bytes.NewReader(d),
|
||||
root: root,
|
||||
logger: logger,
|
||||
}
|
||||
return cache, cache.parse()
|
||||
}
|
||||
|
||||
func (c *LDCache) Close() error {
|
||||
return syscall.Munmap(c.data)
|
||||
}
|
||||
|
||||
func (c *LDCache) Magic() string {
|
||||
return string(c.header.Magic[:])
|
||||
}
|
||||
|
||||
func (c *LDCache) Version() string {
|
||||
return string(c.header.Version[:])
|
||||
}
|
||||
|
||||
func strn(b []byte, n int) string {
|
||||
return string(b[:n])
|
||||
}
|
||||
|
||||
func (c *LDCache) parse() error {
|
||||
var header Header1
|
||||
|
||||
// Check for the old format (< glibc-2.2)
|
||||
if c.Len() <= int(unsafe.Sizeof(header)) {
|
||||
return ErrInvalidCache
|
||||
}
|
||||
if strn(c.data, len(magicString1)) == magicString1 {
|
||||
if err := binary.Read(c, binary.LittleEndian, &header); err != nil {
|
||||
return err
|
||||
}
|
||||
n := int64(header.NLibs) * int64(unsafe.Sizeof(Entry1{}))
|
||||
offset, err := c.Seek(n, 1) // skip old entries
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n = (-offset) & int64(unsafe.Alignof(c.header)-1)
|
||||
_, err = c.Seek(n, 1) // skip padding
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
c.libs = c.data[c.Size()-int64(c.Len()):] // kv offsets start here
|
||||
if err := binary.Read(c, binary.LittleEndian, &c.header); err != nil {
|
||||
return err
|
||||
}
|
||||
if c.Magic() != magicString2 || c.Version() != magicVersion {
|
||||
return ErrInvalidCache
|
||||
}
|
||||
c.entries = make([]Entry2, c.header.NLibs)
|
||||
if err := binary.Read(c, binary.LittleEndian, &c.entries); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *LDCache) Lookup(libs ...string) (paths32, paths64 []string) {
|
||||
c.logger.Debugf("Looking up %v in cache", libs)
|
||||
type void struct{}
|
||||
var paths *[]string
|
||||
|
||||
set := make(map[string]void)
|
||||
prefix := make([][]byte, len(libs))
|
||||
|
||||
for i := range libs {
|
||||
prefix[i] = []byte(libs[i])
|
||||
}
|
||||
for _, e := range c.entries {
|
||||
if ((e.Flags & flagTypeMask) & flagTypeELF) == 0 {
|
||||
continue
|
||||
}
|
||||
switch e.Flags & flagArchMask {
|
||||
case flagArchX8664:
|
||||
fallthrough
|
||||
case flagArchPpc64le:
|
||||
paths = &paths64
|
||||
case flagArchX32:
|
||||
fallthrough
|
||||
case flagArchI386:
|
||||
paths = &paths32
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if e.Key > uint32(len(c.libs)) || e.Value > uint32(len(c.libs)) {
|
||||
continue
|
||||
}
|
||||
lib := c.libs[e.Key:]
|
||||
value := c.libs[e.Value:]
|
||||
|
||||
for _, p := range prefix {
|
||||
if bytes.HasPrefix(lib, p) {
|
||||
n := bytes.IndexByte(value, 0)
|
||||
if n < 0 {
|
||||
break
|
||||
}
|
||||
|
||||
name := filepath.Join(c.root, strn(value, n))
|
||||
c.logger.Debugf("checking %v", string(name))
|
||||
|
||||
path, err := filepath.EvalSymlinks(name)
|
||||
if err != nil {
|
||||
c.logger.Debugf("could not resolve symlink for %v", name)
|
||||
break
|
||||
}
|
||||
if _, ok := set[path]; ok {
|
||||
break
|
||||
}
|
||||
set[path] = void{}
|
||||
*paths = append(*paths, path)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
78
internal/lookup/file.go
Normal file
78
internal/lookup/file.go
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type file struct {
|
||||
logger *log.Logger
|
||||
prefixes []string
|
||||
filter func(string) error
|
||||
}
|
||||
|
||||
func NewFileLocator(root string) Locator {
|
||||
return NewFileLocatorWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
func NewFileLocatorWithLogger(logger *log.Logger, root string) Locator {
|
||||
l := file{
|
||||
logger: logger,
|
||||
prefixes: []string{root},
|
||||
filter: assertFile,
|
||||
}
|
||||
|
||||
return &l
|
||||
}
|
||||
|
||||
var _ Locator = (*file)(nil)
|
||||
|
||||
func (p file) Locate(filename string) ([]string, error) {
|
||||
var filenames []string
|
||||
for _, prefix := range p.prefixes {
|
||||
candidate := filepath.Join(prefix, filename)
|
||||
p.logger.Debugf("Checking candidate '%v'", candidate)
|
||||
err := p.filter(candidate)
|
||||
if err != nil {
|
||||
p.logger.Debugf("Candidate '%v' does not meet requirements: %v", candidate, err)
|
||||
continue
|
||||
}
|
||||
filenames = append(filenames, candidate)
|
||||
}
|
||||
if len(filename) == 0 {
|
||||
return nil, fmt.Errorf("file %v not found", filename)
|
||||
}
|
||||
return filenames, nil
|
||||
}
|
||||
|
||||
func assertFile(filename string) error {
|
||||
info, err := os.Stat(filename)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting info for %v: %v", filename, err)
|
||||
}
|
||||
|
||||
if info.IsDir() {
|
||||
return fmt.Errorf("specified path '%v' is a directory", filename)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
65
internal/lookup/library.go
Normal file
65
internal/lookup/library.go
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type library struct {
|
||||
logger *log.Logger
|
||||
cache *ldcache.LDCache
|
||||
}
|
||||
|
||||
var _ Locator = (*library)(nil)
|
||||
|
||||
// NewLibraryLocatorWithLogger creates a library locator using the standard logger
|
||||
func NewLibraryLocator(root string) (Locator, error) {
|
||||
return NewLibraryLocatorWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
// NewLibraryLocatorWithLogger creates a library locator using the specified logger.
|
||||
func NewLibraryLocatorWithLogger(logger *log.Logger, root string) (Locator, error) {
|
||||
logger.Infof("Reading ldcache at %v", root)
|
||||
cache, err := ldcache.NewLDCacheWithLogger(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading ldcache: %v", err)
|
||||
}
|
||||
|
||||
l := library{
|
||||
logger: logger,
|
||||
cache: cache,
|
||||
}
|
||||
|
||||
return &l, nil
|
||||
}
|
||||
|
||||
func (l library) Locate(libname string) ([]string, error) {
|
||||
paths32, paths64 := l.cache.Lookup(libname)
|
||||
if len(paths32) > 0 {
|
||||
l.logger.Warnf("Ignoring 32-bit libraries for %v: %v", libname, paths32)
|
||||
}
|
||||
|
||||
if len(paths64) == 0 {
|
||||
return nil, fmt.Errorf("64-bit library %v not found", libname)
|
||||
}
|
||||
|
||||
return paths64, nil
|
||||
}
|
||||
24
internal/lookup/lookup.go
Normal file
24
internal/lookup/lookup.go
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package lookup
|
||||
|
||||
//go:generate moq -stub -out lookup_mock.go . Locator
|
||||
|
||||
// Locator defines the interface for locating files on a system.
|
||||
type Locator interface {
|
||||
Locate(string) ([]string, error)
|
||||
}
|
||||
77
internal/lookup/lookup_mock.go
Normal file
77
internal/lookup/lookup_mock.go
Normal file
@@ -0,0 +1,77 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that LocatorMock does implement Locator.
|
||||
// If this is not the case, regenerate this file with moq.
|
||||
var _ Locator = &LocatorMock{}
|
||||
|
||||
// LocatorMock is a mock implementation of Locator.
|
||||
//
|
||||
// func TestSomethingThatUsesLocator(t *testing.T) {
|
||||
//
|
||||
// // make and configure a mocked Locator
|
||||
// mockedLocator := &LocatorMock{
|
||||
// LocateFunc: func(s string) ([]string, error) {
|
||||
// panic("mock out the Locate method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedLocator in code that requires Locator
|
||||
// // and then make assertions.
|
||||
//
|
||||
// }
|
||||
type LocatorMock struct {
|
||||
// LocateFunc mocks the Locate method.
|
||||
LocateFunc func(s string) ([]string, error)
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// Locate holds details about calls to the Locate method.
|
||||
Locate []struct {
|
||||
// S is the s argument value.
|
||||
S string
|
||||
}
|
||||
}
|
||||
lockLocate sync.RWMutex
|
||||
}
|
||||
|
||||
// Locate calls LocateFunc.
|
||||
func (mock *LocatorMock) Locate(s string) ([]string, error) {
|
||||
callInfo := struct {
|
||||
S string
|
||||
}{
|
||||
S: s,
|
||||
}
|
||||
mock.lockLocate.Lock()
|
||||
mock.calls.Locate = append(mock.calls.Locate, callInfo)
|
||||
mock.lockLocate.Unlock()
|
||||
if mock.LocateFunc == nil {
|
||||
var (
|
||||
stringsOut []string
|
||||
errOut error
|
||||
)
|
||||
return stringsOut, errOut
|
||||
}
|
||||
return mock.LocateFunc(s)
|
||||
}
|
||||
|
||||
// LocateCalls gets all the calls that were made to Locate.
|
||||
// Check the length with:
|
||||
// len(mockedLocator.LocateCalls())
|
||||
func (mock *LocatorMock) LocateCalls() []struct {
|
||||
S string
|
||||
} {
|
||||
var calls []struct {
|
||||
S string
|
||||
}
|
||||
mock.lockLocate.RLock()
|
||||
calls = mock.calls.Locate
|
||||
mock.lockLocate.RUnlock()
|
||||
return calls
|
||||
}
|
||||
94
internal/lookup/path.go
Normal file
94
internal/lookup/path.go
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package lookup
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
envPath = "PATH"
|
||||
)
|
||||
|
||||
var defaultPaths = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"}
|
||||
|
||||
type path struct {
|
||||
file
|
||||
}
|
||||
|
||||
func NewPathLocator(root string) Locator {
|
||||
return NewPathLocatorWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
func NewPathLocatorWithLogger(logger *log.Logger, root string) Locator {
|
||||
pathEnv := os.Getenv(envPath)
|
||||
paths := filepath.SplitList(pathEnv)
|
||||
|
||||
if root != "" {
|
||||
paths = append(paths, defaultPaths...)
|
||||
}
|
||||
|
||||
var prefixes []string
|
||||
for _, dir := range paths {
|
||||
prefixes = append(prefixes, filepath.Join(root, dir))
|
||||
}
|
||||
l := path{
|
||||
file: file{
|
||||
logger: logger,
|
||||
prefixes: prefixes,
|
||||
filter: assertExecutable,
|
||||
},
|
||||
}
|
||||
return &l
|
||||
}
|
||||
|
||||
var _ Locator = (*path)(nil)
|
||||
|
||||
func (p path) Locate(filename string) ([]string, error) {
|
||||
// For absolute paths we ensure that it is executable
|
||||
if strings.Contains(filename, "/") {
|
||||
err := assertExecutable(filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("absolute path %v is not an executable file: %v", filename, err)
|
||||
}
|
||||
return []string{filename}, nil
|
||||
}
|
||||
|
||||
return p.file.Locate(filename)
|
||||
}
|
||||
|
||||
func assertExecutable(filename string) error {
|
||||
err := assertFile(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
info, err := os.Stat(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if info.Mode()&0111 == 0 {
|
||||
return fmt.Errorf("specified file '%v' is not executable", filename)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
118
internal/modify/device.go
Normal file
118
internal/modify/device.go
Normal file
@@ -0,0 +1,118 @@
|
||||
package modify
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Device is an alias to discover.Device that allows for addition of a Modify method
|
||||
type Device struct {
|
||||
logger *log.Logger
|
||||
discover.Device
|
||||
}
|
||||
|
||||
// ProcMount is an alias to discover.Mount that allows for the addition of a Modify method for
|
||||
// proc paths associated with devices
|
||||
type ProcMount struct {
|
||||
logger *log.Logger
|
||||
discover.ProcPath
|
||||
}
|
||||
|
||||
var _ Modifier = (*Device)(nil)
|
||||
var _ Modifier = (*ProcMount)(nil)
|
||||
|
||||
// Modify applies the modifications required by a Device to the specified OCI specification
|
||||
func (d Device) Modify(spec oci.Spec) error {
|
||||
for _, dn := range d.DeviceNodes {
|
||||
mi := deviceNode{
|
||||
logger: d.logger,
|
||||
DeviceNode: dn,
|
||||
}
|
||||
err := mi.Modify(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not inject device node %v: %v", dn, err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, p := range d.ProcPaths {
|
||||
mi := ProcMount{
|
||||
logger: d.logger,
|
||||
ProcPath: p,
|
||||
}
|
||||
err := mi.Modify(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not inject proc path %v: %v", p, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type deviceNode struct {
|
||||
logger *log.Logger
|
||||
discover.DeviceNode
|
||||
}
|
||||
|
||||
func (d deviceNode) Modify(spec oci.Spec) error {
|
||||
return spec.Modify(d.specModifier)
|
||||
}
|
||||
|
||||
func (d deviceNode) specModifier(spec *specs.Spec) error {
|
||||
if spec.Linux == nil {
|
||||
d.logger.Debugf("Initializing spec.Linux")
|
||||
spec.Linux = &specs.Linux{}
|
||||
}
|
||||
if spec.Linux.Resources == nil {
|
||||
d.logger.Debugf("Initializing spec.LinuxResources")
|
||||
spec.Linux.Resources = &specs.LinuxResources{}
|
||||
}
|
||||
|
||||
// TODO: These need to be configurable
|
||||
deviceFileMode := os.FileMode(8630)
|
||||
deviceUID := uint32(0)
|
||||
deviceGID := uint32(0)
|
||||
|
||||
deviceMajor := int64(d.Major)
|
||||
deviceMinor := int64(d.Minor)
|
||||
|
||||
d.logger.Infof("Adding device %v", d.Path)
|
||||
ociDevice := specs.LinuxDevice{
|
||||
Path: string(d.Path),
|
||||
Type: "c",
|
||||
Major: deviceMajor,
|
||||
Minor: deviceMinor,
|
||||
FileMode: &deviceFileMode,
|
||||
UID: &deviceUID,
|
||||
GID: &deviceGID,
|
||||
}
|
||||
spec.Linux.Devices = append(spec.Linux.Devices, ociDevice)
|
||||
|
||||
ociDeviceCgroup := specs.LinuxDeviceCgroup{
|
||||
Allow: true,
|
||||
Type: "c",
|
||||
Major: &deviceMajor,
|
||||
Minor: &deviceMinor,
|
||||
Access: "rwm",
|
||||
}
|
||||
|
||||
// TODO: We have to handle the case where we are updating the cgroups for multiple devices
|
||||
// leading to duplicates in the spec
|
||||
spec.Linux.Resources.Devices = append(spec.Linux.Resources.Devices, ociDeviceCgroup)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Modify applies the modifications required for a Mount to the specified OCI specification
|
||||
func (m ProcMount) Modify(spec oci.Spec) error {
|
||||
return spec.Modify(m.specModifier)
|
||||
}
|
||||
|
||||
func (m ProcMount) specModifier(spec *specs.Spec) error {
|
||||
m.logger.Infof("Mounting read-only proc path %v", m.ProcPath)
|
||||
spec.Linux.ReadonlyPaths = append(spec.Linux.ReadonlyPaths, string(m.ProcPath))
|
||||
return nil
|
||||
}
|
||||
155
internal/modify/discover.go
Normal file
155
internal/modify/discover.go
Normal file
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package modify
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type discoverModifier struct {
|
||||
logger *log.Logger
|
||||
discover discover.Discover
|
||||
root string
|
||||
bundleDir string
|
||||
}
|
||||
|
||||
var _ Modifier = (*discoverModifier)(nil)
|
||||
|
||||
// NewModifierFor creates a Modifier that can be used to apply the modifications to an OCI specification
|
||||
// required by the specified Discover instance.
|
||||
func NewModifierFor(discover discover.Discover, root string, bundleDir string) Modifier {
|
||||
return NewModifierWithLoggerFor(log.StandardLogger(), discover, root, bundleDir)
|
||||
}
|
||||
|
||||
// NewModifierWithLoggerFor creates a Modifier that can be used to apply the modifications to an OCI specification
|
||||
// required by the specified Discover instance.
|
||||
func NewModifierWithLoggerFor(logger *log.Logger, discover discover.Discover, root string, bundleDir string) Modifier {
|
||||
m := discoverModifier{
|
||||
logger: logger,
|
||||
discover: discover,
|
||||
root: root,
|
||||
bundleDir: bundleDir,
|
||||
}
|
||||
|
||||
return &m
|
||||
}
|
||||
|
||||
// Modify applies the modifications for the discovered devices, mounts, etc. to the specified
|
||||
// OCI spec.
|
||||
func (m discoverModifier) Modify(spec oci.Spec) error {
|
||||
m.logger.Infof("Determining required OCI spec modifications")
|
||||
modifiers, err := m.modifiers()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error constructing modifiers: %v", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Applying %v modifications", len(modifiers))
|
||||
for _, mi := range modifiers {
|
||||
err := mi.Modify(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not apply modifier %v: %v", mi, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m discoverModifier) modifiers() ([]Modifier, error) {
|
||||
var modifiers []Modifier
|
||||
|
||||
deviceModifiers, err := m.deviceModifiers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
modifiers = append(modifiers, deviceModifiers...)
|
||||
|
||||
mountModifiers, err := m.mountModifiers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
modifiers = append(modifiers, mountModifiers...)
|
||||
|
||||
hookModifiers, err := m.hookModifiers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
modifiers = append(modifiers, hookModifiers...)
|
||||
|
||||
return modifiers, nil
|
||||
}
|
||||
|
||||
func (m discoverModifier) deviceModifiers() ([]Modifier, error) {
|
||||
var modifiers []Modifier
|
||||
|
||||
devices, err := m.discover.Devices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering devices: %v", err)
|
||||
}
|
||||
|
||||
for _, d := range devices {
|
||||
m := Device{
|
||||
logger: m.logger,
|
||||
Device: d,
|
||||
}
|
||||
modifiers = append(modifiers, m)
|
||||
}
|
||||
|
||||
return modifiers, nil
|
||||
}
|
||||
|
||||
func (m discoverModifier) mountModifiers() ([]Modifier, error) {
|
||||
var modifiers []Modifier
|
||||
|
||||
mounts, err := m.discover.Mounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering mounts: %v", err)
|
||||
}
|
||||
|
||||
for _, mi := range mounts {
|
||||
mm := Mount{
|
||||
logger: m.logger,
|
||||
Mount: mi,
|
||||
root: m.root,
|
||||
}
|
||||
modifiers = append(modifiers, mm)
|
||||
}
|
||||
|
||||
return modifiers, nil
|
||||
}
|
||||
|
||||
func (m discoverModifier) hookModifiers() ([]Modifier, error) {
|
||||
var modifiers []Modifier
|
||||
|
||||
hooks, err := m.discover.Hooks()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering hooks: %v", err)
|
||||
}
|
||||
|
||||
for _, h := range hooks {
|
||||
m := Hook{
|
||||
logger: m.logger,
|
||||
Hook: h,
|
||||
bundleDir: m.bundleDir,
|
||||
}
|
||||
modifiers = append(modifiers, m)
|
||||
}
|
||||
|
||||
return modifiers, nil
|
||||
}
|
||||
81
internal/modify/hooks.go
Normal file
81
internal/modify/hooks.go
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package modify
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Hook is an alias to discover.Hook that allows for addition of a Modify method
|
||||
type Hook struct {
|
||||
logger *log.Logger
|
||||
discover.Hook
|
||||
bundleDir string
|
||||
}
|
||||
|
||||
var _ Modifier = (*Hook)(nil)
|
||||
|
||||
// Modify applies the modifications required by a Hook to the specified OCI specification
|
||||
func (h Hook) Modify(spec oci.Spec) error {
|
||||
return spec.Modify(h.specModifier)
|
||||
}
|
||||
|
||||
func (h Hook) specModifier(spec *specs.Spec) error {
|
||||
if spec.Hooks == nil {
|
||||
h.logger.Debugf("Initializing spec.Hooks")
|
||||
spec.Hooks = &specs.Hooks{}
|
||||
}
|
||||
|
||||
// TODO: This is duplicated in the hook specification
|
||||
const rootPattern = "@Root.Path@"
|
||||
|
||||
rootPath := spec.Root.Path
|
||||
if !filepath.IsAbs(rootPath) {
|
||||
rootPath = filepath.Join(h.bundleDir, rootPath)
|
||||
}
|
||||
|
||||
var args []string
|
||||
for _, a := range h.Args {
|
||||
if strings.Contains(a, rootPattern) {
|
||||
args = append(args, strings.ReplaceAll(a, rootPattern, rootPath))
|
||||
continue
|
||||
}
|
||||
args = append(args, a)
|
||||
}
|
||||
|
||||
specHook := specs.Hook{
|
||||
Path: h.Path,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
h.logger.Infof("Adding %v hook %+v", h.HookName, specHook)
|
||||
switch h.HookName {
|
||||
case "create-container":
|
||||
spec.Hooks.CreateContainer = append(spec.Hooks.CreateContainer, specHook)
|
||||
default:
|
||||
return fmt.Errorf("unexpected hook name: %v", h.HookName)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
28
internal/modify/modify.go
Normal file
28
internal/modify/modify.go
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package modify
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
)
|
||||
|
||||
//go:generate moq -stub -out modify_mock.go . Modifier
|
||||
|
||||
// Modifier defines an interface for modifying an OCI Specification.
|
||||
type Modifier interface {
|
||||
Modify(oci.Spec) error
|
||||
}
|
||||
77
internal/modify/modify_mock.go
Normal file
77
internal/modify/modify_mock.go
Normal file
@@ -0,0 +1,77 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package modify
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that ModifierMock does implement Modifier.
|
||||
// If this is not the case, regenerate this file with moq.
|
||||
var _ Modifier = &ModifierMock{}
|
||||
|
||||
// ModifierMock is a mock implementation of Modifier.
|
||||
//
|
||||
// func TestSomethingThatUsesModifier(t *testing.T) {
|
||||
//
|
||||
// // make and configure a mocked Modifier
|
||||
// mockedModifier := &ModifierMock{
|
||||
// ModifyFunc: func(spec oci.Spec) error {
|
||||
// panic("mock out the Modify method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedModifier in code that requires Modifier
|
||||
// // and then make assertions.
|
||||
//
|
||||
// }
|
||||
type ModifierMock struct {
|
||||
// ModifyFunc mocks the Modify method.
|
||||
ModifyFunc func(spec oci.Spec) error
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// Modify holds details about calls to the Modify method.
|
||||
Modify []struct {
|
||||
// Spec is the spec argument value.
|
||||
Spec oci.Spec
|
||||
}
|
||||
}
|
||||
lockModify sync.RWMutex
|
||||
}
|
||||
|
||||
// Modify calls ModifyFunc.
|
||||
func (mock *ModifierMock) Modify(spec oci.Spec) error {
|
||||
callInfo := struct {
|
||||
Spec oci.Spec
|
||||
}{
|
||||
Spec: spec,
|
||||
}
|
||||
mock.lockModify.Lock()
|
||||
mock.calls.Modify = append(mock.calls.Modify, callInfo)
|
||||
mock.lockModify.Unlock()
|
||||
if mock.ModifyFunc == nil {
|
||||
var (
|
||||
errOut error
|
||||
)
|
||||
return errOut
|
||||
}
|
||||
return mock.ModifyFunc(spec)
|
||||
}
|
||||
|
||||
// ModifyCalls gets all the calls that were made to Modify.
|
||||
// Check the length with:
|
||||
// len(mockedModifier.ModifyCalls())
|
||||
func (mock *ModifierMock) ModifyCalls() []struct {
|
||||
Spec oci.Spec
|
||||
} {
|
||||
var calls []struct {
|
||||
Spec oci.Spec
|
||||
}
|
||||
mock.lockModify.RLock()
|
||||
calls = mock.calls.Modify
|
||||
mock.lockModify.RUnlock()
|
||||
return calls
|
||||
}
|
||||
53
internal/modify/mounts.go
Normal file
53
internal/modify/mounts.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package modify
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Mount is an alias to discover.Mount that allows for addition of a Modify method
|
||||
type Mount struct {
|
||||
logger *log.Logger
|
||||
discover.Mount
|
||||
root string
|
||||
}
|
||||
|
||||
var _ Modifier = (*Mount)(nil)
|
||||
|
||||
// Modify applies the modifications required for a Mount to the specified OCI specification
|
||||
func (d Mount) Modify(spec oci.Spec) error {
|
||||
return spec.Modify(d.specModifier)
|
||||
}
|
||||
|
||||
// TODO: We need to ensure that we are correctly mounting the proc paths
|
||||
// Also — I’m not sure how this is done, but we will need a new tempfs mounted at /proc/driver/nvidia/ underneath which all of these other mounted directories get put
|
||||
// Maybe this?
|
||||
// https://github.com/opencontainers/runtime-spec/blob/master/specs-go/config.go#L175 (edited)
|
||||
// specs-go/config.go:175
|
||||
// MaskedPaths []string `json:"maskedPaths,omitempty"`
|
||||
// <https://github.com/opencontainers/runtime-spec|opencontainers/runtime-spec>opencontainers/runtime-spec | Added by GitHub
|
||||
// 13:53
|
||||
// Proabably, given…
|
||||
// https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#masked-paths (edited)
|
||||
// TODO: We can try masking all of /proc/driver/nvidia and then mounting the paths read-only
|
||||
func (d Mount) specModifier(spec *specs.Spec) error {
|
||||
source := d.Path
|
||||
destination := strings.TrimPrefix(d.Path, d.root)
|
||||
d.logger.Infof("Mounting %v -> %v", source, destination)
|
||||
mount := specs.Mount{
|
||||
Destination: destination,
|
||||
Source: source,
|
||||
Type: "bind",
|
||||
Options: []string{
|
||||
"rbind",
|
||||
"rprivate",
|
||||
},
|
||||
}
|
||||
spec.Mounts = append(spec.Mounts, mount)
|
||||
|
||||
return nil
|
||||
}
|
||||
141
internal/nvcaps/nvcaps.go
Normal file
141
internal/nvcaps/nvcaps.go
Normal file
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package nvcaps
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaProcDriverPath = "/proc/driver/nvidia"
|
||||
nvidiaCapabilitiesPath = nvidiaProcDriverPath + "/capabilities"
|
||||
|
||||
nvcapsProcDriverPath = "/proc/driver/nvidia-caps"
|
||||
nvcapsMigMinorsPath = nvcapsProcDriverPath + "/mig-minors"
|
||||
nvcapsDevicePath = "/dev/nvidia-caps"
|
||||
)
|
||||
|
||||
// MigMinor represents the minor number of a MIG device
|
||||
type MigMinor int
|
||||
|
||||
// MigCap represents the path to a MIG cap file
|
||||
type MigCap string
|
||||
|
||||
// LoadMigMinors loads the MIG minors file and returns its contents as a map
|
||||
func LoadMigMinors() (map[MigCap]MigMinor, error) {
|
||||
// Open nvcapsMigMinorsPath for walking.
|
||||
// If the nvcapsMigMinorsPath does not exist, then we are not on a MIG
|
||||
// capable machine, so there is nothing to do.
|
||||
// The format of this file is discussed in:
|
||||
// https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#unique_1576522674
|
||||
minorsFile, err := os.Open(nvcapsMigMinorsPath)
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening MIG minors file: %v", err)
|
||||
}
|
||||
defer minorsFile.Close()
|
||||
|
||||
return processMinorsFile(minorsFile), nil
|
||||
}
|
||||
|
||||
func processMinorsFile(minorsFile io.Reader) map[MigCap]MigMinor {
|
||||
// Walk each line of nvcapsMigMinorsPath and construct a mapping of nvidia
|
||||
// capabilities path to device minor for that capability
|
||||
migCaps := make(map[MigCap]MigMinor)
|
||||
scanner := bufio.NewScanner(minorsFile)
|
||||
for scanner.Scan() {
|
||||
cap, minor, err := processMigMinorsLine(scanner.Text())
|
||||
if err != nil {
|
||||
log.Printf("Skipping line in MIG minors file: %v", err)
|
||||
continue
|
||||
}
|
||||
migCaps[cap] = minor
|
||||
}
|
||||
return migCaps
|
||||
}
|
||||
|
||||
func processMigMinorsLine(line string) (MigCap, MigMinor, error) {
|
||||
parts := strings.Split(line, " ")
|
||||
if len(parts) != 2 {
|
||||
return "", 0, fmt.Errorf("error processing line: %v", line)
|
||||
}
|
||||
|
||||
migCap := MigCap(parts[0])
|
||||
if !migCap.isValid() {
|
||||
return "", 0, fmt.Errorf("invalid MIG minors line: '%v'", line)
|
||||
}
|
||||
|
||||
minor, err := strconv.Atoi(parts[1])
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("error reading MIG minor from '%v': %v", line, err)
|
||||
}
|
||||
|
||||
return migCap, MigMinor(minor), nil
|
||||
}
|
||||
|
||||
func (m MigCap) isValid() bool {
|
||||
cap := string(m)
|
||||
switch cap {
|
||||
case "config", "monitor":
|
||||
return true
|
||||
default:
|
||||
var gpu int
|
||||
var gi int
|
||||
var ci int
|
||||
// Loog for a CI access file
|
||||
n, _ := fmt.Sscanf(cap, "gpu%d/gi%d/ci%d/access", &gpu, &gi, &ci)
|
||||
if n == 3 {
|
||||
return true
|
||||
}
|
||||
// Look for a GI access file
|
||||
n, _ = fmt.Sscanf(cap, "gpu%d/gi%d/access %d", &gpu, &gi)
|
||||
if n == 2 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ProcPath returns the proc path associated with the MIG capability
|
||||
func (m MigCap) ProcPath() string {
|
||||
id := string(m)
|
||||
|
||||
var path string
|
||||
switch id {
|
||||
case "config", "monitor":
|
||||
path = "mig/" + id
|
||||
default:
|
||||
parts := strings.SplitN(id, "/", 2)
|
||||
path = strings.Join([]string{parts[0], "mig", parts[1]}, "/")
|
||||
}
|
||||
return filepath.Join(nvidiaCapabilitiesPath, path)
|
||||
}
|
||||
|
||||
// DevicePath returns the path for the nvidia-caps device with the specified
|
||||
// minor number
|
||||
func (m MigMinor) DevicePath() string {
|
||||
return fmt.Sprintf(nvcapsDevicePath+"/nvidia-cap%d", m)
|
||||
}
|
||||
94
internal/nvcaps/nvcaps_test.go
Normal file
94
internal/nvcaps/nvcaps_test.go
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package nvcaps
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestProcessMinorsFile(t *testing.T) {
|
||||
testCases := []struct {
|
||||
lines []string
|
||||
expected map[MigCap]MigMinor
|
||||
}{
|
||||
{[]string{}, map[MigCap]MigMinor{}},
|
||||
{[]string{"invalidLine"}, map[MigCap]MigMinor{}},
|
||||
{[]string{"config 1"}, map[MigCap]MigMinor{"config": 1}},
|
||||
{[]string{"gpu0/gi0/ci0/access 4"}, map[MigCap]MigMinor{"gpu0/gi0/ci0/access": 4}},
|
||||
{[]string{"config 1", "invalidLine"}, map[MigCap]MigMinor{"config": 1}},
|
||||
{[]string{"config 1", "gpu0/gi0/ci0/access 4"}, map[MigCap]MigMinor{"config": 1, "gpu0/gi0/ci0/access": 4}},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
contents := strings.NewReader(strings.Join(tc.lines, "\n"))
|
||||
d := processMinorsFile(contents)
|
||||
require.Equalf(t, tc.expected, d, "testCase: %v", tc)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessMigMinorsLine(t *testing.T) {
|
||||
testCases := []struct {
|
||||
line string
|
||||
cap MigCap
|
||||
minor MigMinor
|
||||
err bool
|
||||
}{
|
||||
{"config 1", "config", 1, false},
|
||||
{"monitor 2", "monitor", 2, false},
|
||||
{"gpu0/gi0/access 3", "gpu0/gi0/access", 3, false},
|
||||
{"gpu0/gi0/ci0/access 4", "gpu0/gi0/ci0/access", 4, false},
|
||||
{"notconfig 99", "", 0, true},
|
||||
{"config notanint", "", 0, true},
|
||||
{"", "", 0, true},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
cap, minor, err := processMigMinorsLine(tc.line)
|
||||
|
||||
require.Equalf(t, tc.cap, cap, "testCase: %v", tc)
|
||||
require.Equalf(t, tc.minor, minor, "testCase: %v", tc)
|
||||
if tc.err {
|
||||
require.Errorf(t, err, "testCase: %v", tc)
|
||||
} else {
|
||||
require.NoErrorf(t, err, "testCase: %v", tc)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigCapProcPaths(t *testing.T) {
|
||||
testCases := []struct {
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"config", "/proc/driver/nvidia/capabilities/mig/config"},
|
||||
{"monitor", "/proc/driver/nvidia/capabilities/mig/monitor"},
|
||||
{"gpu0/gi0/access", "/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"},
|
||||
{"gpu0/gi0/ci0/access", "/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
m := MigCap(tc.input)
|
||||
require.Equal(t, tc.expected, m.ProcPath())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigMinorDevicePath(t *testing.T) {
|
||||
m := MigMinor(0)
|
||||
require.Equal(t, "/dev/nvidia-caps/nvidia-cap0", m.DevicePath())
|
||||
}
|
||||
79
internal/nvml/consts.go
Normal file
79
internal/nvml/consts.go
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nvml
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
)
|
||||
|
||||
const (
|
||||
SUCCESS = nvml.SUCCESS
|
||||
ERROR_UNINITIALIZED = nvml.ERROR_UNINITIALIZED
|
||||
ERROR_INVALID_ARGUMENT = nvml.ERROR_INVALID_ARGUMENT
|
||||
ERROR_NOT_SUPPORTED = nvml.ERROR_NOT_SUPPORTED
|
||||
ERROR_NO_PERMISSION = nvml.ERROR_NO_PERMISSION
|
||||
ERROR_ALREADY_INITIALIZED = nvml.ERROR_ALREADY_INITIALIZED
|
||||
ERROR_NOT_FOUND = nvml.ERROR_NOT_FOUND
|
||||
ERROR_INSUFFICIENT_SIZE = nvml.ERROR_INSUFFICIENT_SIZE
|
||||
ERROR_INSUFFICIENT_POWER = nvml.ERROR_INSUFFICIENT_POWER
|
||||
ERROR_DRIVER_NOT_LOADED = nvml.ERROR_DRIVER_NOT_LOADED
|
||||
ERROR_TIMEOUT = nvml.ERROR_TIMEOUT
|
||||
ERROR_IRQ_ISSUE = nvml.ERROR_IRQ_ISSUE
|
||||
ERROR_LIBRARY_NOT_FOUND = nvml.ERROR_LIBRARY_NOT_FOUND
|
||||
ERROR_FUNCTION_NOT_FOUND = nvml.ERROR_FUNCTION_NOT_FOUND
|
||||
ERROR_CORRUPTED_INFOROM = nvml.ERROR_CORRUPTED_INFOROM
|
||||
ERROR_GPU_IS_LOST = nvml.ERROR_GPU_IS_LOST
|
||||
ERROR_RESET_REQUIRED = nvml.ERROR_RESET_REQUIRED
|
||||
ERROR_OPERATING_SYSTEM = nvml.ERROR_OPERATING_SYSTEM
|
||||
ERROR_LIB_RM_VERSION_MISMATCH = nvml.ERROR_LIB_RM_VERSION_MISMATCH
|
||||
ERROR_IN_USE = nvml.ERROR_IN_USE
|
||||
ERROR_MEMORY = nvml.ERROR_MEMORY
|
||||
ERROR_NO_DATA = nvml.ERROR_NO_DATA
|
||||
ERROR_VGPU_ECC_NOT_SUPPORTED = nvml.ERROR_VGPU_ECC_NOT_SUPPORTED
|
||||
ERROR_INSUFFICIENT_RESOURCES = nvml.ERROR_INSUFFICIENT_RESOURCES
|
||||
ERROR_UNKNOWN = nvml.ERROR_UNKNOWN
|
||||
)
|
||||
|
||||
const (
|
||||
DEVICE_MIG_ENABLE = nvml.DEVICE_MIG_ENABLE
|
||||
DEVICE_MIG_DISABLE = nvml.DEVICE_MIG_DISABLE
|
||||
)
|
||||
|
||||
const (
|
||||
GPU_INSTANCE_PROFILE_1_SLICE = nvml.GPU_INSTANCE_PROFILE_1_SLICE
|
||||
GPU_INSTANCE_PROFILE_2_SLICE = nvml.GPU_INSTANCE_PROFILE_2_SLICE
|
||||
GPU_INSTANCE_PROFILE_3_SLICE = nvml.GPU_INSTANCE_PROFILE_3_SLICE
|
||||
GPU_INSTANCE_PROFILE_4_SLICE = nvml.GPU_INSTANCE_PROFILE_4_SLICE
|
||||
GPU_INSTANCE_PROFILE_7_SLICE = nvml.GPU_INSTANCE_PROFILE_7_SLICE
|
||||
GPU_INSTANCE_PROFILE_8_SLICE = nvml.GPU_INSTANCE_PROFILE_8_SLICE
|
||||
GPU_INSTANCE_PROFILE_COUNT = nvml.GPU_INSTANCE_PROFILE_COUNT
|
||||
)
|
||||
|
||||
const (
|
||||
COMPUTE_INSTANCE_PROFILE_1_SLICE = nvml.COMPUTE_INSTANCE_PROFILE_1_SLICE
|
||||
COMPUTE_INSTANCE_PROFILE_2_SLICE = nvml.COMPUTE_INSTANCE_PROFILE_2_SLICE
|
||||
COMPUTE_INSTANCE_PROFILE_3_SLICE = nvml.COMPUTE_INSTANCE_PROFILE_3_SLICE
|
||||
COMPUTE_INSTANCE_PROFILE_4_SLICE = nvml.COMPUTE_INSTANCE_PROFILE_4_SLICE
|
||||
COMPUTE_INSTANCE_PROFILE_7_SLICE = nvml.COMPUTE_INSTANCE_PROFILE_7_SLICE
|
||||
COMPUTE_INSTANCE_PROFILE_8_SLICE = nvml.COMPUTE_INSTANCE_PROFILE_8_SLICE
|
||||
COMPUTE_INSTANCE_PROFILE_COUNT = nvml.COMPUTE_INSTANCE_PROFILE_COUNT
|
||||
)
|
||||
|
||||
const (
|
||||
COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED = nvml.COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED
|
||||
COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT = nvml.COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT
|
||||
)
|
||||
594
internal/nvml/mock.go
Normal file
594
internal/nvml/mock.go
Normal file
@@ -0,0 +1,594 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nvml
|
||||
|
||||
import "fmt"
|
||||
|
||||
type MockServer struct {
|
||||
Devices []Device
|
||||
}
|
||||
type MockLunaServer struct {
|
||||
MockServer
|
||||
}
|
||||
type MockA100Device struct {
|
||||
Index int
|
||||
MinorNumber int
|
||||
MigMode int
|
||||
GpuInstances map[*MockA100GpuInstance]struct{}
|
||||
GpuInstanceCounter uint32
|
||||
}
|
||||
|
||||
type MockA100GpuInstance struct {
|
||||
Info GpuInstanceInfo
|
||||
ComputeInstances map[*MockA100ComputeInstance]struct{}
|
||||
ComputeInstanceCounter uint32
|
||||
}
|
||||
type MockA100ComputeInstance struct {
|
||||
Info ComputeInstanceInfo
|
||||
}
|
||||
|
||||
var _ Interface = (*MockLunaServer)(nil)
|
||||
var _ Device = (*MockA100Device)(nil)
|
||||
var _ GpuInstance = (*MockA100GpuInstance)(nil)
|
||||
var _ ComputeInstance = (*MockA100ComputeInstance)(nil)
|
||||
|
||||
var MockA100MIGProfiles = struct {
|
||||
GpuInstanceProfiles map[int]GpuInstanceProfileInfo
|
||||
ComputeInstanceProfiles map[int]map[int]ComputeInstanceProfileInfo
|
||||
}{
|
||||
GpuInstanceProfiles: map[int]GpuInstanceProfileInfo{
|
||||
GPU_INSTANCE_PROFILE_1_SLICE: {
|
||||
Id: GPU_INSTANCE_PROFILE_1_SLICE,
|
||||
IsP2pSupported: 0,
|
||||
SliceCount: 1,
|
||||
InstanceCount: 7,
|
||||
MultiprocessorCount: 1,
|
||||
CopyEngineCount: 1,
|
||||
DecoderCount: 0,
|
||||
EncoderCount: 0,
|
||||
JpegCount: 0,
|
||||
OfaCount: 0,
|
||||
MemorySizeMB: 5120,
|
||||
},
|
||||
GPU_INSTANCE_PROFILE_2_SLICE: {
|
||||
Id: GPU_INSTANCE_PROFILE_2_SLICE,
|
||||
IsP2pSupported: 0,
|
||||
SliceCount: 2,
|
||||
InstanceCount: 3,
|
||||
MultiprocessorCount: 2,
|
||||
CopyEngineCount: 2,
|
||||
DecoderCount: 1,
|
||||
EncoderCount: 1,
|
||||
JpegCount: 0,
|
||||
OfaCount: 0,
|
||||
MemorySizeMB: 10240,
|
||||
},
|
||||
GPU_INSTANCE_PROFILE_3_SLICE: {
|
||||
Id: GPU_INSTANCE_PROFILE_3_SLICE,
|
||||
IsP2pSupported: 0,
|
||||
SliceCount: 3,
|
||||
InstanceCount: 2,
|
||||
MultiprocessorCount: 3,
|
||||
CopyEngineCount: 4,
|
||||
DecoderCount: 2,
|
||||
EncoderCount: 2,
|
||||
JpegCount: 0,
|
||||
OfaCount: 0,
|
||||
MemorySizeMB: 20480,
|
||||
},
|
||||
GPU_INSTANCE_PROFILE_4_SLICE: {
|
||||
Id: GPU_INSTANCE_PROFILE_4_SLICE,
|
||||
IsP2pSupported: 0,
|
||||
SliceCount: 4,
|
||||
InstanceCount: 1,
|
||||
MultiprocessorCount: 4,
|
||||
CopyEngineCount: 4,
|
||||
DecoderCount: 2,
|
||||
EncoderCount: 2,
|
||||
JpegCount: 0,
|
||||
OfaCount: 0,
|
||||
MemorySizeMB: 20480,
|
||||
},
|
||||
GPU_INSTANCE_PROFILE_7_SLICE: {
|
||||
Id: GPU_INSTANCE_PROFILE_7_SLICE,
|
||||
IsP2pSupported: 0,
|
||||
SliceCount: 7,
|
||||
InstanceCount: 1,
|
||||
MultiprocessorCount: 7,
|
||||
CopyEngineCount: 8,
|
||||
DecoderCount: 5,
|
||||
EncoderCount: 5,
|
||||
JpegCount: 1,
|
||||
OfaCount: 1,
|
||||
MemorySizeMB: 40960,
|
||||
},
|
||||
},
|
||||
ComputeInstanceProfiles: map[int]map[int]ComputeInstanceProfileInfo{
|
||||
GPU_INSTANCE_PROFILE_1_SLICE: {
|
||||
COMPUTE_INSTANCE_PROFILE_1_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_1_SLICE,
|
||||
SliceCount: 1,
|
||||
InstanceCount: 1,
|
||||
MultiprocessorCount: 1,
|
||||
SharedCopyEngineCount: 1,
|
||||
SharedDecoderCount: 0,
|
||||
SharedEncoderCount: 0,
|
||||
SharedJpegCount: 0,
|
||||
SharedOfaCount: 0,
|
||||
},
|
||||
},
|
||||
GPU_INSTANCE_PROFILE_2_SLICE: {
|
||||
COMPUTE_INSTANCE_PROFILE_1_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_1_SLICE,
|
||||
SliceCount: 1,
|
||||
InstanceCount: 2,
|
||||
MultiprocessorCount: 1,
|
||||
SharedCopyEngineCount: 2,
|
||||
SharedDecoderCount: 1,
|
||||
SharedEncoderCount: 1,
|
||||
SharedJpegCount: 0,
|
||||
SharedOfaCount: 0,
|
||||
},
|
||||
COMPUTE_INSTANCE_PROFILE_2_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_2_SLICE,
|
||||
SliceCount: 2,
|
||||
InstanceCount: 1,
|
||||
MultiprocessorCount: 2,
|
||||
SharedCopyEngineCount: 2,
|
||||
SharedDecoderCount: 1,
|
||||
SharedEncoderCount: 1,
|
||||
SharedJpegCount: 0,
|
||||
SharedOfaCount: 0,
|
||||
},
|
||||
},
|
||||
GPU_INSTANCE_PROFILE_3_SLICE: {
|
||||
COMPUTE_INSTANCE_PROFILE_1_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_1_SLICE,
|
||||
SliceCount: 1,
|
||||
InstanceCount: 3,
|
||||
MultiprocessorCount: 1,
|
||||
SharedCopyEngineCount: 4,
|
||||
SharedDecoderCount: 2,
|
||||
SharedEncoderCount: 1,
|
||||
SharedJpegCount: 0,
|
||||
SharedOfaCount: 0,
|
||||
},
|
||||
COMPUTE_INSTANCE_PROFILE_2_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_2_SLICE,
|
||||
SliceCount: 2,
|
||||
InstanceCount: 1,
|
||||
MultiprocessorCount: 2,
|
||||
SharedCopyEngineCount: 4,
|
||||
SharedDecoderCount: 2,
|
||||
SharedEncoderCount: 2,
|
||||
SharedJpegCount: 0,
|
||||
SharedOfaCount: 0,
|
||||
},
|
||||
COMPUTE_INSTANCE_PROFILE_3_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_3_SLICE,
|
||||
SliceCount: 3,
|
||||
InstanceCount: 1,
|
||||
MultiprocessorCount: 3,
|
||||
SharedCopyEngineCount: 4,
|
||||
SharedDecoderCount: 2,
|
||||
SharedEncoderCount: 0,
|
||||
SharedJpegCount: 0,
|
||||
SharedOfaCount: 0,
|
||||
},
|
||||
},
|
||||
GPU_INSTANCE_PROFILE_4_SLICE: {
|
||||
COMPUTE_INSTANCE_PROFILE_1_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_1_SLICE,
|
||||
SliceCount: 1,
|
||||
InstanceCount: 4,
|
||||
MultiprocessorCount: 1,
|
||||
SharedCopyEngineCount: 4,
|
||||
SharedDecoderCount: 2,
|
||||
SharedEncoderCount: 2,
|
||||
SharedJpegCount: 0,
|
||||
SharedOfaCount: 0,
|
||||
},
|
||||
COMPUTE_INSTANCE_PROFILE_2_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_2_SLICE,
|
||||
SliceCount: 2,
|
||||
InstanceCount: 2,
|
||||
MultiprocessorCount: 2,
|
||||
SharedCopyEngineCount: 4,
|
||||
SharedDecoderCount: 2,
|
||||
SharedEncoderCount: 2,
|
||||
SharedJpegCount: 0,
|
||||
SharedOfaCount: 0,
|
||||
},
|
||||
COMPUTE_INSTANCE_PROFILE_4_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_4_SLICE,
|
||||
SliceCount: 4,
|
||||
InstanceCount: 1,
|
||||
MultiprocessorCount: 4,
|
||||
SharedCopyEngineCount: 4,
|
||||
SharedDecoderCount: 2,
|
||||
SharedEncoderCount: 2,
|
||||
SharedJpegCount: 0,
|
||||
SharedOfaCount: 0,
|
||||
},
|
||||
},
|
||||
GPU_INSTANCE_PROFILE_7_SLICE: {
|
||||
COMPUTE_INSTANCE_PROFILE_1_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_1_SLICE,
|
||||
SliceCount: 1,
|
||||
InstanceCount: 7,
|
||||
MultiprocessorCount: 1,
|
||||
SharedCopyEngineCount: 8,
|
||||
SharedDecoderCount: 5,
|
||||
SharedEncoderCount: 5,
|
||||
SharedJpegCount: 1,
|
||||
SharedOfaCount: 1,
|
||||
},
|
||||
COMPUTE_INSTANCE_PROFILE_2_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_2_SLICE,
|
||||
SliceCount: 2,
|
||||
InstanceCount: 3,
|
||||
MultiprocessorCount: 2,
|
||||
SharedCopyEngineCount: 8,
|
||||
SharedDecoderCount: 5,
|
||||
SharedEncoderCount: 5,
|
||||
SharedJpegCount: 1,
|
||||
SharedOfaCount: 1,
|
||||
},
|
||||
COMPUTE_INSTANCE_PROFILE_3_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_3_SLICE,
|
||||
SliceCount: 3,
|
||||
InstanceCount: 2,
|
||||
MultiprocessorCount: 3,
|
||||
SharedCopyEngineCount: 8,
|
||||
SharedDecoderCount: 5,
|
||||
SharedEncoderCount: 5,
|
||||
SharedJpegCount: 1,
|
||||
SharedOfaCount: 1,
|
||||
},
|
||||
COMPUTE_INSTANCE_PROFILE_4_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_4_SLICE,
|
||||
SliceCount: 4,
|
||||
InstanceCount: 1,
|
||||
MultiprocessorCount: 4,
|
||||
SharedCopyEngineCount: 8,
|
||||
SharedDecoderCount: 5,
|
||||
SharedEncoderCount: 5,
|
||||
SharedJpegCount: 1,
|
||||
SharedOfaCount: 1,
|
||||
},
|
||||
COMPUTE_INSTANCE_PROFILE_7_SLICE: {
|
||||
Id: COMPUTE_INSTANCE_PROFILE_7_SLICE,
|
||||
SliceCount: 7,
|
||||
InstanceCount: 1,
|
||||
MultiprocessorCount: 7,
|
||||
SharedCopyEngineCount: 8,
|
||||
SharedDecoderCount: 5,
|
||||
SharedEncoderCount: 5,
|
||||
SharedJpegCount: 1,
|
||||
SharedOfaCount: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func NewMockNVMLServer(devices ...Device) Interface {
|
||||
return &MockServer{
|
||||
Devices: devices,
|
||||
}
|
||||
}
|
||||
|
||||
func NewMockNVMLOnLunaServer() Interface {
|
||||
devices := []Device{
|
||||
NewMockA100Device(0),
|
||||
NewMockA100Device(1),
|
||||
NewMockA100Device(2),
|
||||
NewMockA100Device(3),
|
||||
NewMockA100Device(4),
|
||||
NewMockA100Device(5),
|
||||
NewMockA100Device(6),
|
||||
NewMockA100Device(7),
|
||||
}
|
||||
return NewMockNVMLServer(devices...)
|
||||
}
|
||||
|
||||
func NewMockA100Device(index int) Device {
|
||||
return &MockA100Device{
|
||||
Index: index,
|
||||
GpuInstances: make(map[*MockA100GpuInstance]struct{}),
|
||||
GpuInstanceCounter: 0,
|
||||
}
|
||||
}
|
||||
|
||||
func NewMockA100GpuInstance(info GpuInstanceInfo) GpuInstance {
|
||||
return &MockA100GpuInstance{
|
||||
Info: info,
|
||||
ComputeInstances: make(map[*MockA100ComputeInstance]struct{}),
|
||||
ComputeInstanceCounter: 0,
|
||||
}
|
||||
}
|
||||
|
||||
func NewMockA100ComputeInstance(info ComputeInstanceInfo) ComputeInstance {
|
||||
return &MockA100ComputeInstance{
|
||||
Info: info,
|
||||
}
|
||||
}
|
||||
|
||||
func (n *MockServer) Init() Return {
|
||||
return MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (n *MockServer) Shutdown() Return {
|
||||
return MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (n *MockServer) DeviceGetCount() (int, Return) {
|
||||
return len(n.Devices), MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (n *MockServer) DeviceGetHandleByIndex(index int) (Device, Return) {
|
||||
if index < 0 || index >= len(n.Devices) {
|
||||
return nil, MockReturn(ERROR_INVALID_ARGUMENT)
|
||||
}
|
||||
return n.Devices[index], MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (n *MockServer) SystemGetDriverVersion() (string, Return) {
|
||||
return "999.99", MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetIndex() (int, Return) {
|
||||
return d.Index, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetPciInfo() (PciInfo, Return) {
|
||||
var busID [32]int8
|
||||
for i, b := range []byte("0000FFFF:FF:FF.F") {
|
||||
busID[i] = int8(b)
|
||||
}
|
||||
p := PciInfo{
|
||||
BusId: busID,
|
||||
PciDeviceId: 0x20B010DE,
|
||||
}
|
||||
return p, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetUUID() (string, Return) {
|
||||
return fmt.Sprintf("GPU-%d", d.Index), MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetMinorNumber() (int, Return) {
|
||||
return d.MinorNumber, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) SetMigMode(mode int) (Return, Return) {
|
||||
d.MigMode = mode
|
||||
return MockReturn(SUCCESS), MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetMigMode() (int, int, Return) {
|
||||
return d.MigMode, d.MigMode, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetGpuInstanceProfileInfo(giProfileId int) (GpuInstanceProfileInfo, Return) {
|
||||
if giProfileId < 0 || giProfileId >= GPU_INSTANCE_PROFILE_COUNT {
|
||||
return GpuInstanceProfileInfo{}, MockReturn(ERROR_INVALID_ARGUMENT)
|
||||
}
|
||||
|
||||
if _, exists := MockA100MIGProfiles.GpuInstanceProfiles[giProfileId]; !exists {
|
||||
return GpuInstanceProfileInfo{}, MockReturn(ERROR_NOT_SUPPORTED)
|
||||
}
|
||||
|
||||
return MockA100MIGProfiles.GpuInstanceProfiles[giProfileId], MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) CreateGpuInstance(info *GpuInstanceProfileInfo) (GpuInstance, Return) {
|
||||
giInfo := GpuInstanceInfo{
|
||||
Device: d,
|
||||
Id: d.GpuInstanceCounter,
|
||||
ProfileId: info.Id,
|
||||
}
|
||||
d.GpuInstanceCounter++
|
||||
gi := NewMockA100GpuInstance(giInfo)
|
||||
d.GpuInstances[gi.(*MockA100GpuInstance)] = struct{}{}
|
||||
return gi, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetGpuInstances(info *GpuInstanceProfileInfo) ([]GpuInstance, Return) {
|
||||
var gis []GpuInstance
|
||||
for gi := range d.GpuInstances {
|
||||
if gi.Info.ProfileId == info.Id {
|
||||
gis = append(gis, gi)
|
||||
}
|
||||
}
|
||||
return gis, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetMaxMigDeviceCount() (int, Return) {
|
||||
var count int
|
||||
for gi := range d.GpuInstances {
|
||||
count = count + int(gi.ComputeInstanceCounter)
|
||||
}
|
||||
return count, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetMigDeviceHandleByIndex(Index int) (Device, Return) {
|
||||
var count int
|
||||
for gi := range d.GpuInstances {
|
||||
if count+int(gi.ComputeInstanceCounter) < Index {
|
||||
count = count + int(gi.ComputeInstanceCounter)
|
||||
continue
|
||||
}
|
||||
for ci := range gi.ComputeInstances {
|
||||
if count < Index {
|
||||
count++
|
||||
continue
|
||||
}
|
||||
|
||||
return ci, MockReturn(SUCCESS)
|
||||
}
|
||||
}
|
||||
return nil, MockReturn(ERROR_NOT_FOUND)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetDeviceHandleFromMigDeviceHandle() (Device, Return) {
|
||||
return nil, MockReturn(ERROR_NOT_SUPPORTED)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) IsMigDeviceHandle() (bool, Return) {
|
||||
return false, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetComputeInstanceId() (int, Return) {
|
||||
panic("Not implemented: GetComputeInstanceId")
|
||||
}
|
||||
|
||||
func (d *MockA100Device) GetGPUInstanceId() (int, Return) {
|
||||
panic("Not implemented: GetGPUInstanceId")
|
||||
}
|
||||
|
||||
func (gi *MockA100GpuInstance) GetInfo() (GpuInstanceInfo, Return) {
|
||||
return gi.Info, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (gi *MockA100GpuInstance) GetComputeInstanceProfileInfo(ciProfileId int, ciEngProfileId int) (ComputeInstanceProfileInfo, Return) {
|
||||
if ciProfileId < 0 || ciProfileId >= COMPUTE_INSTANCE_PROFILE_COUNT {
|
||||
return ComputeInstanceProfileInfo{}, MockReturn(ERROR_INVALID_ARGUMENT)
|
||||
}
|
||||
|
||||
if ciEngProfileId != COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED {
|
||||
return ComputeInstanceProfileInfo{}, MockReturn(ERROR_NOT_SUPPORTED)
|
||||
}
|
||||
|
||||
giProfileId := int(gi.Info.ProfileId)
|
||||
|
||||
if _, exists := MockA100MIGProfiles.ComputeInstanceProfiles[giProfileId]; !exists {
|
||||
return ComputeInstanceProfileInfo{}, MockReturn(ERROR_NOT_SUPPORTED)
|
||||
}
|
||||
|
||||
if _, exists := MockA100MIGProfiles.ComputeInstanceProfiles[giProfileId][ciProfileId]; !exists {
|
||||
return ComputeInstanceProfileInfo{}, MockReturn(ERROR_NOT_SUPPORTED)
|
||||
}
|
||||
|
||||
return MockA100MIGProfiles.ComputeInstanceProfiles[giProfileId][ciProfileId], MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (gi *MockA100GpuInstance) CreateComputeInstance(info *ComputeInstanceProfileInfo) (ComputeInstance, Return) {
|
||||
ciInfo := ComputeInstanceInfo{
|
||||
Device: gi.Info.Device,
|
||||
GpuInstance: gi,
|
||||
Id: gi.ComputeInstanceCounter,
|
||||
ProfileId: info.Id,
|
||||
}
|
||||
gi.ComputeInstanceCounter++
|
||||
ci := NewMockA100ComputeInstance(ciInfo)
|
||||
gi.ComputeInstances[ci.(*MockA100ComputeInstance)] = struct{}{}
|
||||
return ci, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (gi *MockA100GpuInstance) GetComputeInstances(info *ComputeInstanceProfileInfo) ([]ComputeInstance, Return) {
|
||||
var cis []ComputeInstance
|
||||
for ci := range gi.ComputeInstances {
|
||||
if ci.Info.ProfileId == info.Id {
|
||||
cis = append(cis, ci)
|
||||
}
|
||||
}
|
||||
return cis, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (gi *MockA100GpuInstance) Destroy() Return {
|
||||
delete(gi.Info.Device.(*MockA100Device).GpuInstances, gi)
|
||||
return MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (ci *MockA100ComputeInstance) GetInfo() (ComputeInstanceInfo, Return) {
|
||||
return ci.Info, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (ci *MockA100ComputeInstance) Destroy() Return {
|
||||
delete(ci.Info.GpuInstance.(*MockA100GpuInstance).ComputeInstances, ci)
|
||||
return MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
// Since a compute instance can be used as a MIG device handle, it must also
|
||||
// implement the Device interface
|
||||
var _ Device = (*MockA100ComputeInstance)(nil)
|
||||
|
||||
func (c *MockA100ComputeInstance) GetIndex() (int, Return) {
|
||||
return int(c.Info.Id), MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetPciInfo() (PciInfo, Return) {
|
||||
// TODO: How does this behave on an actual MIG system?
|
||||
panic("Not implemented: GetPciInfo")
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetUUID() (string, Return) {
|
||||
return fmt.Sprintf("MIG-%d", c.Info.Id), MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetMinorNumber() (int, Return) {
|
||||
// TODO: This depends on the content of the mig-minors file and the (gpu, gi, ci) tuple
|
||||
panic("Not implemented: GetMinorNumber")
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) SetMigMode(Mode int) (Return, Return) {
|
||||
panic("Not implemented: SetMigMode")
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetMigMode() (int, int, Return) {
|
||||
panic("Not implemented: GetMigMode")
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetGpuInstanceProfileInfo(Profile int) (GpuInstanceProfileInfo, Return) {
|
||||
panic("Not implemented: GetGpuInstanceProfileInfo")
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) CreateGpuInstance(Info *GpuInstanceProfileInfo) (GpuInstance, Return) {
|
||||
panic("Not implemented: CreateGpuInstance")
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetGpuInstances(Info *GpuInstanceProfileInfo) ([]GpuInstance, Return) {
|
||||
panic("Not implemented: GetGpuInstances")
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetMaxMigDeviceCount() (int, Return) {
|
||||
panic("Not implemented: GetMaxMigDeviceCount")
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetMigDeviceHandleByIndex(Index int) (Device, Return) {
|
||||
panic("Not implemented: GetMigDeviceHandleByIndex")
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetDeviceHandleFromMigDeviceHandle() (Device, Return) {
|
||||
return c.Info.Device, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) IsMigDeviceHandle() (bool, Return) {
|
||||
return true, MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetComputeInstanceId() (int, Return) {
|
||||
return int(c.Info.Id), MockReturn(SUCCESS)
|
||||
}
|
||||
|
||||
func (c *MockA100ComputeInstance) GetGPUInstanceId() (int, Return) {
|
||||
info, r := c.Info.GpuInstance.GetInfo()
|
||||
if r.Value() != SUCCESS {
|
||||
return 0, MockReturn(r.Value())
|
||||
}
|
||||
return int(info.Id), MockReturn(SUCCESS)
|
||||
}
|
||||
188
internal/nvml/nvml.go
Normal file
188
internal/nvml/nvml.go
Normal file
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nvml
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
)
|
||||
|
||||
type nvmlLib struct{}
|
||||
type nvmlDevice nvml.Device
|
||||
type nvmlGpuInstance nvml.GpuInstance
|
||||
type nvmlComputeInstance nvml.ComputeInstance
|
||||
|
||||
var _ Interface = (*nvmlLib)(nil)
|
||||
var _ Device = (*nvmlDevice)(nil)
|
||||
var _ GpuInstance = (*nvmlGpuInstance)(nil)
|
||||
var _ ComputeInstance = (*nvmlComputeInstance)(nil)
|
||||
|
||||
func New() Interface {
|
||||
return &nvmlLib{}
|
||||
}
|
||||
|
||||
func (n *nvmlLib) Init() Return {
|
||||
return nvmlReturn(nvml.Init())
|
||||
}
|
||||
|
||||
func (n *nvmlLib) Shutdown() Return {
|
||||
return nvmlReturn(nvml.Shutdown())
|
||||
}
|
||||
|
||||
func (n *nvmlLib) DeviceGetCount() (int, Return) {
|
||||
c, r := nvml.DeviceGetCount()
|
||||
return c, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (n *nvmlLib) DeviceGetHandleByIndex(index int) (Device, Return) {
|
||||
d, r := nvml.DeviceGetHandleByIndex(index)
|
||||
return nvmlDevice(d), nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (n *nvmlLib) SystemGetDriverVersion() (string, Return) {
|
||||
v, r := nvml.SystemGetDriverVersion()
|
||||
return v, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetIndex() (int, Return) {
|
||||
i, r := nvml.Device(d).GetIndex()
|
||||
return i, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetPciInfo() (PciInfo, Return) {
|
||||
p, r := nvml.Device(d).GetPciInfo()
|
||||
return PciInfo(p), nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetUUID() (string, Return) {
|
||||
u, r := nvml.Device(d).GetUUID()
|
||||
return u, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetMinorNumber() (int, Return) {
|
||||
m, r := nvml.Device(d).GetMinorNumber()
|
||||
return m, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) IsMigDeviceHandle() (bool, Return) {
|
||||
b, r := nvml.Device(d).IsMigDeviceHandle()
|
||||
return b, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetDeviceHandleFromMigDeviceHandle() (Device, Return) {
|
||||
p, r := nvml.Device(d).GetDeviceHandleFromMigDeviceHandle()
|
||||
return nvmlDevice(p), nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetGPUInstanceId() (int, Return) {
|
||||
gi, r := nvml.Device(d).GetGpuInstanceId()
|
||||
return gi, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetComputeInstanceId() (int, Return) {
|
||||
ci, r := nvml.Device(d).GetComputeInstanceId()
|
||||
return ci, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) SetMigMode(mode int) (Return, Return) {
|
||||
r1, r2 := nvml.Device(d).SetMigMode(mode)
|
||||
return nvmlReturn(r1), nvmlReturn(r2)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetMigMode() (int, int, Return) {
|
||||
s1, s2, r := nvml.Device(d).GetMigMode()
|
||||
return s1, s2, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetGpuInstanceProfileInfo(profile int) (GpuInstanceProfileInfo, Return) {
|
||||
p, r := nvml.Device(d).GetGpuInstanceProfileInfo(profile)
|
||||
return GpuInstanceProfileInfo(p), nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) CreateGpuInstance(info *GpuInstanceProfileInfo) (GpuInstance, Return) {
|
||||
gi, r := nvml.Device(d).CreateGpuInstance((*nvml.GpuInstanceProfileInfo)(info))
|
||||
return nvmlGpuInstance(gi), nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetGpuInstances(info *GpuInstanceProfileInfo) ([]GpuInstance, Return) {
|
||||
nvmlGis, r := nvml.Device(d).GetGpuInstances((*nvml.GpuInstanceProfileInfo)(info))
|
||||
var gis []GpuInstance
|
||||
for _, gi := range nvmlGis {
|
||||
gis = append(gis, nvmlGpuInstance(gi))
|
||||
}
|
||||
return gis, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetMaxMigDeviceCount() (int, Return) {
|
||||
m, r := nvml.Device(d).GetMaxMigDeviceCount()
|
||||
return m, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (d nvmlDevice) GetMigDeviceHandleByIndex(Index int) (Device, Return) {
|
||||
h, r := nvml.Device(d).GetMigDeviceHandleByIndex(Index)
|
||||
return nvmlDevice(h), nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (gi nvmlGpuInstance) GetInfo() (GpuInstanceInfo, Return) {
|
||||
i, r := nvml.GpuInstance(gi).GetInfo()
|
||||
info := GpuInstanceInfo{
|
||||
Device: nvmlDevice(i.Device),
|
||||
Id: i.Id,
|
||||
ProfileId: i.ProfileId,
|
||||
Placement: i.Placement,
|
||||
}
|
||||
return info, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (gi nvmlGpuInstance) GetComputeInstanceProfileInfo(profile int, engProfile int) (ComputeInstanceProfileInfo, Return) {
|
||||
p, r := nvml.GpuInstance(gi).GetComputeInstanceProfileInfo(profile, engProfile)
|
||||
return ComputeInstanceProfileInfo(p), nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (gi nvmlGpuInstance) CreateComputeInstance(info *ComputeInstanceProfileInfo) (ComputeInstance, Return) {
|
||||
ci, r := nvml.GpuInstance(gi).CreateComputeInstance((*nvml.ComputeInstanceProfileInfo)(info))
|
||||
return nvmlComputeInstance(ci), nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (gi nvmlGpuInstance) GetComputeInstances(info *ComputeInstanceProfileInfo) ([]ComputeInstance, Return) {
|
||||
nvmlCis, r := nvml.GpuInstance(gi).GetComputeInstances((*nvml.ComputeInstanceProfileInfo)(info))
|
||||
var cis []ComputeInstance
|
||||
for _, ci := range nvmlCis {
|
||||
cis = append(cis, nvmlComputeInstance(ci))
|
||||
}
|
||||
return cis, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (gi nvmlGpuInstance) Destroy() Return {
|
||||
r := nvml.GpuInstance(gi).Destroy()
|
||||
return nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (ci nvmlComputeInstance) GetInfo() (ComputeInstanceInfo, Return) {
|
||||
i, r := nvml.ComputeInstance(ci).GetInfo()
|
||||
info := ComputeInstanceInfo{
|
||||
Device: nvmlDevice(i.Device),
|
||||
GpuInstance: nvmlGpuInstance(i.GpuInstance),
|
||||
Id: i.Id,
|
||||
ProfileId: i.ProfileId,
|
||||
}
|
||||
return info, nvmlReturn(r)
|
||||
}
|
||||
|
||||
func (ci nvmlComputeInstance) Destroy() Return {
|
||||
r := nvml.ComputeInstance(ci).Destroy()
|
||||
return nvmlReturn(r)
|
||||
}
|
||||
110
internal/nvml/return.go
Normal file
110
internal/nvml/return.go
Normal file
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package nvml
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
)
|
||||
|
||||
type Return interface {
|
||||
Value() nvml.Return
|
||||
String() string
|
||||
Error() string
|
||||
}
|
||||
|
||||
type nvmlReturn nvml.Return
|
||||
type MockReturn nvml.Return
|
||||
|
||||
var _ Return = (*nvmlReturn)(nil)
|
||||
var _ Return = (*MockReturn)(nil)
|
||||
|
||||
func (r nvmlReturn) Value() nvml.Return {
|
||||
return nvml.Return(r)
|
||||
}
|
||||
|
||||
func (r nvmlReturn) String() string {
|
||||
return r.Error()
|
||||
}
|
||||
|
||||
func (r nvmlReturn) Error() string {
|
||||
return nvml.ErrorString(nvml.Return(r))
|
||||
}
|
||||
|
||||
func (r MockReturn) Value() nvml.Return {
|
||||
return nvml.Return(r)
|
||||
}
|
||||
|
||||
func (r MockReturn) String() string {
|
||||
return r.Error()
|
||||
}
|
||||
|
||||
func (r MockReturn) Error() string {
|
||||
switch nvml.Return(r) {
|
||||
case SUCCESS:
|
||||
return "SUCCESS"
|
||||
case ERROR_UNINITIALIZED:
|
||||
return "ERROR_UNINITIALIZED"
|
||||
case ERROR_INVALID_ARGUMENT:
|
||||
return "ERROR_INVALID_ARGUMENT"
|
||||
case ERROR_NOT_SUPPORTED:
|
||||
return "ERROR_NOT_SUPPORTED"
|
||||
case ERROR_NO_PERMISSION:
|
||||
return "ERROR_NO_PERMISSION"
|
||||
case ERROR_ALREADY_INITIALIZED:
|
||||
return "ERROR_ALREADY_INITIALIZED"
|
||||
case ERROR_NOT_FOUND:
|
||||
return "ERROR_NOT_FOUND"
|
||||
case ERROR_INSUFFICIENT_SIZE:
|
||||
return "ERROR_INSUFFICIENT_SIZE"
|
||||
case ERROR_INSUFFICIENT_POWER:
|
||||
return "ERROR_INSUFFICIENT_POWER"
|
||||
case ERROR_DRIVER_NOT_LOADED:
|
||||
return "ERROR_DRIVER_NOT_LOADED"
|
||||
case ERROR_TIMEOUT:
|
||||
return "ERROR_TIMEOUT"
|
||||
case ERROR_IRQ_ISSUE:
|
||||
return "ERROR_IRQ_ISSUE"
|
||||
case ERROR_LIBRARY_NOT_FOUND:
|
||||
return "ERROR_LIBRARY_NOT_FOUND"
|
||||
case ERROR_FUNCTION_NOT_FOUND:
|
||||
return "ERROR_FUNCTION_NOT_FOUND"
|
||||
case ERROR_CORRUPTED_INFOROM:
|
||||
return "ERROR_CORRUPTED_INFOROM"
|
||||
case ERROR_GPU_IS_LOST:
|
||||
return "ERROR_GPU_IS_LOST"
|
||||
case ERROR_RESET_REQUIRED:
|
||||
return "ERROR_RESET_REQUIRED"
|
||||
case ERROR_OPERATING_SYSTEM:
|
||||
return "ERROR_OPERATING_SYSTEM"
|
||||
case ERROR_LIB_RM_VERSION_MISMATCH:
|
||||
return "ERROR_LIB_RM_VERSION_MISMATCH"
|
||||
case ERROR_IN_USE:
|
||||
return "ERROR_IN_USE"
|
||||
case ERROR_MEMORY:
|
||||
return "ERROR_MEMORY"
|
||||
case ERROR_NO_DATA:
|
||||
return "ERROR_NO_DATA"
|
||||
case ERROR_VGPU_ECC_NOT_SUPPORTED:
|
||||
return "ERROR_VGPU_ECC_NOT_SUPPORTED"
|
||||
case ERROR_INSUFFICIENT_RESOURCES:
|
||||
return "ERROR_INSUFFICIENT_RESOURCES"
|
||||
case ERROR_UNKNOWN:
|
||||
return "ERROR_UNKNOWN"
|
||||
default:
|
||||
return "Unknown return value"
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user