Compare commits

..

67 Commits

Author SHA1 Message Date
Evan Lezar
cdfb232d4c Merge pull request #445 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/sys-0.19.0
Bump golang.org/x/sys from 0.18.0 to 0.19.0
2024-04-16 11:38:26 +02:00
dependabot[bot]
fa259354ac Bump golang.org/x/sys from 0.18.0 to 0.19.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.18.0 to 0.19.0.
- [Commits](https://github.com/golang/sys/compare/v0.18.0...v0.19.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-15 09:51:28 +00:00
Evan Lezar
43a3861463 Merge pull request #442 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/mod-0.17.0
Bump golang.org/x/mod from 0.16.0 to 0.17.0
2024-04-15 11:50:43 +02:00
dependabot[bot]
9274829517 Bump golang.org/x/mod from 0.16.0 to 0.17.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.16.0 to 0.17.0.
- [Commits](https://github.com/golang/mod/compare/v0.16.0...v0.17.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-07 08:14:08 +00:00
Evan Lezar
4668c511de Merge pull request #415 from NVIDIA/dependabot/go_modules/release-1.14/github.com/NVIDIA/go-nvlib-0.2.0
Bump github.com/NVIDIA/go-nvlib from 0.0.0-20231115170030-b21432a353e1 to 0.2.0
2024-03-18 14:20:18 +02:00
dependabot[bot]
76ecce5e8f Bump github.com/NVIDIA/go-nvlib
Bumps [github.com/NVIDIA/go-nvlib](https://github.com/NVIDIA/go-nvlib) from 0.0.0-20231115170030-b21432a353e1 to 0.2.0.
- [Release notes](https://github.com/NVIDIA/go-nvlib/releases)
- [Commits](https://github.com/NVIDIA/go-nvlib/commits/v0.2.0)

---
updated-dependencies:
- dependency-name: github.com/NVIDIA/go-nvlib
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-17 08:06:07 +00:00
Evan Lezar
cbb66c1a30 Merge pull request #399 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/sys-0.18.0
Bump golang.org/x/sys from 0.17.0 to 0.18.0
2024-03-12 14:11:14 +02:00
dependabot[bot]
a714bf2d83 Bump golang.org/x/sys from 0.17.0 to 0.18.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.17.0 to 0.18.0.
- [Commits](https://github.com/golang/sys/compare/v0.17.0...v0.18.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-12 11:15:38 +00:00
Evan Lezar
de2ed16f6c Merge pull request #398 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/mod-0.16.0
Bump golang.org/x/mod from 0.15.0 to 0.16.0
2024-03-12 13:14:29 +02:00
dependabot[bot]
99c8370f25 Bump golang.org/x/mod from 0.15.0 to 0.16.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.15.0 to 0.16.0.
- [Commits](https://github.com/golang/mod/compare/v0.15.0...v0.16.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-10 08:16:57 +00:00
Evan Lezar
d20b00e360 Merge pull request #390 from NVIDIA/dependabot/go_modules/release-1.14/github.com/stretchr/testify-1.9.0
Bump github.com/stretchr/testify from 1.8.4 to 1.9.0
2024-03-01 22:16:46 +02:00
dependabot[bot]
a724207fbd Bump github.com/stretchr/testify from 1.8.4 to 1.9.0
Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.8.4 to 1.9.0.
- [Release notes](https://github.com/stretchr/testify/releases)
- [Commits](https://github.com/stretchr/testify/compare/v1.8.4...v1.9.0)

---
updated-dependencies:
- dependency-name: github.com/stretchr/testify
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-01 15:45:38 +00:00
Evan Lezar
37b1e37c8f Merge pull request #385 from elezar/migrate-github-go-nvlib
Use github.com/NVIDIA/go-nvlib
2024-02-29 10:33:38 +02:00
Evan Lezar
9019dd1d02 Use github.com/NVIDIA/go-nvlib
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-29 10:29:20 +02:00
Christopher Desiniotis
5605d19133 Merge pull request #380 from tariq1890/cpick-exists-fallback
[R550 driver support] add fallback logic to device.Exists(name)
2024-02-27 12:41:04 -08:00
Tariq Ibrahim
3a0c989066 [R550 driver support] add fallback logic to device.Exists(name)
Signed-off-by: Tariq Ibrahim <tibrahim@nvidia.com>
(cherry picked from commit f80f4c485d)
2024-02-27 12:30:45 -08:00
Evan Lezar
5d246adf3d Merge pull request #376 from elezar/cherry-pick-v1.14.6
Cherry pick v1.14.6
2024-02-27 16:55:22 +02:00
Evan Lezar
8281e7d341 Update libnvidia-container to d2eb0afe
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-27 14:35:41 +02:00
Evan Lezar
1d046b4a9b Bump version to v1.14.6
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-27 13:34:23 +02:00
Evan Lezar
888ad62c98 Update changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-27 13:33:53 +02:00
Kevin Klues
2fa37973e0 Add support for an NVIDIA_IMEX_CHANNELS envvar
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2024-02-27 13:33:33 +02:00
Evan Lezar
d57d83405d Merge pull request #352 from elezar/cherry-pick-release-1.14
Support both nvidia and nvidia-frontend when extracting major
2024-02-19 13:59:34 +01:00
Evan Lezar
6b077a2f1c Bump libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-19 13:36:51 +01:00
Evan Lezar
4e2861fe77 Support both nvidia and nvidia-frontend when extracting major
With newer driver versions the nvidia-frontend name in /proc/devices
was replaced with nvidia. This change allows both nvidia and nvidia-frontend
to be used to extract the expected device major (195).

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-19 13:36:01 +01:00
Evan Lezar
b51d51369d Merge pull request #368 from NVIDIA/dependabot/go_modules/release-1.14/github.com/opencontainers/runtime-spec-1.2.0
Bump github.com/opencontainers/runtime-spec from 1.1.0 to 1.2.0
2024-02-19 11:36:08 +01:00
dependabot[bot]
cc15f77f9a Bump github.com/opencontainers/runtime-spec from 1.1.0 to 1.2.0
Bumps [github.com/opencontainers/runtime-spec](https://github.com/opencontainers/runtime-spec) from 1.1.0 to 1.2.0.
- [Release notes](https://github.com/opencontainers/runtime-spec/releases)
- [Changelog](https://github.com/opencontainers/runtime-spec/blob/main/ChangeLog)
- [Commits](https://github.com/opencontainers/runtime-spec/compare/v1.1.0...v1.2.0)

---
updated-dependencies:
- dependency-name: github.com/opencontainers/runtime-spec
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-19 10:20:33 +00:00
Evan Lezar
10e86367f9 Merge pull request #365 from NVIDIA/dependabot/go_modules/release-1.14/github.com/NVIDIA/go-nvml-0.12.0-2
Bump github.com/NVIDIA/go-nvml from 0.12.0-1 to 0.12.0-2
2024-02-19 11:19:24 +01:00
dependabot[bot]
7c6c5e6104 Bump github.com/NVIDIA/go-nvml from 0.12.0-1 to 0.12.0-2
Bumps [github.com/NVIDIA/go-nvml](https://github.com/NVIDIA/go-nvml) from 0.12.0-1 to 0.12.0-2.
- [Commits](https://github.com/NVIDIA/go-nvml/compare/v0.12.0-1...v0.12.0-2)

---
updated-dependencies:
- dependency-name: github.com/NVIDIA/go-nvml
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-19 10:17:03 +00:00
Evan Lezar
323580c2fa Merge pull request #366 from NVIDIA/dependabot/go_modules/release-1.14/github.com/stretchr/testify-1.8.4
Bump github.com/stretchr/testify from 1.8.1 to 1.8.4
2024-02-19 11:15:21 +01:00
dependabot[bot]
ae329e3a94 Bump github.com/stretchr/testify from 1.8.1 to 1.8.4
Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.8.1 to 1.8.4.
- [Release notes](https://github.com/stretchr/testify/releases)
- [Commits](https://github.com/stretchr/testify/compare/v1.8.1...v1.8.4)

---
updated-dependencies:
- dependency-name: github.com/stretchr/testify
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-19 10:11:40 +00:00
Evan Lezar
99c955a3f4 Merge pull request #367 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/mod-0.15.0
Bump golang.org/x/mod from 0.5.0 to 0.15.0
2024-02-19 11:10:59 +01:00
dependabot[bot]
24a48582ca Bump golang.org/x/mod from 0.5.0 to 0.15.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.5.0 to 0.15.0.
- [Commits](https://github.com/golang/mod/compare/v0.5.0...v0.15.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-18 08:17:27 +00:00
Evan Lezar
a3e0a72fd0 Merge pull request #356 from NVIDIA/dependabot/go_modules/release-1.14/github.com/urfave/cli/v2-2.27.1
Bump github.com/urfave/cli/v2 from 2.3.0 to 2.27.1
2024-02-13 21:10:07 +01:00
Evan Lezar
2bc874376f Merge pull request #358 from NVIDIA/dependabot/go_modules/release-1.14/github.com/fsnotify/fsnotify-1.7.0
Bump github.com/fsnotify/fsnotify from 1.5.4 to 1.7.0
2024-02-13 21:09:53 +01:00
dependabot[bot]
bd1084d1a1 Bump github.com/urfave/cli/v2 from 2.3.0 to 2.27.1
Bumps [github.com/urfave/cli/v2](https://github.com/urfave/cli) from 2.3.0 to 2.27.1.
- [Release notes](https://github.com/urfave/cli/releases)
- [Changelog](https://github.com/urfave/cli/blob/main/docs/CHANGELOG.md)
- [Commits](https://github.com/urfave/cli/compare/v2.3.0...v2.27.1)

---
updated-dependencies:
- dependency-name: github.com/urfave/cli/v2
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 16:51:28 +00:00
dependabot[bot]
98e5ad0a10 Bump github.com/fsnotify/fsnotify from 1.5.4 to 1.7.0
Bumps [github.com/fsnotify/fsnotify](https://github.com/fsnotify/fsnotify) from 1.5.4 to 1.7.0.
- [Release notes](https://github.com/fsnotify/fsnotify/releases)
- [Changelog](https://github.com/fsnotify/fsnotify/blob/main/CHANGELOG.md)
- [Commits](https://github.com/fsnotify/fsnotify/compare/v1.5.4...v1.7.0)

---
updated-dependencies:
- dependency-name: github.com/fsnotify/fsnotify
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 16:50:54 +00:00
Evan Lezar
3c710a0596 Merge pull request #359 from NVIDIA/dependabot/go_modules/release-1.14/github.com/pelletier/go-toml-1.9.5
Bump github.com/pelletier/go-toml from 1.9.4 to 1.9.5
2024-02-12 17:46:44 +01:00
dependabot[bot]
38a8bb183a Bump github.com/pelletier/go-toml from 1.9.4 to 1.9.5
Bumps [github.com/pelletier/go-toml](https://github.com/pelletier/go-toml) from 1.9.4 to 1.9.5.
- [Release notes](https://github.com/pelletier/go-toml/releases)
- [Changelog](https://github.com/pelletier/go-toml/blob/v2/.goreleaser.yaml)
- [Commits](https://github.com/pelletier/go-toml/compare/v1.9.4...v1.9.5)

---
updated-dependencies:
- dependency-name: github.com/pelletier/go-toml
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 16:41:39 +00:00
Evan Lezar
7038e7f003 Merge pull request #357 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/sys-0.17.0
Bump golang.org/x/sys from 0.7.0 to 0.17.0
2024-02-12 17:41:20 +01:00
Evan Lezar
539033af43 Merge pull request #355 from NVIDIA/dependabot/go_modules/release-1.14/github.com/sirupsen/logrus-1.9.3
Bump github.com/sirupsen/logrus from 1.9.0 to 1.9.3
2024-02-12 17:40:56 +01:00
dependabot[bot]
365e9e03b9 Bump golang.org/x/sys from 0.7.0 to 0.17.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.7.0 to 0.17.0.
- [Commits](https://github.com/golang/sys/compare/v0.7.0...v0.17.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 16:35:26 +00:00
dependabot[bot]
c1894a0760 Bump github.com/sirupsen/logrus from 1.9.0 to 1.9.3
Bumps [github.com/sirupsen/logrus](https://github.com/sirupsen/logrus) from 1.9.0 to 1.9.3.
- [Release notes](https://github.com/sirupsen/logrus/releases)
- [Changelog](https://github.com/sirupsen/logrus/blob/master/CHANGELOG.md)
- [Commits](https://github.com/sirupsen/logrus/compare/v1.9.0...v1.9.3)

---
updated-dependencies:
- dependency-name: github.com/sirupsen/logrus
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 16:35:05 +00:00
Evan Lezar
9ea3360701 Merge pull request #340 from elezar/cherry-pick-v1.14.5
Update dependencies for v1.14.5 release
2024-02-07 12:35:09 +01:00
Evan Lezar
b6987c526a Fix docstring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 12:33:02 +01:00
Evan Lezar
3604927034 Replace github.com/container-orchestrated-devices/container-device-interface with tags.cncf.io
This also removes the indirect dependency on runc which has a CVE.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 12:29:01 +01:00
Evan Lezar
640bd6ee3f Remove blossom-ci
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 12:29:01 +01:00
Evan Lezar
6593f3c2e4 Merge pull request #320 from elezar/remove-centos8-jobs
Remove centos8 jobs

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 12:28:54 +01:00
Evan Lezar
d52a237c12 Merge pull request #339 from elezar/cherry-pick-1.14.5
Cherry-pick changes for v1.14.5
2024-02-07 10:20:27 +01:00
Evan Lezar
9d9260db8c Merge branch 'fix-cdi-enable-docker' into 'main'
Fix --cdi.enabled for Docker

See merge request nvidia/container-toolkit/container-toolkit!541

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 10:14:23 +01:00
Evan Lezar
888fe458ae Bump version to v1.14.5
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 10:09:48 +01:00
Evan Lezar
d167812ce3 Merge branch 'cherry-pick-enable-cdi' into 'release-1.14'
Add cdi.enabled option to runtime configure

See merge request nvidia/container-toolkit/container-toolkit!539
2024-01-19 14:40:29 +00:00
Christopher Desiniotis
7ff23999e8 Add option to nvidia-ctk to enable CDI in docker
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-19 15:36:15 +01:00
Evan Lezar
a9b01a43bc Add cdi.enabled option to runtime configure
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-19 15:35:14 +01:00
Evan Lezar
ccff00bc30 Merge branch 'bump-version-v1.14.4' into 'release-1.14'
Bump version to v1.14.4

See merge request nvidia/container-toolkit/container-toolkit!537
2024-01-18 12:17:55 +00:00
Evan Lezar
f7d54200c6 Bump version to v1.14.4
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-18 13:17:17 +01:00
Evan Lezar
29fd206f3a Merge branch 'cherry-pick-1.14.4' into 'release-1.14'
Cherry pick changes for v1.14.4 release

See merge request nvidia/container-toolkit/container-toolkit!534
2024-01-17 22:51:28 +00:00
Evan Lezar
cfe0d5d07e Skip component updates
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-17 23:06:49 +01:00
Evan Lezar
9ab640b2be Set libnvidia-container branch
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-17 22:46:13 +01:00
Evan Lezar
9d2e4b48bc Update libnvidia-container to 1.14.4
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-17 22:33:44 +01:00
Evan Lezar
c050bcf081 Merge branch 'add-crun-as-configured-runtime' into 'main'
Set default low-level runtimes to runc, crun

See merge request nvidia/container-toolkit/container-toolkit!536
2024-01-17 22:29:58 +01:00
Evan Lezar
27d0fa4ee2 Merge branch 'bump-cuda-12.3.1' into 'main'
Bump CUDA base image to 12.3.1

See merge request nvidia/container-toolkit/container-toolkit!535
2024-01-11 15:27:27 +01:00
Evan Lezar
e0e22fdceb Merge branch 'fix-user-group' into 'main'
Fix bug in determining CLI user on SUSE systems

See merge request nvidia/container-toolkit/container-toolkit!532
2024-01-11 15:27:25 +01:00
Evan Lezar
c1eae0deda Merge branch 'libnvdxgdmal' into 'main'
Add libnvdxgdmal library

See merge request nvidia/container-toolkit/container-toolkit!529
2024-01-11 15:27:01 +01:00
Evan Lezar
68f0203a49 Merge branch 'remove-libseccomp-dependency' into 'main'
Remove libseccomp package dependency

See merge request nvidia/container-toolkit/container-toolkit!531
2024-01-11 15:26:37 +01:00
Evan Lezar
cc688f7c75 Merge branch 'log-requested-mode' into 'main'
Log explicitly requested runtime mode

See merge request nvidia/container-toolkit/container-toolkit!527
2024-01-11 15:26:03 +01:00
Evan Lezar
7566eb124a Merge branch 'fix-config-update-command' into 'main'
Switch to reflect package for config updates

See merge request nvidia/container-toolkit/container-toolkit!500
2024-01-11 15:25:33 +01:00
Evan Lezar
eb5d50abc4 Merge branch 'include-nvoptix' into 'main'
Update list of graphics mounts

See merge request nvidia/container-toolkit/container-toolkit!501
2024-01-11 15:25:09 +01:00
1581 changed files with 22324 additions and 536575 deletions

View File

@@ -19,6 +19,7 @@ default:
variables:
GIT_SUBMODULE_STRATEGY: recursive
BUILDIMAGE: "${CI_REGISTRY_IMAGE}/build:${CI_COMMIT_SHORT_SHA}"
BUILD_MULTI_ARCH_IMAGES: "true"
stages:
@@ -33,7 +34,6 @@ stages:
- test
- scan
- release
- sign
.pipeline-trigger-rules:
rules:
@@ -145,7 +145,7 @@ trigger-pipeline:
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
script:
- make -f deployments/container/Makefile test-${DIST}
- make -f build/container/Makefile test-${DIST}
# Define the test targets
test-packaging:
@@ -178,10 +178,13 @@ test-packaging:
OUT_IMAGE_VERSION: "${CI_COMMIT_SHORT_SHA}"
before_script:
- !reference [.regctl-setup, before_script]
# We ensure that the components of the output image are set:
- 'echo Image Name: ${OUT_IMAGE_NAME} ; [[ -n "${OUT_IMAGE_NAME}" ]] || exit 1'
# We ensure that the OUT_IMAGE_VERSION is set
- 'echo Version: ${OUT_IMAGE_VERSION} ; [[ -n "${OUT_IMAGE_VERSION}" ]] || exit 1'
# In the case where we are deploying a different version to the CI_COMMIT_SHA, we
# need to tag the image.
# Note: a leading 'v' is stripped from the version if present
- apk add --no-cache make bash
script:
# Log in to the "output" registry, tag the image and push the image
@@ -192,7 +195,7 @@ test-packaging:
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
# Target
- make -f deployments/container/Makefile push-${DIST}
- make -f build/container/Makefile push-${DIST}
# Define a staging release step that pushes an image to an internal "staging" repository
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
@@ -201,10 +204,10 @@ test-packaging:
extends:
- .release
variables:
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
OUT_REGISTRY: "${NGC_REGISTRY}"
OUT_IMAGE_NAME: "${NGC_REGISTRY_STAGING_IMAGE_NAME}"
OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/container-toolkit"
# Define an external release step that pushes an image to an external repository.
# This includes a devlopment image off main.
@@ -222,6 +225,13 @@ test-packaging:
OUT_IMAGE_VERSION: "${DEVEL_RELEASE_IMAGE_VERSION}"
# Define the release jobs
release:staging-centos7:
extends:
- .release:staging
- .dist-centos7
needs:
- image-centos7
release:staging-ubi8:
extends:
- .release:staging

View File

@@ -1,3 +0,0 @@
# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/#configuration
enabled: true

120
.github/dependabot.yml vendored
View File

@@ -1,120 +0,0 @@
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
# main branch
- package-ecosystem: "gomod"
target-branch: main
directories:
- "/"
- "deployments/devel"
- "tests"
schedule:
interval: "daily"
labels:
- dependencies
groups:
k8sio:
patterns:
- k8s.io/*
exclude-patterns:
- k8s.io/klog/*
- package-ecosystem: "docker"
target-branch: main
directories:
# CUDA image
- "/deployments/container"
# Golang version
- "/deployments/devel"
schedule:
interval: "daily"
labels:
- dependencies
- package-ecosystem: "github-actions"
target-branch: main
directory: "/"
schedule:
interval: "daily"
labels:
- dependencies
# Allow dependabot to update the libnvidia-container submodule.
- package-ecosystem: "gitsubmodule"
target-branch: main
directory: "/"
allow:
- dependency-name: "third_party/libnvidia-container"
schedule:
interval: "daily"
labels:
- dependencies
- libnvidia-container
# The release branch(es):
- package-ecosystem: "gomod"
target-branch: release-1.17
directories:
- "/"
# We don't update development or test dependencies on release branches
# - "deployments/devel"
# - "tests"
schedule:
interval: "weekly"
day: "sunday"
labels:
- dependencies
- maintenance
ignore:
# For release branches we only consider patch updates.
- dependency-name: "*"
update-types:
- version-update:semver-major
- version-update:semver-minor
groups:
k8sio:
patterns:
- k8s.io/*
exclude-patterns:
- k8s.io/klog/*
- package-ecosystem: "docker"
target-branch: release-1.17
directories:
# CUDA image
- "/deployments/container"
# Golang version
- "/deployments/devel"
schedule:
interval: "weekly"
day: "sunday"
ignore:
# For release branches we only apply patch updates to the golang version.
- dependency-name: "*golang*"
update-types:
- version-update:semver-major
- version-update:semver-minor
labels:
- dependencies
- maintenance
- package-ecosystem: "github-actions"
target-branch: release-1.17
directory: "/"
schedule:
interval: "weekly"
day: "sunday"
labels:
- dependencies
- maintenance
# Github actions need to be gh-pages branches.
- package-ecosystem: "github-actions"
target-branch: gh-pages
directory: "/"
schedule:
interval: "daily"
labels:
- dependencies

View File

@@ -1,53 +0,0 @@
# Copyright 2025 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: CI Pipeline
on:
push:
branches:
- "pull-request/[0-9]+"
- main
- release-*
jobs:
code-scanning:
uses: ./.github/workflows/code_scanning.yaml
variables:
runs-on: ubuntu-latest
outputs:
version: ${{ steps.version.outputs.version }}
steps:
- name: Generate Commit Short SHA
id: version
run: echo "version=$(echo $GITHUB_SHA | cut -c1-8)" >> "$GITHUB_OUTPUT"
golang:
uses: ./.github/workflows/golang.yaml
image:
uses: ./.github/workflows/image.yaml
needs: [variables, golang, code-scanning]
secrets: inherit
with:
version: ${{ needs.variables.outputs.version }}
build_multi_arch_images: ${{ github.ref_name == 'main' || startsWith(github.ref_name, 'release-') }}
e2e-test:
needs: [image, variables]
secrets: inherit
uses: ./.github/workflows/e2e.yaml
with:
version: ${{ needs.variables.outputs.version }}

View File

@@ -1,49 +0,0 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "CodeQL"
on:
workflow_call: {}
pull_request:
types:
- opened
- synchronize
branches:
- main
- release-*
jobs:
analyze:
name: Analyze Go code with CodeQL
runs-on: ubuntu-latest
timeout-minutes: 360
permissions:
security-events: write
packages: read
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
with:
languages: go
build-mode: manual
- shell: bash
run: |
make build
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
with:
category: "/language:go"

View File

@@ -1,103 +0,0 @@
# Copyright 2025 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: End-to-end Tests
on:
workflow_call:
inputs:
version:
required: true
type: string
secrets:
AWS_ACCESS_KEY_ID:
required: true
AWS_SECRET_ACCESS_KEY:
required: true
AWS_SSH_KEY:
required: true
E2E_SSH_USER:
required: true
SLACK_BOT_TOKEN:
required: true
SLACK_CHANNEL_ID:
required: true
jobs:
e2e-tests:
runs-on: linux-amd64-cpu4
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Calculate build vars
id: vars
run: |
echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
GOLANG_VERSION=$(./hack/golang-version.sh)
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GOLANG_VERSION }}
- name: Set up Holodeck
uses: NVIDIA/holodeck@v0.2.12
with:
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
holodeck_config: "tests/e2e/infra/aws.yaml"
- name: Get public dns name
id: holodeck_public_dns_name
uses: mikefarah/yq@master
with:
cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml
- name: Run e2e tests
env:
E2E_INSTALL_CTK: "true"
E2E_IMAGE_NAME: ghcr.io/nvidia/container-toolkit
E2E_IMAGE_TAG: ${{ inputs.version }}-ubuntu20.04
E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
run: |
e2e_ssh_key=$(mktemp)
echo "${{ secrets.AWS_SSH_KEY }}" > "$e2e_ssh_key"
chmod 600 "$e2e_ssh_key"
export E2E_SSH_KEY="$e2e_ssh_key"
make -f tests/e2e/Makefile test
- name: Archive Ginkgo logs
uses: actions/upload-artifact@v4
with:
name: ginkgo-logs
path: ginkgo.json
retention-days: 15
- name: Send Slack alert notification
if: ${{ failure() }}
uses: slackapi/slack-github-action@v2.1.0
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: ${{ secrets.SLACK_CHANNEL_ID }}
text: |
:x: On repository ${{ github.repository }}, the Workflow *${{ github.workflow }}* has failed.
Details: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}

View File

@@ -1,102 +0,0 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Golang
on:
workflow_call: {}
pull_request:
types:
- opened
- synchronize
branches:
- main
- release-*
jobs:
check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
name: Checkout code
- name: Get Golang version
id: vars
run: |
GOLANG_VERSION=$(./hack/golang-version.sh)
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GOLANG_VERSION }}
- name: Lint
uses: golangci/golangci-lint-action@v8
with:
version: latest
args: -v --timeout 5m
skip-cache: true
- name: Check golang modules
run: |
make check-vendor
make -C deployments/devel check-modules
test:
name: Unit test
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Get Golang version
id: vars
run: |
GOLANG_VERSION=$(./hack/golang-version.sh)
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GOLANG_VERSION }}
- name: Run unit tests and generate coverage report
run: make coverage
- name: Upload to Coveralls
uses: coverallsapp/github-action@v2
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
file: coverage.out
build:
name: Build
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Get Golang version
id: vars
run: |
GOLANG_VERSION=$(./hack/golang-version.sh)
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION ?= }" >> $GITHUB_ENV
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GOLANG_VERSION }}
- run: make build

View File

@@ -1,126 +0,0 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run this workflow on pull requests
name: image
on:
workflow_call:
inputs:
version:
required: true
type: string
build_multi_arch_images:
required: true
type: string
jobs:
packages:
runs-on: ubuntu-latest
strategy:
matrix:
target:
- ubuntu18.04-arm64
- ubuntu18.04-amd64
- ubuntu18.04-ppc64le
- centos7-aarch64
- centos7-x86_64
- centos8-ppc64le
ispr:
- ${{ github.ref_name != 'main' && !startsWith( github.ref_name, 'release-' ) }}
exclude:
- ispr: true
target: ubuntu18.04-arm64
- ispr: true
target: ubuntu18.04-ppc64le
- ispr: true
target: centos7-aarch64
- ispr: true
target: centos8-ppc64le
fail-fast: false
steps:
- uses: actions/checkout@v4
name: Check out code
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:master
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: build ${{ matrix.target }} packages
run: |
sudo apt-get install -y coreutils build-essential sed git bash make
echo "Building packages"
./scripts/build-packages.sh ${{ matrix.target }}
- name: 'Upload Artifacts'
uses: actions/upload-artifact@v4
with:
compression-level: 0
name: toolkit-container-${{ matrix.target }}-${{ github.run_id }}
path: ${{ github.workspace }}/dist/*
image:
runs-on: ubuntu-latest
strategy:
matrix:
dist:
- ubuntu20.04
- ubi8
- packaging
ispr:
- ${{ github.ref_name != 'main' && !startsWith( github.ref_name, 'release-' ) }}
exclude:
- ispr: true
dist: ubi8
needs: packages
steps:
- uses: actions/checkout@v4
name: Check out code
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:master
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Get built packages
uses: actions/download-artifact@v4
with:
path: ${{ github.workspace }}/dist/
pattern: toolkit-container-*-${{ github.run_id }}
merge-multiple: true
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build image
env:
IMAGE_NAME: ghcr.io/nvidia/container-toolkit
VERSION: ${{ inputs.version }}
PUSH_ON_BUILD: "true"
BUILD_MULTI_ARCH_IMAGES: ${{ inputs.build_multi_arch_images }}
run: |
echo "${VERSION}"
make -f deployments/container/Makefile build-${{ matrix.dist }}

22
.github/workflows/pre-sanity.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: Run pre sanity
# run this workflow for each commit
on: [pull_request]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Build dev image
run: make .build-image
- name: Build
run: make docker-build
- name: Tests
run: make docker-coverage
- name: Checks
run: make docker-check

View File

@@ -1,38 +0,0 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run this workflow on new tags
name: Release
on:
push:
tags:
- v*
jobs:
release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
name: Check out code
- name: Prepare Artifacts
run: |
./hack/prepare-artifacts.sh ${{ github.ref_name }}
- name: Create Draft Release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
./hack/create-release.sh ${{ github.ref_name }}

16
.gitignore vendored
View File

@@ -1,11 +1,13 @@
/dist
/artifacts
dist
artifacts
*.swp
*.swo
/coverage.out*
/tests/output/
/nvidia-*
/test/output/
/nvidia-container-runtime
/nvidia-container-runtime.*
/nvidia-container-runtime-hook
/nvidia-container-toolkit
/nvidia-ctk
/shared-*
/release-*
/bin
/toolkit-test
/release-*

View File

@@ -15,6 +15,68 @@
include:
- .common-ci.yml
build-dev-image:
stage: image
script:
- apk --no-cache add make bash
- make .build-image
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- make .push-build-image
.requires-build-image:
image: "${BUILDIMAGE}"
.go-check:
extends:
- .requires-build-image
stage: go-checks
fmt:
extends:
- .go-check
script:
- make assert-fmt
vet:
extends:
- .go-check
script:
- make vet
lint:
extends:
- .go-check
script:
- make lint
allow_failure: true
ineffassign:
extends:
- .go-check
script:
- make ineffassign
allow_failure: true
misspell:
extends:
- .go-check
script:
- make misspell
go-build:
extends:
- .requires-build-image
stage: go-build
script:
- make build
unit-tests:
extends:
- .requires-build-image
stage: unit-tests
script:
- make coverage
# Define the package build helpers
.multi-arch-build:
before_script:
@@ -126,7 +188,15 @@ package-ubuntu18.04-ppc64le:
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
script:
- make -f deployments/container/Makefile build-${DIST}
- make -f build/container/Makefile build-${DIST}
image-centos7:
extends:
- .image-build
- .package-artifacts
- .dist-centos7
needs:
- package-centos7-x86_64
image-ubi8:
extends:
@@ -176,6 +246,12 @@ image-packaging:
optional: true
# Define publish test helpers
.test:toolkit:
extends:
- .integration
variables:
TEST_CASES: "toolkit"
.test:docker:
extends:
- .integration
@@ -226,3 +302,4 @@ test-docker-ubuntu20.04:
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04

4
.gitmodules vendored
View File

@@ -1,4 +1,4 @@
[submodule "third_party/libnvidia-container"]
path = third_party/libnvidia-container
url = https://github.com/NVIDIA/libnvidia-container.git
branch = main
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
branch = release-1.14

View File

@@ -1,72 +0,0 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
version: "2"
linters:
enable:
- contextcheck
- gocritic
- gosec
- misspell
- unconvert
exclusions:
generated: lax
presets:
- comments
- common-false-positives
- legacy
- std-error-handling
rules:
# Exclude the gocritic dupSubExpr issue for cgo files.
- linters:
- gocritic
path: internal/dxcore/dxcore.go
text: dupSubExpr
# Exclude the checks for usage of returns to config.Delete(Path) in the
# crio and containerd config packages.
- linters:
- errcheck
path: pkg/config/engine/
text: config.Delete
# RENDERD refers to the Render Device and not the past tense of render.
- linters:
- misspell
path: .*.go
text: '`RENDERD` is a misspelling of `RENDERED`'
# The legacy hook relies on spec.Hooks.Prestart, which is deprecated as of
# the v1.2.0 OCI runtime spec.
- path: (.+)\.go$
text: SA1019:(.+).Prestart is deprecated(.+)
# TODO: We should address each of the following integer overflows.
- path: (.+)\.go$
text: 'G115: integer overflow conversion(.+)'
paths:
- third_party$
- builtin$
- examples$
formatters:
enable:
- gofmt
- goimports
settings:
goimports:
local-prefixes:
- github.com/NVIDIA/nvidia-container-toolkit
exclusions:
generated: lax
paths:
- third_party$
- builtin$
- examples$

View File

@@ -33,7 +33,7 @@ variables:
# On the multi-arch builder we don't need the qemu setup.
SKIP_QEMU_SETUP: "1"
# Define the public staging registry
STAGING_REGISTRY: ghcr.io/nvidia
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
ARTIFACTORY_REPO_BASE: "https://urm.nvidia.com/artifactory/sw-gpu-cloudnative"
KITMAKER_RELEASE_FOLDER: "kitmaker"
@@ -67,7 +67,12 @@ variables:
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
script:
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
- make -f deployments/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
- make -f build/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
image-centos7:
extends:
- .dist-centos7
- .image-pull
image-ubi8:
extends:
@@ -127,6 +132,14 @@ image-packaging:
- policy_evaluation.json
# Define the scan targets
scan-centos7-amd64:
extends:
- .dist-centos7
- .platform-amd64
- .scan
needs:
- image-centos7
scan-ubuntu20.04-amd64:
extends:
- .dist-ubuntu20.04
@@ -204,6 +217,23 @@ release:packages:kitmaker:
extends:
- .release:packages
release:archive:
extends:
- .release:external
needs:
- image-packaging
variables:
VERSION: "${CI_COMMIT_SHORT_SHA}"
PACKAGE_REGISTRY: "${CI_REGISTRY}"
PACKAGE_REGISTRY_USER: "${CI_REGISTRY_USER}"
PACKAGE_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
PACKAGE_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
PACKAGE_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}-packaging"
PACKAGE_ARCHIVE_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-generic-local/${PACKAGE_ARCHIVE_RELEASE_FOLDER}"
script:
- apk add --no-cache bash git
- ./scripts/archive-packages.sh "${PACKAGE_ARCHIVE_ARTIFACTORY_REPO}"
release:staging-ubuntu20.04:
extends:
- .release:staging
@@ -213,6 +243,11 @@ release:staging-ubuntu20.04:
# Define the external release targets
# Release to NGC
release:ngc-centos7:
extends:
- .dist-centos7
- .release:ngc
release:ngc-ubuntu20.04:
extends:
- .dist-ubuntu20.04
@@ -227,62 +262,3 @@ release:ngc-packaging:
extends:
- .dist-packaging
- .release:ngc
# Define the external image signing steps for NGC
# Download the ngc cli binary for use in the sign steps
.ngccli-setup:
before_script:
- apt-get update && apt-get install -y curl unzip jq
- |
if [ -z "${NGCCLI_VERSION}" ]; then
NGC_VERSION_URL="https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions"
# Extract the latest version from the JSON data using jq
export NGCCLI_VERSION=$(curl -s $NGC_VERSION_URL | jq -r '.recipe.latestVersionIdStr')
fi
echo "NGCCLI_VERSION ${NGCCLI_VERSION}"
- curl -sSLo ngccli_linux.zip https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/${NGCCLI_VERSION}/files/ngccli_linux.zip
- unzip ngccli_linux.zip
- chmod u+x ngc-cli/ngc
# .sign forms the base of the deployment jobs which signs images in the CI registry.
# This is extended with the image name and version to be deployed.
.sign:ngc:
image: ubuntu:latest
stage: sign
rules:
- if: $CI_COMMIT_TAG
variables:
NGC_CLI_API_KEY: "${NGC_REGISTRY_TOKEN}"
IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
IMAGE_TAG: "${CI_COMMIT_TAG}-${DIST}"
retry:
max: 2
before_script:
- !reference [.ngccli-setup, before_script]
# We ensure that the IMAGE_NAME and IMAGE_TAG is set
- 'echo Image Name: ${IMAGE_NAME} && [[ -n "${IMAGE_NAME}" ]] || exit 1'
- 'echo Image Tag: ${IMAGE_TAG} && [[ -n "${IMAGE_TAG}" ]] || exit 1'
script:
- 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"'
- ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia
sign:ngc-ubuntu20.04:
extends:
- .dist-ubuntu20.04
- .sign:ngc
needs:
- release:ngc-ubuntu20.04
sign:ngc-ubi8:
extends:
- .dist-ubi8
- .sign:ngc
needs:
- release:ngc-ubi8
sign:ngc-packaging:
extends:
- .dist-packaging
- .sign:ngc
needs:
- release:ngc-packaging

View File

@@ -1,176 +1,27 @@
# NVIDIA Container Toolkit Changelog
## v1.17.4
- Disable mounting of compat libs from container by default
- Add allow-cuda-compat-libs-from-container feature flag
- Skip graphics modifier in CSV mode
- Properly pass configSearchPaths to a Driver constructor
- Add support for containerd version 3 config
- Add string TOML source
### Changes in libnvidia-container
- Add no-cntlibs CLI option to nvidia-container-cli
### Changes in the Toolkit Container
- Bump CUDA base image version to 12.6.3
## v1.17.3
- Only allow host-relative LDConfig paths by default.
### Changes in libnvidia-container
- Create virtual copy of host ldconfig binary before calling fexecve()
## v1.17.2
- Fixed a bug where legacy images would set imex channels as `all`.
## v1.17.1
- Fixed a bug where specific symlinks existing in a container image could cause a container to fail to start.
- Fixed a bug on Tegra-based systems where a container would fail to start.
- Fixed a bug where the default container runtime config path was not properly set.
### Changes in the Toolkit Container
- Fallback to using a config file if the current runtime config can not be determined from the command line.
## v1.17.0
- Promote v1.17.0-rc.2 to v1.17.0
- Fix bug when using just-in-time CDI spec generation
- Check for valid paths in create-symlinks hook
## v1.17.0-rc.2
- Fix bug in locating libcuda.so from ldcache
- Fix bug in sorting of symlink chain
- Remove unsupported print-ldcache command
- Remove csv-filename support from create-symlinks
### Changes in the Toolkit Container
- Fallback to `crio-status` if `crio status` does not work when configuring the crio runtime
## v1.17.0-rc.1
- Allow IMEX channels to be requested as volume mounts
- Fix typo in error message
- Add disable-imex-channel-creation feature flag
- Add -z,lazy to LDFLAGS
- Add imex channels to management CDI spec
- Add support to fetch current container runtime config from the command line.
- Add creation of select driver symlinks to CDI spec generation.
- Remove support for config overrides when configuring runtimes.
- Skip explicit creation of libnvidia-allocator.so.1 symlink
- Add vdpau as as a driver library search path.
- Add support for using libnvsandboxutils to generate CDI specifications.
### Changes in the Toolkit Container
- Allow opt-in features to be selected when deploying the toolkit-container.
- Bump CUDA base image version to 12.6.2
- Remove support for config overrides when configuring runtimes.
### Changes in libnvidia-container
- Add no-create-imex-channels command line option.
## v1.16.2
- Exclude libnvidia-allocator from graphics mounts. This fixes a bug that leaks mounts when a container is started with bi-directional mount propagation.
- Use empty string for default runtime-config-override. This removes a redundant warning for runtimes (e.g. Docker) where this is not applicable.
### Changes in the Toolkit Container
- Bump CUDA base image version to 12.6.0
### Changes in libnvidia-container
- Add no-gsp-firmware command line option
- Add no-fabricmanager command line option
- Add no-persistenced command line option
- Skip directories and symlinks when mounting libraries.
## v1.16.1
- Fix bug with processing errors during CDI spec generation for MIG devices
## v1.16.0
- Promote v1.16.0-rc.2 to v1.16.0
### Changes in the Toolkit Container
- Bump CUDA base image version to 12.5.1
## v1.16.0-rc.2
- Use relative path to locate driver libraries
- Add RelativeToRoot function to Driver
- Inject additional libraries for full X11 functionality
- Extract options from default runtime if runc does not exist
- Avoid using map pointers as maps are always passed by reference
- Reduce logging for the NVIDIA Container runtime
- Fix bug in argument parsing for logger creation
## v1.16.0-rc.1
- Support vulkan ICD files directly in a driver root. This allows for the discovery of vulkan files in GKE driver installations.
- Increase priority of ld.so.conf.d config file injected into container. This ensures that injected libraries are preferred over libraries present in the container.
- Set default CDI spec permissions to 644. This fixes permission issues when using the `nvidia-ctk cdi transform` functions.
- Add `dev-root` option to `nvidia-ctk system create-device-nodes` command.
- Fix location of `libnvidia-ml.so.1` when a non-standard driver root is used. This enabled CDI spec generation when using the driver container on a host.
- Recalculate minimum required CDI spec version on save.
- Move `nvidia-ctk hook` commands to a separate `nvidia-cdi-hook` binary. The same subcommands are supported.
- Use `:` as an `nvidia-ctk config --set` list separator. This fixes a bug when trying to set config options that are lists.
- [toolkit-container] Bump CUDA base image version to 12.5.0
- [toolkit-container] Allow the path to `toolkit.pid` to be specified directly.
- [toolkit-container] Remove provenance information from image manifests.
- [toolkit-container] Add `dev-root` option when configuring the toolkit. This adds support for GKE driver installations.
## v1.15.0
* Remove `nvidia-container-runtime` and `nvidia-docker2` packages.
* Use `XDG_DATA_DIRS` environment variable when locating config files such as graphics config files.
* Add support for v0.7.0 Container Device Interface (CDI) specification.
* Add `--config-search-path` option to `nvidia-ctk cdi generate` command. These paths are used when locating driver files such as graphics config files.
* Use D3DKMTEnumAdapters3 to enumerate adpaters on WSL2 if available.
* Add support for v1.2.0 OCI Runtime specification.
* Explicitly set `NVIDIA_VISIBLE_DEVICES=void` in generated CDI specifications. This prevents the NVIDIA Container Runtime from making additional modifications.
* [libnvidia-container] Use D3DKMTEnumAdapters3 to enumerate adpaters on WSL2 if available.
* [toolkit-container] Bump CUDA base image version to 12.4.1
## v1.15.0-rc.4
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
## v1.14.6
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
* Allow multiple device naming strategies for `nvidia-ctk cdi generate` command. This allows a single
CDI spec to be generated that includes GPUs by index and UUID.
* Set the default `--device-name-strategy` for the `nvidia-ctk cdi generate` command to `[index, uuid]`.
* Remove `libnvidia-container0` jetpack dependency included for legacy Tegra-based systems.
* Add `NVIDIA_VISIBLE_DEVICES=void` to generated CDI specifications.
* Add support for selecting IMEX channels using the NVIDIA_IMEX_CHANNELS environement variable.
* [toolkit-container] Remove centos7 image. The ubi8 image can be used on all RPM-based platforms.
* [toolkit-container] Bump CUDA base image version to 12.3.2
## v1.14.5
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker. This was incorrectly setting `experimental = true` instead
of setting `features.cdi = true`.
## v1.15.0-rc.3
* Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied.
## v1.15.0-rc.2
* Extend the `runtime.nvidia.com/gpu` CDI kind to support full-GPUs and MIG devices specified by index or UUID.
* Fix bug when specifying `--dev-root` for Tegra-based systems.
* Log explicitly requested runtime mode.
* Remove package dependency on libseccomp.
* Added detection of libnvdxgdmal.so.1 on WSL2
* Use devRoot to resolve MIG device nodes.
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
* Add `crun` to the list of configured low-level runtimes.
* Added support for `--ldconfig-path` to `nvidia-ctk cdi generate` command.
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker.
* Add discovery of the GDRCopy device (`gdrdrv`) if the `NVIDIA_GDRCOPY` environment variable of the container is set to `enabled`
* [toolkit-container] Bump CUDA base image version to 12.3.1.
## v1.15.0-rc.1
* Skip update of ldcache in containers without ldconfig. The .so.SONAME symlinks are still created.
* Normalize ldconfig path on use. This automatically adjust the ldconfig setting applied to ldconfig.real on systems where this exists.
## v1.14.4
* Include `nvidia/nvoptix.bin` in list of graphics mounts.
* Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts.
* Add support for `--library-search-paths` to `nvidia-ctk cdi generate` command.
* Add support for injecting /dev/nvidia-nvswitch* devices if the NVIDIA_NVSWITCH=enabled envvar is specified.
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
* Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly.
* Add `--relative-to` option to `nvidia-ctk transform root` command. This controls whether the root transformation is applied to host or container paths.
* Added automatic CDI spec generation when the `runtime.nvidia.com/gpu=all` device is requested by a container.
* Log explicitly requested runtime mode.
* Remove package dependency on libseccomp.
* Added detection of libnvdxgdmal.so.1 on WSL2.
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
* Add `crun` to the list of configured low-level runtimes.
* Add `--cdi.enabled` option to `nvidia-ctk runtime configure` command to enable CDI in containerd.
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
* [libnvidia-container] Fix device permission check when using cgroupv2 (fixes #227)
* [toolkit-container] Bump CUDA base image version to 12.3.1.
* [libnvidia-container] Added detection of libnvdxgdmal.so.1 on WSL2.
## v1.14.3
* [toolkit-container] Bump CUDA base image version to 12.2.2.
@@ -202,7 +53,7 @@
## v1.14.0-rc.2
* Fix bug causing incorrect nvidia-smi symlink to be created on WSL2 systems with multiple driver roots.
* Remove dependency on coreutils when installing package on RPM-based systems.
* Create output folders if required when running `nvidia-ctk runtime configure`
* Create ouput folders if required when running `nvidia-ctk runtime configure`
* Generate default config as post-install step.
* Added support for detecting GSP firmware at custom paths when generating CDI specifications.
* Added logic to skip the extraction of image requirements if `NVIDIA_DISABLE_REQUIRES` is set to `true`.

View File

@@ -34,7 +34,7 @@ environment variables.
## Testing packages locally
The [tests/release](./tests/release/) folder contains documentation on how the installation of local or staged packages can be tested.
The [test/release](./test/release/) folder contains documentation on how the installation of local or staged packages can be tested.
## Releasing

142
Jenkinsfile vendored Normal file
View File

@@ -0,0 +1,142 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
podTemplate (cloud:'sw-gpu-cloudnative',
containers: [
containerTemplate(name: 'docker', image: 'docker:dind', ttyEnabled: true, privileged: true),
containerTemplate(name: 'golang', image: 'golang:1.16.3', ttyEnabled: true)
]) {
node(POD_LABEL) {
def scmInfo
stage('checkout') {
scmInfo = checkout(scm)
}
stage('dependencies') {
container('golang') {
sh 'GO111MODULE=off go get -u github.com/client9/misspell/cmd/misspell'
sh 'GO111MODULE=off go get -u github.com/gordonklaus/ineffassign'
sh 'GO111MODULE=off go get -u golang.org/x/lint/golint'
}
container('docker') {
sh 'apk add --no-cache make bash git'
}
}
stage('check') {
parallel (
getGolangStages(["assert-fmt", "lint", "vet", "ineffassign", "misspell"])
)
}
stage('test') {
parallel (
getGolangStages(["test"])
)
}
def versionInfo
stage('version') {
container('docker') {
versionInfo = getVersionInfo(scmInfo)
println "versionInfo=${versionInfo}"
}
}
def dist = 'ubuntu20.04'
def arch = 'amd64'
def stageLabel = "${dist}-${arch}"
stage('build-one') {
container('docker') {
stage (stageLabel) {
sh "make ${dist}-${arch}"
}
}
}
stage('release') {
container('docker') {
stage (stageLabel) {
def component = 'main'
def repository = 'sw-gpu-cloudnative-debian-local/pool/main/'
def uploadSpec = """{
"files":
[ {
"pattern": "./dist/${dist}/${arch}/*.deb",
"target": "${repository}",
"props": "deb.distribution=${dist};deb.component=${component};deb.architecture=${arch}"
}
]
}"""
sh "echo starting release with versionInfo=${versionInfo}"
if (versionInfo.isTag) {
// upload to artifactory repository
def server = Artifactory.server 'sw-gpu-artifactory'
server.upload spec: uploadSpec
} else {
sh "echo skipping release for non-tagged build"
}
}
}
}
}
}
def getGolangStages(def targets) {
stages = [:]
for (t in targets) {
stages[t] = getLintClosure(t)
}
return stages
}
def getLintClosure(def target) {
return {
container('golang') {
stage(target) {
sh "make ${target}"
}
}
}
}
// getVersionInfo returns a hash of version info
def getVersionInfo(def scmInfo) {
def versionInfo = [
isTag: isTag(scmInfo.GIT_BRANCH)
]
scmInfo.each { k, v -> versionInfo[k] = v }
return versionInfo
}
def isTag(def branch) {
if (!branch.startsWith('v')) {
return false
}
def version = shOutput('git describe --all --exact-match --always')
return version == "tags/${branch}"
}
def shOuptut(def script) {
return sh(script: script, returnStdout: true).trim()
}

116
Makefile
View File

@@ -38,8 +38,8 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
CHECK_TARGETS := lint
MAKE_TARGETS := binaries build check fmt test examples cmds coverage generate licenses vendor check-vendor $(CHECK_TARGETS)
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
@@ -53,26 +53,22 @@ CLI_VERSION = $(VERSION)
endif
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
GOOS ?= linux
binaries: cmds
ifneq ($(PREFIX),)
cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
endif
cmds: $(CMD_TARGETS)
ifneq ($(shell uname),Darwin)
EXTLDFLAGS = -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files -Wl,-z,lazy
else
EXTLDFLAGS = -Wl,-undefined,dynamic_lookup
endif
$(CMD_TARGETS): cmd-%:
go build -ldflags "-s -w '-extldflags=$(EXTLDFLAGS)' -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
build:
go build ./...
GOOS=$(GOOS) go build ./...
examples: $(EXAMPLE_TARGETS)
$(EXAMPLE_TARGETS): example-%:
go build ./examples/$(*)
GOOS=$(GOOS) go build ./examples/$(*)
all: check test build binary
check: $(CHECK_TARGETS)
@@ -82,75 +78,71 @@ fmt:
go list -f '{{.Dir}}' $(MODULE)/... \
| xargs gofmt -s -l -w
# Apply goimports -local github.com/NVIDIA/container-toolkit to the codebase
goimports:
go list -f {{.Dir}} $(MODULE)/... \
| xargs goimports -local $(MODULE) -w
assert-fmt:
go list -f '{{.Dir}}' $(MODULE)/... \
| xargs gofmt -s -l > fmt.out
@if [ -s fmt.out ]; then \
echo "\nERROR: The following files are not formatted:\n"; \
cat fmt.out; \
rm fmt.out; \
exit 1; \
else \
rm fmt.out; \
fi
ineffassign:
ineffassign $(MODULE)/...
lint:
golangci-lint run ./...
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status
vendor: | mod-tidy mod-vendor mod-verify
misspell:
misspell $(MODULE)/...
mod-tidy:
@for mod in $$(find . -name go.mod -not -path "./testdata/*" -not -path "./third_party/*"); do \
echo "Tidying $$mod..."; ( \
cd $$(dirname $$mod) && go mod tidy \
) || exit 1; \
done
mod-vendor:
@for mod in $$(find . -name go.mod -not -path "./testdata/*" -not -path "./third_party/*" -not -path "./deployments/*"); do \
echo "Vendoring $$mod..."; ( \
cd $$(dirname $$mod) && go mod vendor \
) || exit 1; \
done
mod-verify:
@for mod in $$(find . -name go.mod -not -path "./testdata/*" -not -path "./third_party/*"); do \
echo "Verifying $$mod..."; ( \
cd $$(dirname $$mod) && go mod verify | sed 's/^/ /g' \
) || exit 1; \
done
check-vendor: vendor
git diff --exit-code HEAD -- go.mod go.sum vendor
vet:
go vet $(MODULE)/...
licenses:
go-licenses csv $(MODULE)/...
COVERAGE_FILE := coverage.out
test: build cmds
go test -coverprofile=$(COVERAGE_FILE).with-mocks $(MODULE)/...
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
coverage: test
cat $(COVERAGE_FILE).with-mocks | grep -v "_mock.go" > $(COVERAGE_FILE)
go tool cover -func=$(COVERAGE_FILE)
cat $(COVERAGE_FILE) | grep -v "_mock.go" > $(COVERAGE_FILE).no-mocks
go tool cover -func=$(COVERAGE_FILE).no-mocks
generate:
go generate $(MODULE)/...
# Generate an image for containerized builds
# Note: This image is local only
.PHONY: .build-image
.build-image:
make -f deployments/devel/Makefile .build-image
.PHONY: .build-image .pull-build-image .push-build-image
.build-image: docker/Dockerfile.devel
if [ x"$(SKIP_IMAGE_BUILD)" = x"" ]; then \
$(DOCKER) build \
--progress=plain \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--tag $(BUILDIMAGE) \
-f $(^) \
docker; \
fi
ifeq ($(BUILD_DEVEL_IMAGE),yes)
$(DOCKER_TARGETS): .build-image
.shell: .build-image
endif
.pull-build-image:
$(DOCKER) pull $(BUILDIMAGE)
$(DOCKER_TARGETS): docker-%:
@echo "Running 'make $(*)' in container image $(BUILDIMAGE)"
.push-build-image:
$(DOCKER) push $(BUILDIMAGE)
$(DOCKER_TARGETS): docker-%: .build-image
@echo "Running 'make $(*)' in docker container $(BUILDIMAGE)"
$(DOCKER) run \
--rm \
-e GOCACHE=/tmp/.cache/go \
-e GOMODCACHE=/tmp/.cache/gomod \
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
-v $(PWD):/work \
-w /work \
-e GOCACHE=/tmp/.cache \
-v $(PWD):$(PWD) \
-w $(PWD) \
--user $$(id -u):$$(id -g) \
$(BUILDIMAGE) \
make $(*)
@@ -161,10 +153,8 @@ PHONY: .shell
$(DOCKER) run \
--rm \
-ti \
-e GOCACHE=/tmp/.cache/go \
-e GOMODCACHE=/tmp/.cache/gomod \
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
-v $(PWD):/work \
-w /work \
-e GOCACHE=/tmp/.cache \
-v $(PWD):$(PWD) \
-w $(PWD) \
--user $$(id -u):$$(id -g) \
$(BUILDIMAGE)

View File

@@ -1,36 +0,0 @@
# Release Process
The NVIDIA Container Toolkit consists of the following artifacts:
- The NVIDIA Container Toolkit container
- Packages for debian-based systems
- Packages for rpm-based systems
# Release Process Checklist:
- [ ] Create a release PR:
- [ ] Run the `./hack/prepare-release.sh` script to update the version in all the needed files. This also creates a [release issue](https://github.com/NVIDIA/cloud-native-team/issues?q=is%3Aissue+is%3Aopen+label%3Arelease)
- [ ] Run the `./hack/generate-changelog.sh` script to generate the a draft changelog and update `CHANGELOG.md` with the changes.
- [ ] Create a PR from the created `bump-release-{{ .VERSION }}` branch.
- [ ] Merge the release PR
- [ ] Tag the release and push the tag to the `internal` mirror:
- [ ] Image release pipeline: https://gitlab-master.nvidia.com/dl/container-dev/container-toolkit/-/pipelines/16466098
- [ ] Wait for the image release to complete.
- [ ] Push the tag to the the upstream GitHub repo.
- [ ] Wait for the [`Release`](https://github.com/NVIDIA/k8s-device-plugin/actions/workflows/release.yaml) GitHub Action to complete
- [ ] Publish the [draft release](https://github.com/NVIDIA/k8s-device-plugin/releases) created by the GitHub Action
- [ ] Publish the packages to the gh-pages branch of the libnvidia-container repo
- [ ] Create a KitPick
## Troubleshooting
*Note*: This assumes that we have the release tag checked out locally.
- If the `Release` GitHub Action fails:
- Check the logs for the error first.
- Create the helm packages locally by running:
```bash
./hack/prepare-artifacts.sh {{ .VERSION }}
```
- Create the draft release by running:
```bash
./hack/create-release.sh {{ .VERSION }}
```

View File

@@ -1,24 +0,0 @@
# Security
NVIDIA is dedicated to the security and trust of our software products and services, including all source code repositories managed through our organization.
If you need to report a security issue, please use the appropriate contact points outlined below. **Please do not report security vulnerabilities through GitHub.**
## Reporting Potential Security Vulnerability in an NVIDIA Product
To report a potential security vulnerability in any NVIDIA product:
- Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html)
- E-Mail: psirt@nvidia.com
- We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key)
- Please include the following information:
- Product/Driver name and version/branch that contains the vulnerability
- Type of vulnerability (code execution, denial of service, buffer overflow, etc.)
- Instructions to reproduce the vulnerability
- Proof-of-concept or exploit code
- Potential impact of the vulnerability, including how an attacker could exploit the vulnerability
While NVIDIA currently does not have a bug bounty program, we do offer acknowledgement when an externally reported security issue is addressed under our coordinated vulnerability disclosure policy. Please visit our [Product Security Incident Response Team (PSIRT)](https://www.nvidia.com/en-us/security/psirt-policies/) policies page for more information.
## NVIDIA Product Security
For all security-related concerns, please visit NVIDIA's Product Security portal at https://www.nvidia.com/en-us/security

View File

@@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi8 AS build
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
RUN yum install -y \
wget make git gcc \
@@ -29,25 +31,36 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz
ENV GOPATH=/go
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
WORKDIR /build
COPY . .
RUN mkdir /artifacts
ARG VERSION="N/A"
ARG GIT_COMMIT="unknown"
RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
# NOTE: Until the config utilities are properly integrated into the
# nvidia-container-toolkit repository, these are built from the `tools` folder
# and not `cmd`.
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi8
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG BASE_DIST
# See https://www.centos.org/centos-linux-eol/
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
RUN [[ "${BASE_DIST}" != "centos8" ]] || \
( \
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
)
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=void
@@ -61,8 +74,7 @@ WORKDIR /artifacts/packages
ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH=${TARGETARCH}
ENV PACKAGE_ARCH ${TARGETARCH}
RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm64/aarch64} && \
yum localinstall -y \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-1.*.rpm \
@@ -71,12 +83,10 @@ RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm
WORKDIR /work
COPY --from=build /artifacts/nvidia-ctk-installer /work/nvidia-ctk-installer
RUN ln -s nvidia-ctk-installer nvidia-toolkit
COPY --from=build /artifacts/bin /work
ENV PATH=/work:$PATH
ARG VERSION
LABEL io.k8s.display-name="NVIDIA Container Runtime Config"
LABEL name="NVIDIA Container Runtime Config"
LABEL vendor="NVIDIA"
@@ -87,4 +97,4 @@ LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
ENTRYPOINT ["/work/nvidia-ctk-installer"]
ENTRYPOINT ["/work/nvidia-toolkit"]

View File

@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG ARTIFACTS_ROOT
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
@@ -22,6 +24,7 @@ COPY ${ARTIFACTS_ROOT} /artifacts/packages/
WORKDIR /artifacts/packages
# build-args are added to the manifest.txt file below.
ARG BASE_DIST
ARG PACKAGE_DIST
ARG PACKAGE_VERSION
ARG GIT_BRANCH

View File

@@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04 AS build
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
RUN apt-get update && \
apt-get install -y wget make git gcc \
@@ -29,24 +31,25 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz
ENV GOPATH=/go
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
WORKDIR /build
COPY . .
RUN mkdir /artifacts
ARG VERSION="N/A"
ARG GIT_COMMIT="unknown"
RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
# NOTE: Until the config utilities are properly integrated into the
# nvidia-container-toolkit repository, these are built from the `tools` folder
# and not `cmd`.
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
RUN rm -f /etc/apt/sources.list.d/cuda.list
@@ -70,7 +73,15 @@ WORKDIR /artifacts/packages
ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH=${TARGETARCH}
ENV PACKAGE_ARCH ${TARGETARCH}
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container/stable"
ARG LIBNVIDIA_CONTAINER0_VERSION
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
--output ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb && \
dpkg -i ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb; \
fi
RUN dpkg -i \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
@@ -79,12 +90,10 @@ RUN dpkg -i \
WORKDIR /work
COPY --from=build /artifacts/nvidia-ctk-installer /work/nvidia-ctk-installer
RUN ln -s nvidia-ctk-installer nvidia-toolkit
COPY --from=build /artifacts/bin /work/
ENV PATH=/work:$PATH
ARG VERSION
LABEL io.k8s.display-name="NVIDIA Container Runtime Config"
LABEL name="NVIDIA Container Runtime Config"
LABEL vendor="NVIDIA"
@@ -95,4 +104,4 @@ LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
ENTRYPOINT ["/work/nvidia-ctk-installer"]
ENTRYPOINT ["/work/nvidia-toolkit"]

View File

@@ -27,6 +27,12 @@ DIST_DIR ?= $(CURDIR)/dist
##### Global variables #####
include $(CURDIR)/versions.mk
ifeq ($(IMAGE_NAME),)
REGISTRY ?= nvidia
IMAGE_NAME := $(REGISTRY)/container-toolkit
endif
VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
IMAGE_VERSION := $(VERSION)
IMAGE_TAG ?= $(VERSION)-$(DIST)
@@ -39,11 +45,10 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules #####
DEFAULT_PUSH_TARGET := ubuntu20.04
DISTRIBUTIONS := ubuntu20.04 ubi8
DISTRIBUTIONS := ubuntu20.04 ubi8 centos7
META_TARGETS := packaging
IMAGE_TARGETS := $(patsubst %,image-%,$(DISTRIBUTIONS) $(META_TARGETS))
BUILD_TARGETS := $(patsubst %,build-%,$(DISTRIBUTIONS) $(META_TARGETS))
PUSH_TARGETS := $(patsubst %,push-%,$(DISTRIBUTIONS) $(META_TARGETS))
TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
@@ -51,9 +56,9 @@ TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
ifneq ($(BUILD_MULTI_ARCH_IMAGES),true)
include $(CURDIR)/deployments/container/native-only.mk
include $(CURDIR)/build/container/native-only.mk
else
include $(CURDIR)/deployments/container/multi-arch.mk
include $(CURDIR)/build/container/multi-arch.mk
endif
# For the default push target we also push a short tag equal to the version.
@@ -79,20 +84,22 @@ push-short:
build-%: DIST = $(*)
build-%: DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile.$(DOCKERFILE_SUFFIX)
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
# Use a generic build target to build the relevant images
$(IMAGE_TARGETS): image-%: $(ARTIFACTS_ROOT)
$(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
DOCKER_BUILDKIT=1 \
$(DOCKER) $(BUILDX) build --pull \
--provenance=false --sbom=false \
$(DOCKER_BUILD_OPTIONS) \
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
--tag $(IMAGE) \
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
--build-arg BASE_DIST="$(BASE_DIST)" \
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg LIBNVIDIA_CONTAINER0_VERSION="$(LIBNVIDIA_CONTAINER0_DEPENDENCY)" \
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
--build-arg VERSION="$(VERSION)" \
@@ -103,12 +110,21 @@ $(IMAGE_TARGETS): image-%: $(ARTIFACTS_ROOT)
-f $(DOCKERFILE) \
$(CURDIR)
build-ubuntu%: BASE_DIST = $(*)
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
build-ubi8: DOCKERFILE_SUFFIX := ubi8
build-ubi8: BASE_DIST := ubi8
build-ubi8: DOCKERFILE_SUFFIX := centos
build-ubi8: PACKAGE_DIST = centos7
build-centos7: BASE_DIST = $(*)
build-centos7: DOCKERFILE_SUFFIX := centos
build-centos7: PACKAGE_DIST = $(BASE_DIST)
build-packaging: BASE_DIST := ubuntu20.04
build-packaging: DOCKERFILE_SUFFIX := packaging
build-packaging: PACKAGE_ARCH := amd64
build-packaging: PACKAGE_DIST = all
@@ -116,13 +132,7 @@ build-packaging: PACKAGE_DIST = all
# Test targets
test-%: DIST = $(*)
# Handle the default build target.
.PHONY: build
build: $(DEFAULT_PUSH_TARGET)
$(DEFAULT_PUSH_TARGET): build-$(DEFAULT_PUSH_TARGET)
$(DEFAULT_PUSH_TARGET): DIST = $(DEFAULT_PUSH_TARGET)
TEST_CASES ?= docker crio containerd
TEST_CASES ?= toolkit docker crio containerd
$(TEST_TARGETS): test-%:
TEST_CASES="$(TEST_CASES)" bash -x $(CURDIR)/test/container/main.sh run \
$(CURDIR)/shared-$(*) \

View File

@@ -16,7 +16,11 @@ PUSH_ON_BUILD ?= false
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
$(BUILD_TARGETS): build-%: image-%
# We only have x86_64 packages for centos7
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
# We only generate amd64 image for ubuntu18.04
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
# We only generate a single image for packaging targets
build-packaging: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64

View File

@@ -1,5 +1,4 @@
/**
# Copyright 2024 NVIDIA CORPORATION
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,12 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
// WARNING: THIS FILE WAS AUTOMATICALLY GENERATED.
// Code generated by https://git.io/c-for-go. DO NOT EDIT.
/*
Package NVSANDBOXUTILS bindings
*/
package nvsandboxutils
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64

View File

@@ -1,31 +0,0 @@
# NVIDIA CDI Hook
The CLI `nvidia-cdi-hook` provides container device runtime hook capabilities when
called by a container runtime, as specific in a
[Container Device Interface](https://tags.cncf.io/container-device-interface/blob/main/SPEC.md)
file.
## Generating a CDI
The CDI itself is created for an NVIDIA-capable device using the
[`nvidia-ctk cdi generate`](../nvidia-ctk/) command.
When `nvidia-ctk cdi generate` is run, the CDI specification is generated as a yaml file.
The CDI specification provides instructions for a container runtime to set up devices, files and
other resources for the container prior to starting it. Those instructions
may include executing command-line tools to prepare the filesystem. The execution
of such command-line tools is called a hook.
`nvidia-cdi-hook` is the CLI tool that is expected to be called by the container runtime,
when specified by the CDI file.
See the [`nvidia-ctk` documentation](../nvidia-ctk/README.md) for more information
on generating a CDI file.
## Functionality
The `nvidia-cdi-hook` CLI provides the following functionality:
* `chmod` - Change the permissions of a file or directory inside the directory path to be mounted into a container.
* `create-symlinks` - Create symlinks inside the directory path to be mounted into a container.
* `update-ldcache` - Update the dynamic linker cache inside the directory path to be mounted into a container.

View File

@@ -1,53 +0,0 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package commands
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat"
disabledevicenodemodification "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/disable-device-node-modification"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// New creates the commands associated with supported CDI hooks.
// These are shared by the nvidia-cdi-hook and nvidia-ctk hook commands.
func New(logger logger.Interface) []*cli.Command {
return []*cli.Command{
ldcache.NewCommand(logger),
symlinks.NewCommand(logger),
chmod.NewCommand(logger),
cudacompat.NewCommand(logger),
disabledevicenodemodification.NewCommand(logger),
}
}
// IssueUnsupportedHookWarning logs a warning that no hook or an unsupported
// hook has been specified.
// This happens if a subcommand is provided that does not match one of the
// subcommands that has been explicitly specified.
func IssueUnsupportedHookWarning(logger logger.Interface, c *cli.Context) {
args := c.Args().Slice()
if len(args) == 0 {
logger.Warningf("No CDI hook specified")
} else {
logger.Warningf("Unsupported CDI hook: %v", args[0])
}
}

View File

@@ -1,172 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package symlinks
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/moby/sys/symlink"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
type command struct {
logger logger.Interface
}
type config struct {
links cli.StringSlice
containerSpec string
}
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build creates the create-symlink command.
func (m command) build() *cli.Command {
cfg := config{}
c := cli.Command{
Name: "create-symlinks",
Usage: "A hook to create symlinks in the container.",
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "link",
Usage: "Specify a specific link to create. The link is specified as target::link. If the link exists in the container root, it is removed.",
Destination: &cfg.links,
},
// The following flags are testing-only flags.
&cli.StringFlag{
Name: "container-spec",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN. This is only intended for testing.",
Destination: &cfg.containerSpec,
Hidden: true,
},
}
return &c
}
func (m command) run(c *cli.Context, cfg *config) error {
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %v", err)
}
containerRoot, err := s.GetContainerRoot()
if err != nil {
return fmt.Errorf("failed to determined container root: %v", err)
}
created := make(map[string]bool)
for _, l := range cfg.links.Value() {
if created[l] {
m.logger.Debugf("Link %v already processed", l)
continue
}
parts := strings.Split(l, "::")
if len(parts) != 2 {
return fmt.Errorf("invalid symlink specification %v", l)
}
err := m.createLink(containerRoot, parts[0], parts[1])
if err != nil {
return fmt.Errorf("failed to create link %v: %w", parts, err)
}
created[l] = true
}
return nil
}
// createLink creates a symbolic link in the specified container root.
// This is equivalent to:
//
// chroot {{ .containerRoot }} ln -f -s {{ .target }} {{ .link }}
//
// If the specified link already exists and points to the same target, this
// operation is a no-op.
// If a file exists at the link path or the link points to a different target
// this file is removed before creating the link.
//
// Note that if the link path resolves to an absolute path oudside of the
// specified root, this is treated as an absolute path in this root.
func (m command) createLink(containerRoot string, targetPath string, link string) error {
linkPath := filepath.Join(containerRoot, link)
exists, err := linkExists(targetPath, linkPath)
if err != nil {
return fmt.Errorf("failed to check if link exists: %w", err)
}
if exists {
m.logger.Debugf("Link %s already exists", linkPath)
return nil
}
// We resolve the parent of the symlink that we're creating in the container root.
// If we resolve the full link path, an existing link at the location itself
// is also resolved here and we are unable to force create the link.
resolvedLinkParent, err := symlink.FollowSymlinkInScope(filepath.Dir(linkPath), containerRoot)
if err != nil {
return fmt.Errorf("failed to follow path for link %v relative to %v: %w", link, containerRoot, err)
}
resolvedLinkPath := filepath.Join(resolvedLinkParent, filepath.Base(linkPath))
m.logger.Infof("Symlinking %v to %v", resolvedLinkPath, targetPath)
err = os.MkdirAll(filepath.Dir(resolvedLinkPath), 0755)
if err != nil {
return fmt.Errorf("failed to create directory: %v", err)
}
err = symlinks.ForceCreate(targetPath, resolvedLinkPath)
if err != nil {
return fmt.Errorf("failed to create symlink: %v", err)
}
return nil
}
// linkExists checks whether the specified link exists.
// A link exists if the path exists, is a symlink, and points to the specified target.
func linkExists(target string, link string) (bool, error) {
currentTarget, err := symlinks.Resolve(link)
if errors.Is(err, os.ErrNotExist) {
return false, nil
}
if err != nil {
return false, fmt.Errorf("failed to resolve existing symlink %s: %w", link, err)
}
if currentTarget == target {
return true, nil
}
return false, nil
}

View File

@@ -1,297 +0,0 @@
package symlinks
import (
"os"
"path/filepath"
"strings"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
)
func TestLinkExist(t *testing.T) {
tmpDir := t.TempDir()
require.NoError(
t,
makeFs(tmpDir,
dirOrLink{path: "/a/b/c", target: "d"},
dirOrLink{path: "/a/b/e", target: "/a/b/f"},
),
)
exists, err := linkExists("d", filepath.Join(tmpDir, "/a/b/c"))
require.NoError(t, err)
require.True(t, exists)
exists, err = linkExists("/a/b/f", filepath.Join(tmpDir, "/a/b/e"))
require.NoError(t, err)
require.True(t, exists)
exists, err = linkExists("different-target", filepath.Join(tmpDir, "/a/b/c"))
require.NoError(t, err)
require.False(t, exists)
exists, err = linkExists("/a/b/d", filepath.Join(tmpDir, "/a/b/c"))
require.NoError(t, err)
require.False(t, exists)
exists, err = linkExists("foo", filepath.Join(tmpDir, "/a/b/does-not-exist"))
require.NoError(t, err)
require.False(t, exists)
}
func TestCreateLink(t *testing.T) {
type link struct {
path string
target string
}
type expectedLink struct {
link
err error
}
testCases := []struct {
description string
containerContents []dirOrLink
link link
expectedCreateError error
expectedLinks []expectedLink
}{
{
description: "link to / resolves to container root",
containerContents: []dirOrLink{
{path: "/lib/foo", target: "/"},
},
link: link{
path: "/lib/foo/libfoo.so",
target: "libfoo.so.1",
},
expectedLinks: []expectedLink{
{
link: link{
path: "{{ .containerRoot }}/libfoo.so",
target: "libfoo.so.1",
},
},
},
},
{
description: "link to / resolves to container root; parent relative link",
containerContents: []dirOrLink{
{path: "/lib/foo", target: "/"},
},
link: link{
path: "/lib/foo/libfoo.so",
target: "../libfoo.so.1",
},
expectedLinks: []expectedLink{
{
link: link{
path: "{{ .containerRoot }}/libfoo.so",
target: "../libfoo.so.1",
},
},
},
},
{
description: "link to / resolves to container root; absolute link",
containerContents: []dirOrLink{
{path: "/lib/foo", target: "/"},
},
link: link{
path: "/lib/foo/libfoo.so",
target: "/a-path-in-container/foo/libfoo.so.1",
},
expectedLinks: []expectedLink{
{
link: link{
path: "{{ .containerRoot }}/libfoo.so",
target: "/a-path-in-container/foo/libfoo.so.1",
},
},
{
// We also check that the target is NOT created.
link: link{
path: "{{ .containerRoot }}/a-path-in-container/foo/libfoo.so.1",
},
err: os.ErrNotExist,
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
tmpDir := t.TempDir()
hostRoot := filepath.Join(tmpDir, "/host-root/")
containerRoot := filepath.Join(tmpDir, "/container-root")
require.NoError(t, makeFs(hostRoot))
require.NoError(t, makeFs(containerRoot, tc.containerContents...))
// nvidia-cdi-hook create-symlinks --link linkSpec
err := getTestCommand().createLink(containerRoot, tc.link.target, tc.link.path)
// TODO: We may be able to replace this with require.ErrorIs.
if tc.expectedCreateError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
for _, expectedLink := range tc.expectedLinks {
path := strings.ReplaceAll(expectedLink.path, "{{ .containerRoot }}", containerRoot)
path = strings.ReplaceAll(path, "{{ .hostRoot }}", hostRoot)
if expectedLink.target != "" {
target, err := symlinks.Resolve(path)
require.ErrorIs(t, err, expectedLink.err)
require.Equal(t, expectedLink.target, target)
} else {
_, err := os.Stat(path)
require.ErrorIs(t, err, expectedLink.err)
}
}
})
}
}
func TestCreateLinkRelativePath(t *testing.T) {
tmpDir := t.TempDir()
hostRoot := filepath.Join(tmpDir, "/host-root/")
containerRoot := filepath.Join(tmpDir, "/container-root")
require.NoError(t, makeFs(hostRoot))
require.NoError(t, makeFs(containerRoot, dirOrLink{path: "/lib/"}))
// nvidia-cdi-hook create-symlinks --link libfoo.so.1::/lib/libfoo.so
err := getTestCommand().createLink(containerRoot, "libfoo.so.1", "/lib/libfoo.so")
require.NoError(t, err)
target, err := symlinks.Resolve(filepath.Join(containerRoot, "/lib/libfoo.so"))
require.NoError(t, err)
require.Equal(t, "libfoo.so.1", target)
}
func TestCreateLinkAbsolutePath(t *testing.T) {
tmpDir := t.TempDir()
hostRoot := filepath.Join(tmpDir, "/host-root/")
containerRoot := filepath.Join(tmpDir, "/container-root")
require.NoError(t, makeFs(hostRoot))
require.NoError(t, makeFs(containerRoot, dirOrLink{path: "/lib/"}))
// nvidia-cdi-hook create-symlinks --link /lib/libfoo.so.1::/lib/libfoo.so
err := getTestCommand().createLink(containerRoot, "/lib/libfoo.so.1", "/lib/libfoo.so")
require.NoError(t, err)
target, err := symlinks.Resolve(filepath.Join(containerRoot, "/lib/libfoo.so"))
require.NoError(t, err)
require.Equal(t, "/lib/libfoo.so.1", target)
}
func TestCreateLinkAlreadyExists(t *testing.T) {
testCases := []struct {
description string
containerContents []dirOrLink
shouldExist []string
}{
{
description: "link already exists with correct target",
containerContents: []dirOrLink{{path: "/lib/libfoo.so", target: "libfoo.so.1"}},
shouldExist: []string{},
},
{
description: "link already exists with different target",
containerContents: []dirOrLink{{path: "/lib/libfoo.so", target: "different-target"}, {path: "different-target"}},
shouldExist: []string{"{{ .containerRoot }}/different-target"},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
tmpDir := t.TempDir()
hostRoot := filepath.Join(tmpDir, "/host-root/")
containerRoot := filepath.Join(tmpDir, "/container-root")
require.NoError(t, makeFs(hostRoot))
require.NoError(t, makeFs(containerRoot, tc.containerContents...))
// nvidia-cdi-hook create-symlinks --link libfoo.so.1::/lib/libfoo.so
err := getTestCommand().createLink(containerRoot, "libfoo.so.1", "/lib/libfoo.so")
require.NoError(t, err)
target, err := symlinks.Resolve(filepath.Join(containerRoot, "lib/libfoo.so"))
require.NoError(t, err)
require.Equal(t, "libfoo.so.1", target)
for _, p := range tc.shouldExist {
require.DirExists(t, strings.ReplaceAll(p, "{{ .containerRoot }}", containerRoot))
}
})
}
}
func TestCreateLinkOutOfBounds(t *testing.T) {
tmpDir := t.TempDir()
hostRoot := filepath.Join(tmpDir, "/host-root")
containerRoot := filepath.Join(tmpDir, "/container-root")
require.NoError(t,
makeFs(hostRoot,
dirOrLink{path: "libfoo.so"},
),
)
require.NoError(t,
makeFs(containerRoot,
dirOrLink{path: "/lib"},
dirOrLink{path: "/lib/foo", target: hostRoot},
),
)
path, err := symlinks.Resolve(filepath.Join(containerRoot, "/lib/foo"))
require.NoError(t, err)
require.Equal(t, hostRoot, path)
// nvidia-cdi-hook create-symlinks --link ../libfoo.so.1::/lib/foo/libfoo.so
_ = getTestCommand().createLink(containerRoot, "../libfoo.so.1", "/lib/foo/libfoo.so")
require.NoError(t, err)
target, err := symlinks.Resolve(filepath.Join(containerRoot, hostRoot, "libfoo.so"))
require.NoError(t, err)
require.Equal(t, "../libfoo.so.1", target)
require.DirExists(t, filepath.Join(hostRoot, "libfoo.so"))
}
type dirOrLink struct {
path string
target string
}
func makeFs(tmpdir string, fs ...dirOrLink) error {
if err := os.MkdirAll(tmpdir, 0o755); err != nil {
return err
}
for _, s := range fs {
s.path = filepath.Join(tmpdir, s.path)
if s.target == "" {
_ = os.MkdirAll(s.path, 0o755)
continue
}
if err := os.MkdirAll(filepath.Dir(s.path), 0o755); err != nil {
return err
}
if err := os.Symlink(s.target, s.path); err != nil && !os.IsExist(err) {
return err
}
}
return nil
}
// getTestCommand creates a command for running tests against.
func getTestCommand() *command {
logger, _ := testlog.NewNullLogger()
return &command{
logger: logger,
}
}

View File

@@ -1,76 +0,0 @@
/**
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package cudacompat
import (
"os"
"path/filepath"
"github.com/moby/sys/symlink"
)
// A containerRoot represents the root filesystem of a container.
type containerRoot string
// hasPath checks whether the specified path exists in the root.
func (r containerRoot) hasPath(path string) bool {
resolved, err := r.resolve(path)
if err != nil {
return false
}
if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) {
return false
}
return true
}
// globFiles matches the specified pattern in the root.
// The files that match must be regular files.
func (r containerRoot) globFiles(pattern string) ([]string, error) {
patternPath, err := r.resolve(pattern)
if err != nil {
return nil, err
}
matches, err := filepath.Glob(patternPath)
if err != nil {
return nil, err
}
var files []string
for _, match := range matches {
info, err := os.Lstat(match)
if err != nil {
return nil, err
}
// Ignore symlinks.
if info.Mode()&os.ModeSymlink != 0 {
continue
}
// Ignore directories.
if info.IsDir() {
continue
}
files = append(files, match)
}
return files, nil
}
// resolve returns the absolute path including root path.
// Symlinks are resolved, but are guaranteed to resolve in the root.
func (r containerRoot) resolve(path string) (string, error) {
absolute := filepath.Clean(filepath.Join(string(r), path))
return symlink.FollowSymlinkInScope(absolute, string(r))
}

View File

@@ -1,221 +0,0 @@
/**
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package cudacompat
import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
const (
cudaCompatPath = "/usr/local/cuda/compat"
// cudaCompatLdsoconfdFilenamePattern specifies the pattern for the filename
// in ld.so.conf.d that includes a reference to the CUDA compat path.
// The 00-compat prefix is chosen to ensure that these libraries have a
// higher precedence than other libraries on the system.
cudaCompatLdsoconfdFilenamePattern = "00-compat-*.conf"
)
type command struct {
logger logger.Interface
}
type options struct {
hostDriverVersion string
containerSpec string
}
// NewCommand constructs a cuda-compat command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build the enable-cuda-compat command
func (m command) build() *cli.Command {
cfg := options{}
// Create the 'enable-cuda-compat' command
c := cli.Command{
Name: "enable-cuda-compat",
Usage: "This hook ensures that the folder containing the CUDA compat libraries is added to the ldconfig search path if required.",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &cfg)
},
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "host-driver-version",
Usage: "Specify the host driver version. If the CUDA compat libraries detected in the container do not have a higher MAJOR version, the hook is a no-op.",
Destination: &cfg.hostDriverVersion,
},
&cli.StringFlag{
Name: "container-spec",
Hidden: true,
Category: "testing-only",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func (m command) validateFlags(_ *cli.Context, cfg *options) error {
return nil
}
func (m command) run(_ *cli.Context, cfg *options) error {
if cfg.hostDriverVersion == "" {
return nil
}
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %w", err)
}
containerRootDir, err := s.GetContainerRoot()
if err != nil {
return fmt.Errorf("failed to determined container root: %w", err)
}
containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDir), cfg.hostDriverVersion)
if err != nil {
return fmt.Errorf("failed to get container forward compat directory: %w", err)
}
if containerForwardCompatDir == "" {
return nil
}
return m.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir)
}
func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostDriverVersion string) (string, error) {
if hostDriverVersion == "" {
m.logger.Debugf("Host driver version not specified")
return "", nil
}
if !containerRoot.hasPath(cudaCompatPath) {
m.logger.Debugf("No CUDA forward compatibility libraries directory in container")
return "", nil
}
if !containerRoot.hasPath("/etc/ld.so.cache") {
m.logger.Debugf("The container does not have an LDCache")
return "", nil
}
libs, err := containerRoot.globFiles(filepath.Join(cudaCompatPath, "libcuda.so.*.*"))
if err != nil {
m.logger.Warningf("Failed to find CUDA compat library: %w", err)
return "", nil
}
if len(libs) == 0 {
m.logger.Debugf("No CUDA forward compatibility libraries container")
return "", nil
}
if len(libs) != 1 {
m.logger.Warningf("Unexpected number of CUDA compat libraries in container: %v", libs)
return "", nil
}
compatDriverVersion := strings.TrimPrefix(filepath.Base(libs[0]), "libcuda.so.")
compatMajor, err := extractMajorVersion(compatDriverVersion)
if err != nil {
return "", fmt.Errorf("failed to extract major version from %q: %v", compatDriverVersion, err)
}
driverMajor, err := extractMajorVersion(hostDriverVersion)
if err != nil {
return "", fmt.Errorf("failed to extract major version from %q: %v", hostDriverVersion, err)
}
if driverMajor >= compatMajor {
m.logger.Debugf("Compat major version is not greater than the host driver major version (%v >= %v)", hostDriverVersion, compatDriverVersion)
return "", nil
}
resolvedCompatDir := strings.TrimPrefix(filepath.Dir(libs[0]), string(containerRoot))
return resolvedCompatDir, nil
}
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/ in the specified root.
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and
// contains the specified directories on each line.
func (m command) createLdsoconfdFile(in containerRoot, pattern string, dirs ...string) error {
if len(dirs) == 0 {
m.logger.Debugf("No directories to add to /etc/ld.so.conf")
return nil
}
ldsoconfdDir, err := in.resolve("/etc/ld.so.conf.d")
if err != nil {
return err
}
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
}
configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
if err != nil {
return fmt.Errorf("failed to create config file: %w", err)
}
defer configFile.Close()
m.logger.Debugf("Adding directories %v to %v", dirs, configFile.Name())
added := make(map[string]bool)
for _, dir := range dirs {
if added[dir] {
continue
}
_, err = fmt.Fprintf(configFile, "%s\n", dir)
if err != nil {
return fmt.Errorf("failed to update config file: %w", err)
}
added[dir] = true
}
// The created file needs to be world readable for the cases where the container is run as a non-root user.
if err := configFile.Chmod(0644); err != nil {
return fmt.Errorf("failed to chmod config file: %w", err)
}
return nil
}
// extractMajorVersion parses a version string and returns the major version as an int.
func extractMajorVersion(version string) (int, error) {
majorString := strings.SplitN(version, ".", 2)[0]
return strconv.Atoi(majorString)
}

View File

@@ -1,182 +0,0 @@
/*
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package cudacompat
import (
"os"
"path/filepath"
"strings"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestCompatLibs(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
contents map[string]string
hostDriverVersion string
expectedContainerForwardCompatDir string
}{
{
description: "empty root",
hostDriverVersion: "222.55.66",
},
{
description: "compat lib is newer; no ldcache",
contents: map[string]string{
"/usr/local/cuda/compat/libcuda.so.333.88.99": "",
},
hostDriverVersion: "222.55.66",
},
{
description: "compat lib is newer; ldcache",
contents: map[string]string{
"/etc/ld.so.cache": "",
"/usr/local/cuda/compat/libcuda.so.333.88.99": "",
},
hostDriverVersion: "222.55.66",
expectedContainerForwardCompatDir: "/usr/local/cuda/compat",
},
{
description: "compat lib is older; ldcache",
contents: map[string]string{
"/etc/ld.so.cache": "",
"/usr/local/cuda/compat/libcuda.so.111.88.99": "",
},
hostDriverVersion: "222.55.66",
expectedContainerForwardCompatDir: "",
},
{
description: "compat lib has same major version; ldcache",
contents: map[string]string{
"/etc/ld.so.cache": "",
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
},
hostDriverVersion: "222.55.66",
expectedContainerForwardCompatDir: "",
},
{
description: "numeric comparison is used; ldcache",
contents: map[string]string{
"/etc/ld.so.cache": "",
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
},
hostDriverVersion: "99.55.66",
expectedContainerForwardCompatDir: "/usr/local/cuda/compat",
},
{
description: "driver version empty; ldcache",
contents: map[string]string{
"/etc/ld.so.cache": "",
"/usr/local/cuda/compat/libcuda.so.222.88.99": "",
},
hostDriverVersion: "",
},
{
description: "symlinks are followed",
contents: map[string]string{
"/etc/ld.so.cache": "",
"/etc/alternatives/cuda/compat/libcuda.so.333.88.99": "",
"/usr/local/cuda": "symlink=/etc/alternatives/cuda",
},
hostDriverVersion: "222.55.66",
expectedContainerForwardCompatDir: "/etc/alternatives/cuda/compat",
},
{
description: "symlinks stay in container",
contents: map[string]string{
"/etc/ld.so.cache": "",
"/compat/libcuda.so.333.88.99": "",
"/usr/local/cuda": "symlink=../../../../../../",
},
hostDriverVersion: "222.55.66",
expectedContainerForwardCompatDir: "/compat",
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
containerRootDir := t.TempDir()
for name, contents := range tc.contents {
target := filepath.Join(containerRootDir, name)
require.NoError(t, os.MkdirAll(filepath.Dir(target), 0755))
if strings.HasPrefix(contents, "symlink=") {
require.NoError(t, os.Symlink(strings.TrimPrefix(contents, "symlink="), target))
continue
}
require.NoError(t, os.WriteFile(target, []byte(contents), 0600))
}
c := command{
logger: logger,
}
containerForwardCompatDir, err := c.getContainerForwardCompatDir(containerRoot(containerRootDir), tc.hostDriverVersion)
require.NoError(t, err)
require.EqualValues(t, tc.expectedContainerForwardCompatDir, containerForwardCompatDir)
})
}
}
func TestUpdateLdconfig(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
folders []string
expectedContents string
}{
{
description: "no folders; have no contents",
},
{
description: "single folder is added",
folders: []string{"/usr/local/cuda/compat"},
expectedContents: "/usr/local/cuda/compat\n",
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
containerRootDir := t.TempDir()
c := command{
logger: logger,
}
err := c.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, tc.folders...)
require.NoError(t, err)
matches, err := filepath.Glob(filepath.Join(containerRootDir, "/etc/ld.so.conf.d/00-compat-*.conf"))
require.NoError(t, err)
if tc.expectedContents == "" {
require.Empty(t, matches)
return
}
require.Len(t, matches, 1)
contents, err := os.ReadFile(matches[0])
require.NoError(t, err)
require.EqualValues(t, tc.expectedContents, string(contents))
})
}
}

View File

@@ -1,144 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package disabledevicenodemodification
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"os"
"strings"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
const (
nvidiaDriverParamsPath = "/proc/driver/nvidia/params"
)
type options struct {
containerSpec string
}
// NewCommand constructs an disable-device-node-modification subcommand with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
cfg := options{}
c := cli.Command{
Name: "disable-device-node-modification",
Usage: "Ensure that the /proc/driver/nvidia/params file present in the container does not allow device node modifications.",
Before: func(c *cli.Context) error {
return validateFlags(c, &cfg)
},
Action: func(c *cli.Context) error {
return run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "container-spec",
Hidden: true,
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func validateFlags(c *cli.Context, cfg *options) error {
return nil
}
func run(_ *cli.Context, cfg *options) error {
modifiedParamsFileContents, err := getModifiedNVIDIAParamsContents()
if err != nil {
return fmt.Errorf("failed to get modified params file contents: %w", err)
}
if len(modifiedParamsFileContents) == 0 {
return nil
}
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %w", err)
}
containerRootDirPath, err := s.GetContainerRoot()
if err != nil {
return fmt.Errorf("failed to determined container root: %w", err)
}
return createParamsFileInContainer(containerRootDirPath, modifiedParamsFileContents)
}
func getModifiedNVIDIAParamsContents() ([]byte, error) {
hostNvidiaParamsFile, err := os.Open(nvidiaDriverParamsPath)
if errors.Is(err, os.ErrNotExist) {
return nil, nil
}
if err != nil {
return nil, fmt.Errorf("failed to load params file: %w", err)
}
defer hostNvidiaParamsFile.Close()
modifiedContents, err := getModifiedParamsFileContentsFromReader(hostNvidiaParamsFile)
if err != nil {
return nil, fmt.Errorf("failed to get modfied params file contents: %w", err)
}
return modifiedContents, nil
}
// getModifiedParamsFileContentsFromReader returns the contents of a modified params file from the specified reader.
func getModifiedParamsFileContentsFromReader(r io.Reader) ([]byte, error) {
var modified bytes.Buffer
scanner := bufio.NewScanner(r)
var requiresModification bool
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "ModifyDeviceFiles: ") {
if line == "ModifyDeviceFiles: 0" {
return nil, nil
}
if line == "ModifyDeviceFiles: 1" {
line = "ModifyDeviceFiles: 0"
requiresModification = true
}
}
if _, err := modified.WriteString(line + "\n"); err != nil {
return nil, fmt.Errorf("failed to create output buffer: %w", err)
}
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("failed to read params file: %w", err)
}
if !requiresModification {
return nil, nil
}
return modified.Bytes(), nil
}

View File

@@ -1,91 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package disabledevicenodemodification
import (
"bytes"
"testing"
"github.com/stretchr/testify/require"
)
func TestGetModifiedParamsFileContentsFromReader(t *testing.T) {
testCases := map[string]struct {
contents []byte
expectedError error
expectedContents []byte
}{
"no contents": {
contents: nil,
expectedError: nil,
expectedContents: nil,
},
"other contents are ignored": {
contents: []byte(`# Some other content
that we don't care about
`),
expectedError: nil,
expectedContents: nil,
},
"already zero requires no modification": {
contents: []byte("ModifyDeviceFiles: 0"),
expectedError: nil,
expectedContents: nil,
},
"leading spaces require no modification": {
contents: []byte(" ModifyDeviceFiles: 1"),
},
"Trailing spaces require no modification": {
contents: []byte("ModifyDeviceFiles: 1 "),
},
"Not 1 require no modification": {
contents: []byte("ModifyDeviceFiles: 11"),
},
"single line requires modification": {
contents: []byte("ModifyDeviceFiles: 1"),
expectedError: nil,
expectedContents: []byte("ModifyDeviceFiles: 0\n"),
},
"single line with trailing newline requires modification": {
contents: []byte("ModifyDeviceFiles: 1\n"),
expectedError: nil,
expectedContents: []byte("ModifyDeviceFiles: 0\n"),
},
"other content is maintained": {
contents: []byte(`ModifyDeviceFiles: 1
other content
that
is maintained`),
expectedError: nil,
expectedContents: []byte(`ModifyDeviceFiles: 0
other content
that
is maintained
`),
},
}
for description, tc := range testCases {
t.Run(description, func(t *testing.T) {
contents, err := getModifiedParamsFileContentsFromReader(bytes.NewReader(tc.contents))
require.EqualValues(t, tc.expectedError, err)
require.EqualValues(t, string(tc.expectedContents), string(contents))
})
}
}

View File

@@ -1,63 +0,0 @@
//go:build linux
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package disabledevicenodemodification
import (
"fmt"
"os"
"path/filepath"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
func createParamsFileInContainer(containerRootDirPath string, contents []byte) error {
tmpRoot, err := os.MkdirTemp("", "nvct-empty-dir*")
if err != nil {
return fmt.Errorf("failed to create temp root: %w", err)
}
if err := createTmpFs(tmpRoot, len(contents)); err != nil {
return fmt.Errorf("failed to create tmpfs mount for params file: %w", err)
}
modifiedParamsFile, err := os.OpenFile(filepath.Join(tmpRoot, "nvct-params"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0444)
if err != nil {
return fmt.Errorf("failed to open modified params file: %w", err)
}
defer modifiedParamsFile.Close()
if _, err := modifiedParamsFile.Write(contents); err != nil {
return fmt.Errorf("failed to write temporary params file: %w", err)
}
err = utils.WithProcfd(containerRootDirPath, nvidiaDriverParamsPath, func(nvidiaDriverParamsFdPath string) error {
return unix.Mount(modifiedParamsFile.Name(), nvidiaDriverParamsFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "")
})
if err != nil {
return fmt.Errorf("failed to mount modified params file: %w", err)
}
return nil
}
func createTmpFs(target string, size int) error {
return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size))
}

View File

@@ -1,27 +0,0 @@
//go:build !linux
// +build !linux
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package disabledevicenodemodification
import "fmt"
func createParamsFileInContainer(containerRootDirPath string, contents []byte) error {
return fmt.Errorf("not supported")
}

View File

@@ -1,107 +0,0 @@
/**
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
cli "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/commands"
)
// options defines the options that can be set for the CLI through config files,
// environment variables, or command line flags
type options struct {
// Debug indicates whether the CLI is started in "debug" mode
Debug bool
// Quiet indicates whether the CLI is started in "quiet" mode
Quiet bool
}
func main() {
logger := logrus.New()
// Create a options struct to hold the parsed environment variables or command line flags
opts := options{}
// Create the top-level CLI
c := cli.NewApp()
c.Name = "NVIDIA CDI Hook"
c.UseShortOptionHandling = true
c.EnableBashCompletion = true
c.Usage = "Command to structure files for usage inside a container, called as hooks from a container runtime, defined in a CDI yaml file"
c.Version = info.GetVersionString()
// We set the default action for the `nvidia-cdi-hook` command to issue a
// warning and exit with no error.
// This means that if an unsupported hook is run, a container will not fail
// to launch. An unsupported hook could be the result of a CDI specification
// referring to a new hook that is not yet supported by an older NVIDIA
// Container Toolkit version or a hook that has been removed in newer
// version.
c.Action = func(ctx *cli.Context) error {
commands.IssueUnsupportedHookWarning(logger, ctx)
return nil
}
// Setup the flags for this command
c.Flags = []cli.Flag{
&cli.BoolFlag{
Name: "debug",
Aliases: []string{"d"},
Usage: "Enable debug-level logging",
Destination: &opts.Debug,
// TODO: Support for NVIDIA_CDI_DEBUG is deprecated and NVIDIA_CTK_DEBUG should be used instead.
EnvVars: []string{"NVIDIA_CTK_DEBUG", "NVIDIA_CDI_DEBUG"},
},
&cli.BoolFlag{
Name: "quiet",
Usage: "Suppress all output except for errors; overrides --debug",
Destination: &opts.Quiet,
// TODO: Support for NVIDIA_CDI_QUIET is deprecated and NVIDIA_CTK_QUIET should be used instead.
EnvVars: []string{"NVDIA_CTK_QUIET", "NVIDIA_CDI_QUIET"},
},
}
// Set log-level for all subcommands
c.Before = func(c *cli.Context) error {
logLevel := logrus.InfoLevel
if opts.Debug {
logLevel = logrus.DebugLevel
}
if opts.Quiet {
logLevel = logrus.ErrorLevel
}
logger.SetLevel(logLevel)
return nil
}
// Define the subcommands
c.Commands = commands.New(logger)
// Run the CLI
err := c.Run(os.Args)
if err != nil {
logger.Errorf("%v", err)
os.Exit(1)
}
}

View File

@@ -1,46 +0,0 @@
/**
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"os"
"path/filepath"
"github.com/moby/sys/symlink"
)
// A containerRoot represents the root filesystem of a container.
type containerRoot string
// hasPath checks whether the specified path exists in the root.
func (r containerRoot) hasPath(path string) bool {
resolved, err := r.resolve(path)
if err != nil {
return false
}
if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) {
return false
}
return true
}
// resolve returns the absolute path including root path.
// Symlinks are resolved, but are guaranteed to resolve in the root.
func (r containerRoot) resolve(path string) (string, error) {
absolute := filepath.Clean(filepath.Join(string(r), path))
return symlink.FollowSymlinkInScope(absolute, string(r))
}

View File

@@ -1,200 +0,0 @@
//go:build linux
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"syscall"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/moby/sys/reexec"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
// pivotRoot will call pivot_root such that rootfs becomes the new root
// filesystem, and everything else is cleaned up.
// This is adapted from the implementation here:
//
// https://github.com/opencontainers/runc/blob/e89a29929c775025419ab0d218a43588b4c12b9a/libcontainer/rootfs_linux.go#L1056-L1113
//
// With the `mount` and `unmount` calls changed to direct unix.Mount and unix.Unmount calls.
func pivotRoot(rootfs string) error {
// While the documentation may claim otherwise, pivot_root(".", ".") is
// actually valid. What this results in is / being the new root but
// /proc/self/cwd being the old root. Since we can play around with the cwd
// with pivot_root this allows us to pivot without creating directories in
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return &os.PathError{Op: "open", Path: "/", Err: err}
}
defer unix.Close(oldroot) //nolint: errcheck
newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return &os.PathError{Op: "open", Path: rootfs, Err: err}
}
defer unix.Close(newroot) //nolint: errcheck
// Change to the new root so that the pivot_root actually acts on it.
if err := unix.Fchdir(newroot); err != nil {
return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(newroot), Err: err}
}
if err := unix.PivotRoot(".", "."); err != nil {
return &os.PathError{Op: "pivot_root", Path: ".", Err: err}
}
// Currently our "." is oldroot (according to the current kernel code).
// However, purely for safety, we will fchdir(oldroot) since there isn't
// really any guarantee from the kernel what /proc/self/cwd will be after a
// pivot_root(2).
if err := unix.Fchdir(oldroot); err != nil {
return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(oldroot), Err: err}
}
// Make oldroot rslave to make sure our unmounts don't propagate to the
// host (and thus bork the machine). We don't use rprivate because this is
// known to cause issues due to races where we still have a reference to a
// mount while a process in the host namespace are trying to operate on
// something they think has no mounts (devicemapper in particular).
if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
return err
}
// Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
return err
}
// Switch back to our shiny new root.
if err := unix.Chdir("/"); err != nil {
return &os.PathError{Op: "chdir", Path: "/", Err: err}
}
return nil
}
// mountLdConfig mounts the host ldconfig to the mount namespace of the hook.
// We use WithProcfd to perform the mount operations to ensure that the changes
// are persisted across the pivot root.
func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) {
hostLdconfigInfo, err := os.Stat(hostLdconfigPath)
if err != nil {
return "", fmt.Errorf("error reading host ldconfig: %w", err)
}
hookScratchDirPath := "/var/run/nvidia-ctk-hook"
ldconfigPath := filepath.Join(hookScratchDirPath, "ldconfig")
if err := utils.MkdirAllInRoot(containerRootDirPath, hookScratchDirPath, 0755); err != nil {
return "", fmt.Errorf("error creating hook scratch folder: %w", err)
}
err = utils.WithProcfd(containerRootDirPath, hookScratchDirPath, func(hookScratchDirFdPath string) error {
return createTmpFs(hookScratchDirFdPath, int(hostLdconfigInfo.Size()))
})
if err != nil {
return "", fmt.Errorf("error creating tmpfs: %w", err)
}
if _, err := createFileInRoot(containerRootDirPath, ldconfigPath, hostLdconfigInfo.Mode()); err != nil {
return "", fmt.Errorf("error creating ldconfig: %w", err)
}
err = utils.WithProcfd(containerRootDirPath, ldconfigPath, func(ldconfigFdPath string) error {
return unix.Mount(hostLdconfigPath, ldconfigFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "")
})
if err != nil {
return "", fmt.Errorf("error bind mounting host ldconfig: %w", err)
}
return ldconfigPath, nil
}
func createFileInRoot(containerRootDirPath string, destinationPath string, mode os.FileMode) (string, error) {
dest, err := securejoin.SecureJoin(containerRootDirPath, destinationPath)
if err != nil {
return "", err
}
// Make the parent directory.
destDir, destBase := filepath.Split(dest)
destDirFd, err := utils.MkdirAllInRootOpen(containerRootDirPath, destDir, 0755)
if err != nil {
return "", fmt.Errorf("error creating parent dir: %w", err)
}
defer destDirFd.Close()
// Make the target file. We want to avoid opening any file that is
// already there because it could be a "bad" file like an invalid
// device or hung tty that might cause a DoS, so we use mknodat.
// destBase does not contain any "/" components, and mknodat does
// not follow trailing symlinks, so we can safely just call mknodat
// here.
if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|uint32(mode), 0); err != nil {
// If we get EEXIST, there was already an inode there and
// we can consider that a success.
if !errors.Is(err, unix.EEXIST) {
return "", fmt.Errorf("error creating empty file: %w", err)
}
}
return dest, nil
}
// mountProc mounts a clean proc filesystem in the new root.
func mountProc(newroot string) error {
target := filepath.Join(newroot, "/proc")
if err := os.MkdirAll(target, 0755); err != nil {
return fmt.Errorf("error creating directory: %w", err)
}
return unix.Mount("proc", target, "proc", 0, "")
}
// createTmpFs creates a tmpfs at the specified location with the specified size.
func createTmpFs(target string, size int) error {
return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size))
}
// createReexecCommand creates a command that can be used to trigger the reexec
// initializer.
// On linux this command runs in new namespaces.
func createReexecCommand(args []string) *exec.Cmd {
cmd := reexec.Command(args...)
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: syscall.CLONE_NEWNS |
syscall.CLONE_NEWUTS |
syscall.CLONE_NEWIPC |
syscall.CLONE_NEWPID |
syscall.CLONE_NEWNET,
}
return cmd
}

View File

@@ -1,51 +0,0 @@
//go:build !linux
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"fmt"
"os"
"os/exec"
"github.com/moby/sys/reexec"
)
func pivotRoot(newroot string) error {
return fmt.Errorf("not supported")
}
func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) {
return "", fmt.Errorf("not supported")
}
func mountProc(newroot string) error {
return fmt.Errorf("not supported")
}
// createReexecCommand creates a command that can be used ot trigger the reexec
// initializer.
func createReexecCommand(args []string) *exec.Cmd {
cmd := reexec.Command(args...)
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd
}

View File

@@ -1,58 +0,0 @@
//go:build linux
/**
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"fmt"
"os"
"strconv"
"syscall"
"github.com/opencontainers/runc/libcontainer/exeseal"
)
// SafeExec attempts to clone the specified binary (as an memfd, for example) before executing it.
func SafeExec(path string, args []string, envv []string) error {
safeExe, err := cloneBinary(path)
if err != nil {
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
return syscall.Exec(path, args, envv)
}
defer safeExe.Close()
exePath := "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
return syscall.Exec(exePath, args, envv)
}
func cloneBinary(path string) (*os.File, error) {
exe, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("opening current binary: %w", err)
}
defer exe.Close()
stat, err := exe.Stat()
if err != nil {
return nil, fmt.Errorf("checking %v size: %w", path, err)
}
size := stat.Size()
return exeseal.CloneBinary(exe, size, path, os.TempDir())
}

View File

@@ -1,28 +0,0 @@
//go:build !linux
/**
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import "syscall"
// SafeExec is not implemented on non-linux systems and forwards directly to the
// Exec syscall.
func SafeExec(path string, args []string, envv []string) error {
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
return syscall.Exec(path, args, envv)
}

View File

@@ -1,255 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"errors"
"fmt"
"log"
"os"
"strings"
"github.com/moby/sys/reexec"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
const (
// ldsoconfdFilenamePattern specifies the pattern for the filename
// in ld.so.conf.d that includes references to the specified directories.
// The 00-nvcr prefix is chosen to ensure that these libraries have a
// higher precedence than other libraries on the system, but lower than
// the 00-cuda-compat that is included in some containers.
ldsoconfdFilenamePattern = "00-nvcr-*.conf"
reexecUpdateLdCacheCommandName = "reexec-update-ldcache"
)
type command struct {
logger logger.Interface
}
type options struct {
folders cli.StringSlice
ldconfigPath string
containerSpec string
}
func init() {
reexec.Register(reexecUpdateLdCacheCommandName, updateLdCacheHandler)
if reexec.Init() {
os.Exit(0)
}
}
// NewCommand constructs an update-ldcache command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build the update-ldcache command
func (m command) build() *cli.Command {
cfg := options{}
// Create the 'update-ldcache' command
c := cli.Command{
Name: "update-ldcache",
Usage: "Update ldcache in a container by running ldconfig",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &cfg)
},
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "folder",
Usage: "Specify a folder to add to /etc/ld.so.conf before updating the ld cache",
Destination: &cfg.folders,
},
&cli.StringFlag{
Name: "ldconfig-path",
Usage: "Specify the path to the ldconfig program",
Destination: &cfg.ldconfigPath,
Value: "/sbin/ldconfig",
},
&cli.StringFlag{
Name: "container-spec",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func (m command) validateFlags(c *cli.Context, cfg *options) error {
if cfg.ldconfigPath == "" {
return errors.New("ldconfig-path must be specified")
}
return nil
}
func (m command) run(c *cli.Context, cfg *options) error {
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %v", err)
}
containerRootDir, err := s.GetContainerRoot()
if err != nil || containerRootDir == "" || containerRootDir == "/" {
return fmt.Errorf("failed to determined container root: %v", err)
}
args := []string{
reexecUpdateLdCacheCommandName,
strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"),
containerRootDir,
}
args = append(args, cfg.folders.Value()...)
cmd := createReexecCommand(args)
return cmd.Run()
}
// updateLdCacheHandler wraps updateLdCache with error handling.
func updateLdCacheHandler() {
if err := updateLdCache(os.Args); err != nil {
log.Printf("Error updating ldcache: %v", err)
os.Exit(1)
}
}
// updateLdCache is invoked from a reexec'd handler and provides namespace
// isolation for the operations performed by this hook.
// At the point where this is invoked, we are in a new mount namespace that is
// cloned from the parent.
//
// args[0] is the reexec initializer function name
// args[1] is the path of the ldconfig binary on the host
// args[2] is the container root directory
// The remaining args are folders that need to be added to the ldcache.
func updateLdCache(args []string) error {
if len(args) < 3 {
return fmt.Errorf("incorrect arguments: %v", args)
}
hostLdconfigPath := args[1]
containerRootDirPath := args[2]
// To prevent leaking the parent proc filesystem, we create a new proc mount
// in the container root.
if err := mountProc(containerRootDirPath); err != nil {
return fmt.Errorf("error mounting /proc: %w", err)
}
// We mount the host ldconfig before we pivot root since host paths are not
// visible after the pivot root operation.
ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath)
if err != nil {
return fmt.Errorf("error mounting host ldconfig: %w", err)
}
// We pivot to the container root for the new process, this further limits
// access to the host.
if err := pivotRoot(containerRootDirPath); err != nil {
return fmt.Errorf("error running pivot_root: %w", err)
}
return runLdconfig(ldconfigPath, args[3:]...)
}
// runLdconfig runs the ldconfig binary and ensures that the specified directories
// are processed for the ldcache.
func runLdconfig(ldconfigPath string, directories ...string) error {
args := []string{
"ldconfig",
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
// be configured to use a different config file by default.
// Note that since we apply the `-r {{ .containerRootDir }}` argument, /etc/ld.so.conf is
// in the container.
"-f", "/etc/ld.so.conf",
}
containerRoot := containerRoot("/")
if containerRoot.hasPath("/etc/ld.so.cache") {
args = append(args, "-C", "/etc/ld.so.cache")
} else {
args = append(args, "-N")
}
if containerRoot.hasPath("/etc/ld.so.conf.d") {
err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...)
if err != nil {
return fmt.Errorf("failed to update ld.so.conf.d: %w", err)
}
} else {
args = append(args, directories...)
}
return SafeExec(ldconfigPath, args, nil)
}
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/.
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and
// contains the specified directories on each line.
func createLdsoconfdFile(pattern string, dirs ...string) error {
if len(dirs) == 0 {
return nil
}
ldsoconfdDir := "/etc/ld.so.conf.d"
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
}
configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
if err != nil {
return fmt.Errorf("failed to create config file: %w", err)
}
defer func() {
_ = configFile.Close()
}()
added := make(map[string]bool)
for _, dir := range dirs {
if added[dir] {
continue
}
_, err = fmt.Fprintf(configFile, "%s\n", dir)
if err != nil {
return fmt.Errorf("failed to update config file: %w", err)
}
added[dir] = true
}
// The created file needs to be world readable for the cases where the container is run as a non-root user.
if err := configFile.Chmod(0644); err != nil {
return fmt.Errorf("failed to chmod config file: %w", err)
}
return nil
}

View File

@@ -6,6 +6,8 @@ import (
"log"
"os"
"path"
"path/filepath"
"strings"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
@@ -13,11 +15,31 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
)
const (
envCUDAVersion = "CUDA_VERSION"
envNVRequirePrefix = "NVIDIA_REQUIRE_"
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)
const (
capSysAdmin = "CAP_SYS_ADMIN"
)
const (
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
)
type nvidiaConfig struct {
Devices []string
Devices string
MigConfigDevices string
MigMonitorDevices string
ImexChannels []string
ImexChannels string
DriverCapabilities string
// Requirements defines the requirements DSL for the container to run.
// This is empty if no specific requirements are needed, or if requirements are
@@ -55,14 +77,23 @@ type LinuxCapabilities struct {
Ambient []string `json:"ambient,omitempty" platform:"linux"`
}
// Mount from OCI runtime spec
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
type Mount struct {
Destination string `json:"destination"`
Type string `json:"type,omitempty" platform:"linux,solaris"`
Source string `json:"source,omitempty"`
Options []string `json:"options,omitempty"`
}
// Spec from OCI runtime spec
// We use pointers to structs, similarly to the latest version of runtime-spec:
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
type Spec struct {
Version *string `json:"ociVersion"`
Process *Process `json:"process,omitempty"`
Root *Root `json:"root,omitempty"`
Mounts []specs.Mount `json:"mounts,omitempty"`
Version *string `json:"ociVersion"`
Process *Process `json:"process,omitempty"`
Root *Root `json:"root,omitempty"`
Mounts []Mount `json:"mounts,omitempty"`
}
// HookState holds state information about the hook
@@ -99,9 +130,9 @@ func loadSpec(path string) (spec *Spec) {
return
}
func (s *Spec) GetCapabilities() []string {
if s == nil || s.Process == nil || s.Process.Capabilities == nil {
return nil
func isPrivileged(s *Spec) bool {
if s.Process.Capabilities == nil {
return false
}
var caps []string
@@ -114,72 +145,150 @@ func (s *Spec) GetCapabilities() []string {
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
return caps
for _, c := range caps {
if c == capSysAdmin {
return true
}
}
return false
}
// Otherwise, parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
capabilities := specs.LinuxCapabilities{}
err := json.Unmarshal(*s.Process.Capabilities, &capabilities)
process := specs.Process{
Env: s.Process.Env,
}
err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
return image.OCISpecCapabilities(capabilities).GetCapabilities()
}
func isPrivileged(s *Spec) bool {
return image.IsPrivileged(s)
}
func getMigConfigDevices(i image.CUDA) *string {
return getMigDevices(i, image.EnvVarNvidiaMigConfigDevices)
}
func getMigMonitorDevices(i image.CUDA) *string {
return getMigDevices(i, image.EnvVarNvidiaMigMonitorDevices)
}
func getMigDevices(image image.CUDA, envvar string) *string {
if !image.HasEnvvar(envvar) {
return nil
}
devices := image.Getenv(envvar)
return &devices
}
func (hookConfig *hookConfig) getImexChannels(image image.CUDA, privileged bool) []string {
if hookConfig.Features.IgnoreImexChannelRequests.IsEnabled() {
return nil
fullSpec := specs.Spec{
Version: *s.Version,
Process: &process,
}
// If enabled, try and get the device list from volume mounts first
if hookConfig.AcceptDeviceListAsVolumeMounts {
devices := image.ImexChannelsFromMounts()
if len(devices) > 0 {
return devices
return image.IsPrivileged(&fullSpec)
}
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
// We check if the image has at least one of the Swarm resource envvars defined and use this
// if specified.
var hasSwarmEnvvar bool
for _, envvar := range swarmResourceEnvvars {
if _, exists := image[envvar]; exists {
hasSwarmEnvvar = true
break
}
}
devices := image.ImexChannelsFromEnvVar()
var devices []string
if hasSwarmEnvvar {
devices = image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
} else {
devices = image.DevicesFromEnvvars(envNVVisibleDevices).List()
}
if len(devices) == 0 {
return nil
}
devicesString := strings.Join(devices, ",")
return &devicesString
}
func getDevicesFromMounts(mounts []Mount) *string {
var devices []string
for _, m := range mounts {
root := filepath.Clean(deviceListAsVolumeMountsRoot)
source := filepath.Clean(m.Source)
destination := filepath.Clean(m.Destination)
// Only consider mounts who's host volume is /dev/null
if source != "/dev/null" {
continue
}
// Only consider container mount points that begin with 'root'
if len(destination) < len(root) {
continue
}
if destination[:len(root)] != root {
continue
}
// Grab the full path beyond 'root' and add it to the list of devices
device := destination[len(root):]
if len(device) > 0 && device[0] == '/' {
device = device[1:]
}
if len(device) == 0 {
continue
}
devices = append(devices, device)
}
if devices == nil {
return nil
}
ret := strings.Join(devices, ",")
return &ret
}
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *string {
// If enabled, try and get the device list from volume mounts first
if hookConfig.AcceptDeviceListAsVolumeMounts {
devices := getDevicesFromMounts(mounts)
if devices != nil {
return devices
}
}
// Fallback to reading from the environment variable if privileges are correct
devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
if devices == nil {
return nil
}
if privileged || hookConfig.AcceptEnvvarUnprivileged {
return devices
}
configName := hookConfig.getConfigOption("AcceptEnvvarUnprivileged")
log.Printf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES (privileged=%v, %v=%v) ", privileged, configName, hookConfig.AcceptEnvvarUnprivileged)
return nil
}
func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
func getMigConfigDevices(env map[string]string) *string {
if devices, ok := env[envNVMigConfigDevices]; ok {
return &devices
}
return nil
}
func getMigMonitorDevices(env map[string]string) *string {
if devices, ok := env[envNVMigMonitorDevices]; ok {
return &devices
}
return nil
}
func getImexChannels(env map[string]string) *string {
if chans, ok := env[envNVImexChannels]; ok {
return &chans
}
return nil
}
func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities {
// We use the default driver capabilities by default. This is filtered to only include the
// supported capabilities
supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities)
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
capsEnv := cudaImage.Getenv(image.EnvVarNvidiaDriverCapabilities)
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
if !capsEnvSpecified && legacyImage {
// Environment variable unset with legacy image: set all capabilities.
@@ -198,12 +307,14 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
return capabilities
}
func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig {
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
legacyImage := image.IsLegacy()
devices := image.VisibleDevices()
if len(devices) == 0 {
// empty devices means this is not a GPU container.
var devices string
if d := getDevices(hookConfig, image, mounts, privileged); d != nil {
devices = *d
} else {
// 'nil' devices means this is not a GPU container.
return nil
}
@@ -223,7 +334,10 @@ func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool)
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
imexChannels := hookConfig.getImexChannels(image, privileged)
var imexChannels string
if c := getImexChannels(image); c != nil {
imexChannels = *c
}
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
@@ -242,7 +356,7 @@ func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool)
}
}
func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) {
func getContainerConfig(hook HookConfig) (config containerConfig) {
var h HookState
d := json.NewDecoder(os.Stdin)
if err := d.Decode(&h); err != nil {
@@ -256,25 +370,19 @@ func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) {
s := loadSpec(path.Join(b, "config.json"))
privileged := isPrivileged(s)
i, err := image.New(
image, err := image.New(
image.WithEnv(s.Process.Env),
image.WithMounts(s.Mounts),
image.WithPrivileged(privileged),
image.WithDisableRequire(hookConfig.DisableRequire),
image.WithAcceptDeviceListAsVolumeMounts(hookConfig.AcceptDeviceListAsVolumeMounts),
image.WithAcceptEnvvarUnprivileged(hookConfig.AcceptEnvvarUnprivileged),
image.WithPreferredVisibleDevicesEnvVars(hookConfig.getSwarmResourceEnvvars()...),
image.WithDisableRequire(hook.DisableRequire),
)
if err != nil {
log.Panicln(err)
}
privileged := isPrivileged(s)
return containerConfig{
Pid: h.Pid,
Rootfs: s.Root.Path,
Image: i,
Nvidia: hookConfig.getNvidiaConfig(i, privileged),
Image: image,
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -17,10 +17,18 @@ const (
driverPath = "/run/nvidia/driver"
)
// hookConfig wraps the toolkit config.
// This allows for functions to be defined on the local type.
type hookConfig struct {
*config.Config
var defaultPaths = [...]string{}
// HookConfig : options for the nvidia-container-runtime-hook.
type HookConfig config.Config
func getDefaultHookConfig() (HookConfig, error) {
defaultCfg, err := config.GetDefault()
if err != nil {
return HookConfig{}, err
}
return *(*HookConfig)(defaultCfg), nil
}
// loadConfig loads the required paths for the hook config.
@@ -50,12 +58,12 @@ func loadConfig() (*config.Config, error) {
return config.GetDefault()
}
func getHookConfig() (*hookConfig, error) {
func getHookConfig() (*HookConfig, error) {
cfg, err := loadConfig()
if err != nil {
return nil, fmt.Errorf("failed to load config: %v", err)
}
config := &hookConfig{cfg}
config := (*HookConfig)(cfg)
allSupportedDriverCapabilities := image.SupportedDriverCapabilities
if config.SupportedDriverCapabilities == "all" {
@@ -73,7 +81,7 @@ func getHookConfig() (*hookConfig, error) {
// getConfigOption returns the toml config option associated with the
// specified struct field.
func (c hookConfig) getConfigOption(fieldName string) string {
func (c HookConfig) getConfigOption(fieldName string) string {
t := reflect.TypeOf(c)
f, ok := t.FieldByName(fieldName)
if !ok {
@@ -87,8 +95,8 @@ func (c hookConfig) getConfigOption(fieldName string) string {
}
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
func (c *hookConfig) getSwarmResourceEnvvars() []string {
if c == nil || c.SwarmResource == "" {
func (c *HookConfig) getSwarmResourceEnvvars() []string {
if c.SwarmResource == "" {
return nil
}
@@ -104,26 +112,3 @@ func (c *hookConfig) getSwarmResourceEnvvars() []string {
return envvars
}
// nvidiaContainerCliCUDACompatModeFlags returns required --cuda-compat-mode
// flag(s) depending on the hook and runtime configurations.
func (c *hookConfig) nvidiaContainerCliCUDACompatModeFlags() []string {
var flag string
switch c.NVIDIAContainerRuntimeConfig.Modes.Legacy.CUDACompatMode {
case config.CUDACompatModeLdconfig:
flag = "--cuda-compat-mode=ldconfig"
case config.CUDACompatModeMount:
flag = "--cuda-compat-mode=mount"
case config.CUDACompatModeDisabled, config.CUDACompatModeHook:
flag = "--cuda-compat-mode=disabled"
default:
if !c.Features.AllowCUDACompatLibsFromContainer.IsEnabled() {
flag = "--cuda-compat-mode=disabled"
}
}
if flag == "" {
return nil
}
return []string{flag}
}

View File

@@ -21,10 +21,8 @@ import (
"os"
"testing"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/stretchr/testify/require"
)
func TestGetHookConfig(t *testing.T) {
@@ -85,15 +83,15 @@ func TestGetHookConfig(t *testing.T) {
configflag = &filename
for _, line := range tc.lines {
_, err := fmt.Fprintf(configFile, "%s\n", line)
_, err := configFile.WriteString(fmt.Sprintf("%s\n", line))
require.NoError(t, err)
}
}
var cfg hookConfig
var config HookConfig
getHookConfig := func() {
c, _ := getHookConfig()
cfg = *c
config = *c
}
if tc.expectedPanic {
@@ -103,7 +101,7 @@ func TestGetHookConfig(t *testing.T) {
getHookConfig()
require.EqualValues(t, tc.expectedDriverCapabilities, cfg.SupportedDriverCapabilities)
require.EqualValues(t, tc.expectedDriverCapabilities, config.SupportedDriverCapabilities)
})
}
}
@@ -145,10 +143,8 @@ func TestGetSwarmResourceEnvvars(t *testing.T) {
for i, tc := range testCases {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
c := &hookConfig{
Config: &config.Config{
SwarmResource: tc.value,
},
c := &HookConfig{
SwarmResource: tc.value,
}
envvars := c.getSwarmResourceEnvvars()

View File

@@ -75,7 +75,7 @@ func doPrestart() {
}
cli := hook.NVIDIAContainerCLIConfig
container := hook.getContainerConfig()
container := getContainerConfig(*hook)
nvidia := container.Nvidia
if nvidia == nil {
// Not a GPU container, nothing to do.
@@ -95,9 +95,6 @@ func doPrestart() {
if cli.LoadKmods {
args = append(args, "--load-kmods")
}
if hook.Features.DisableImexChannelCreation.IsEnabled() {
args = append(args, "--no-create-imex-channels")
}
if cli.NoPivot {
args = append(args, "--no-pivot")
}
@@ -114,16 +111,14 @@ func doPrestart() {
}
args = append(args, "configure")
args = append(args, hook.nvidiaContainerCliCUDACompatModeFlags()...)
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
if cli.Ldconfig != "" {
args = append(args, fmt.Sprintf("--ldconfig=%s", cli.Ldconfig))
}
if cli.NoCgroups {
args = append(args, "--no-cgroups")
}
if devicesString := strings.Join(nvidia.Devices, ","); len(devicesString) > 0 {
args = append(args, fmt.Sprintf("--device=%s", devicesString))
if len(nvidia.Devices) > 0 {
args = append(args, fmt.Sprintf("--device=%s", nvidia.Devices))
}
if len(nvidia.MigConfigDevices) > 0 {
args = append(args, fmt.Sprintf("--mig-config=%s", nvidia.MigConfigDevices))
@@ -131,8 +126,8 @@ func doPrestart() {
if len(nvidia.MigMonitorDevices) > 0 {
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
}
if imexString := strings.Join(nvidia.ImexChannels, ","); len(imexString) > 0 {
args = append(args, fmt.Sprintf("--imex-channel=%s", imexString))
if len(nvidia.ImexChannels) > 0 {
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
}
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
@@ -150,7 +145,6 @@ func doPrestart() {
args = append(args, rootfs)
env := append(os.Environ(), cli.Environment...)
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection?
err = syscall.Exec(args[0], args, env)
log.Panicln("exec failed:", err)
}

View File

@@ -21,8 +21,8 @@ The `runtimes` config option allows for the low-level runtime to be specified. T
The default value for this setting is:
```toml
runtimes = [
"docker-runc",
"runc",
"crun",
]
```
@@ -85,126 +85,3 @@ Alternatively the NVIDIA Container Runtime can be set as the default runtime for
}
}
```
## Environment variables (OCI spec)
Each environment variable maps to an command-line argument for `nvidia-container-cli` from [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
These variables are already set in our [official CUDA images](https://hub.docker.com/r/nvidia/cuda/).
### `NVIDIA_VISIBLE_DEVICES`
This variable controls which GPUs will be made accessible inside the container.
#### Possible values
* `0,1,2`, `GPU-fef8089b` …: a comma-separated list of GPU UUID(s) or index(es).
* `all`: all GPUs will be accessible, this is the default value in our container images.
* `none`: no GPU will be accessible, but driver capabilities will be enabled.
* `void` or *empty* or *unset*: `nvidia-container-runtime` will have the same behavior as `runc`.
**Note**: When running on a MIG capable device, the following values will also be available:
* `0:0,0:1,1:0`, `MIG-GPU-fef8089b/0/1` …: a comma-separated list of MIG Device UUID(s) or index(es).
Where the MIG device indices have the form `<GPU Device Index>:<MIG Device Index>` as seen in the example output:
```
$ nvidia-smi -L
GPU 0: Graphics Device (UUID: GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5)
MIG Device 0: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/0)
MIG Device 1: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/1)
MIG Device 2: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/11/0)
```
### `NVIDIA_MIG_CONFIG_DEVICES`
This variable controls which of the visible GPUs can have their MIG
configuration managed from within the container. This includes enabling and
disabling MIG mode, creating and destroying GPU Instances and Compute
Instances, etc.
#### Possible values
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
MIG configurations managed.
**Note**:
* This feature is only available on MIG capable devices (e.g. the A100).
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
* When not running as `root`, the container user must have read access to the
`/proc/driver/nvidia/capabilities/mig/config` file on the host.
### `NVIDIA_MIG_MONITOR_DEVICES`
This variable controls which of the visible GPUs can have aggregate information
about all of their MIG devices monitored from within the container. This
includes inspecting the aggregate memory usage, listing the aggregate running
processes, etc.
#### Possible values
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
MIG devices monitored.
**Note**:
* This feature is only available on MIG capable devices (e.g. the A100).
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
* When not running as `root`, the container user must have read access to the
`/proc/driver/nvidia/capabilities/mig/monitor` file on the host.
### `NVIDIA_DRIVER_CAPABILITIES`
This option controls which driver libraries/binaries will be mounted inside the container.
#### Possible values
* `compute,video`, `graphics,utility` …: a comma-separated list of driver features the container needs.
* `all`: enable all available driver capabilities.
* *empty* or *unset*: use default driver capability: `utility,compute`.
#### Supported driver capabilities
* `compute`: required for CUDA and OpenCL applications.
* `compat32`: required for running 32-bit applications.
* `graphics`: required for running OpenGL and Vulkan applications.
* `utility`: required for using `nvidia-smi` and NVML.
* `video`: required for using the Video Codec SDK.
* `display`: required for leveraging X11 display.
### `NVIDIA_REQUIRE_*`
A logical expression to define constraints on the configurations supported by the container.
#### Supported constraints
* `cuda`: constraint on the CUDA driver version.
* `driver`: constraint on the driver version.
* `arch`: constraint on the compute architectures of the selected GPUs.
* `brand`: constraint on the brand of the selected GPUs (e.g. GeForce, Tesla, GRID).
#### Expressions
Multiple constraints can be expressed in a single environment variable: space-separated constraints are ORed, comma-separated constraints are ANDed.
Multiple environment variables of the form `NVIDIA_REQUIRE_*` are ANDed together.
### `NVIDIA_DISABLE_REQUIRE`
Single switch to disable all the constraints of the form `NVIDIA_REQUIRE_*`.
### `NVIDIA_REQUIRE_CUDA`
The version of the CUDA toolkit used by the container. It is an instance of the generic `NVIDIA_REQUIRE_*` case and it is set by official CUDA images.
If the version of the NVIDIA driver is insufficient to run this version of CUDA, the container will not be started.
#### Possible values
* `cuda>=7.5`, `cuda>=8.0`, `cuda>=9.0` …: any valid CUDA version in the form `major.minor`.
### `CUDA_VERSION`
Similar to `NVIDIA_REQUIRE_CUDA`, for legacy CUDA images.
In addition, if `NVIDIA_REQUIRE_CUDA` is not set, `NVIDIA_VISIBLE_DEVICES` and `NVIDIA_DRIVER_CAPABILITIES` will default to `all`.
## Usage example
**NOTE:** The use of the `nvidia-container-runtime` as CLI replacement for `runc` is uncommon and is only provided for completeness.
Although the `nvidia-container-runtime` is typically configured as a replacement for `runc` or `crun` in various container engines, it can also be
invoked from the command line as `runc` would. For example:
```sh
# Setup a rootfs based on Ubuntu 16.04
cd $(mktemp -d) && mkdir rootfs
curl -sS http://cdimage.ubuntu.com/ubuntu-base/releases/16.04/release/ubuntu-base-16.04.6-base-amd64.tar.gz | tar --exclude 'dev/*' -C rootfs -xz
# Create an OCI runtime spec
nvidia-container-runtime spec
sed -i 's;"sh";"nvidia-smi";' config.json
sed -i 's;\("TERM=xterm"\);\1, "NVIDIA_VISIBLE_DEVICES=0";' config.json
# Run the container
sudo nvidia-container-runtime run nvidia_smi
```

View File

@@ -3,7 +3,7 @@ package main
import (
"bytes"
"encoding/json"
"io"
"io/ioutil"
"log"
"os"
"os/exec"
@@ -11,20 +11,19 @@ import (
"strings"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
const (
nvidiaRuntime = "nvidia-container-runtime"
nvidiaHook = "nvidia-container-runtime-hook"
bundlePathSuffix = "tests/output/bundle/"
bundlePathSuffix = "test/output/bundle/"
specFile = "config.json"
unmodifiedSpecFileSuffix = "tests/input/test_spec.json"
unmodifiedSpecFileSuffix = "test/input/test_spec.json"
)
const (
@@ -46,8 +45,8 @@ func TestMain(m *testing.M) {
if err != nil {
log.Fatalf("error in test setup: could not get module root: %v", err)
}
testBinPath := filepath.Join(moduleRoot, "tests", "bin")
testInputPath := filepath.Join(moduleRoot, "tests", "input")
testBinPath := filepath.Join(moduleRoot, "test", "bin")
testInputPath := filepath.Join(moduleRoot, "test", "input")
// Set the environment variables for the test
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
@@ -87,7 +86,6 @@ func TestBadInput(t *testing.T) {
t.Fatal(err)
}
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
err = cmdCreate.Run()
@@ -105,7 +103,6 @@ func TestGoodInput(t *testing.T) {
t.Fatalf("error generating runtime spec: %v", err)
}
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle", cfg.bundlePath(), "testcontainer")
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
output, err := cmdRun.CombinedOutput()
@@ -116,7 +113,6 @@ func TestGoodInput(t *testing.T) {
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
require.Empty(t, spec.Hooks, "there should be no hooks in config.json")
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
err = cmdCreate.Run()
@@ -162,7 +158,6 @@ func TestDuplicateHook(t *testing.T) {
}
// Test how runtime handles already existing prestart hook in config.json
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle", cfg.bundlePath(), "testcontainer")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
output, err := cmdCreate.CombinedOutput()
@@ -193,16 +188,15 @@ func (c testConfig) getRuntimeSpec() (specs.Spec, error) {
}
defer jsonFile.Close()
jsonContent, err := io.ReadAll(jsonFile)
switch {
case err != nil:
jsonContent, err := ioutil.ReadAll(jsonFile)
if err != nil {
return spec, err
case json.Valid(jsonContent):
} else if json.Valid(jsonContent) {
err = json.Unmarshal(jsonContent, &spec)
if err != nil {
return spec, err
}
default:
} else {
err = json.NewDecoder(bytes.NewReader(jsonContent)).Decode(&spec)
if err != nil {
return spec, err
@@ -232,7 +226,6 @@ func (c testConfig) generateNewRuntimeSpec() error {
return err
}
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmd := exec.Command("cp", c.unmodifiedSpecFile(), c.specFilePath())
err = cmd.Run()
if err != nil {

View File

@@ -1,184 +0,0 @@
/**
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package containerd
import (
"encoding/json"
"fmt"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
)
const (
Name = "containerd"
DefaultConfig = "/etc/containerd/config.toml"
DefaultSocket = "/run/containerd/containerd.sock"
DefaultRestartMode = "signal"
defaultRuntmeType = "io.containerd.runc.v2"
)
// Options stores the containerd-specific options
type Options struct {
useLegacyConfig bool
runtimeType string
ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice
runtimeConfigOverrideJSON string
}
func Flags(opts *Options) []cli.Flag {
flags := []cli.Flag{
&cli.BoolFlag{
Name: "use-legacy-config",
Usage: "Specify whether a legacy (pre v1.3) config should be used. " +
"This ensures that a version 1 container config is created by default and that the " +
"containerd.runtimes.default_runtime config section is used to define the default " +
"runtime instead of container.default_runtime_name.",
Destination: &opts.useLegacyConfig,
EnvVars: []string{"CONTAINERD_USE_LEGACY_CONFIG"},
},
&cli.StringFlag{
Name: "runtime-type",
Usage: "The runtime_type to use for the configured runtime classes",
Value: defaultRuntmeType,
Destination: &opts.runtimeType,
EnvVars: []string{"CONTAINERD_RUNTIME_TYPE"},
},
&cli.StringSliceFlag{
Name: "nvidia-container-runtime-modes.cdi.annotation-prefixes",
Destination: &opts.ContainerRuntimeModesCDIAnnotationPrefixes,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"},
},
&cli.StringFlag{
Name: "runtime-config-override",
Destination: &opts.runtimeConfigOverrideJSON,
Usage: "specify additional runtime options as a JSON string. The paths are relative to the runtime config.",
Value: "{}",
EnvVars: []string{"RUNTIME_CONFIG_OVERRIDE", "CONTAINERD_RUNTIME_CONFIG_OVERRIDE"},
},
}
return flags
}
// Setup updates a containerd configuration to include the nvidia-containerd-runtime and reloads it
func Setup(c *cli.Context, o *container.Options, co *Options) error {
log.Infof("Starting 'setup' for %v", c.App.Name)
cfg, err := getRuntimeConfig(o, co)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
err = o.Configure(cfg)
if err != nil {
return fmt.Errorf("unable to configure containerd: %v", err)
}
err = RestartContainerd(o)
if err != nil {
return fmt.Errorf("unable to restart containerd: %v", err)
}
log.Infof("Completed 'setup' for %v", c.App.Name)
return nil
}
// Cleanup reverts a containerd configuration to remove the nvidia-containerd-runtime and reloads it
func Cleanup(c *cli.Context, o *container.Options, co *Options) error {
log.Infof("Starting 'cleanup' for %v", c.App.Name)
cfg, err := getRuntimeConfig(o, co)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
err = o.Unconfigure(cfg)
if err != nil {
return fmt.Errorf("unable to unconfigure containerd: %v", err)
}
err = RestartContainerd(o)
if err != nil {
return fmt.Errorf("unable to restart containerd: %v", err)
}
log.Infof("Completed 'cleanup' for %v", c.App.Name)
return nil
}
// RestartContainerd restarts containerd depending on the value of restartModeFlag
func RestartContainerd(o *container.Options) error {
return o.Restart("containerd", SignalContainerd)
}
// containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes.
func (o *Options) containerAnnotationsFromCDIPrefixes() []string {
var annotations []string
for _, prefix := range o.ContainerRuntimeModesCDIAnnotationPrefixes.Value() {
annotations = append(annotations, prefix+"*")
}
return annotations
}
func (o *Options) runtimeConfigOverride() (map[string]interface{}, error) {
if o.runtimeConfigOverrideJSON == "" {
return nil, nil
}
runtimeOptions := make(map[string]interface{})
if err := json.Unmarshal([]byte(o.runtimeConfigOverrideJSON), &runtimeOptions); err != nil {
return nil, fmt.Errorf("failed to read %v as JSON: %w", o.runtimeConfigOverrideJSON, err)
}
return runtimeOptions, nil
}
func GetLowlevelRuntimePaths(o *container.Options, co *Options) ([]string, error) {
cfg, err := getRuntimeConfig(o, co)
if err != nil {
return nil, fmt.Errorf("unable to load containerd config: %w", err)
}
return engine.GetBinaryPathsForRuntimes(cfg), nil
}
func getRuntimeConfig(o *container.Options, co *Options) (engine.Interface, error) {
return containerd.New(
containerd.WithPath(o.Config),
containerd.WithConfigSource(
toml.LoadFirst(
containerd.CommandLineSource(o.HostRootMount, o.ExecutablePath),
toml.FromFile(o.Config),
),
),
containerd.WithRuntimeType(co.runtimeType),
containerd.WithUseLegacyConfig(co.useLegacyConfig),
containerd.WithContainerAnnotations(co.containerAnnotationsFromCDIPrefixes()...),
)
}

View File

@@ -1,113 +0,0 @@
/**
# Copyright 2020-2023 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package containerd
import (
"fmt"
"net"
"syscall"
"time"
log "github.com/sirupsen/logrus"
)
const (
reloadBackoff = 5 * time.Second
maxReloadAttempts = 6
socketMessageToGetPID = ""
)
// SignalContainerd sends a SIGHUP signal to the containerd daemon
func SignalContainerd(socket string) error {
log.Infof("Sending SIGHUP signal to containerd")
// Wrap the logic to perform the SIGHUP in a function so we can retry it on failure
retriable := func() error {
conn, err := net.Dial("unix", socket)
if err != nil {
return fmt.Errorf("unable to dial: %v", err)
}
defer conn.Close()
sconn, err := conn.(*net.UnixConn).SyscallConn()
if err != nil {
return fmt.Errorf("unable to get syscall connection: %v", err)
}
err1 := sconn.Control(func(fd uintptr) {
err = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1)
})
if err1 != nil {
return fmt.Errorf("unable to issue call on socket fd: %v", err1)
}
if err != nil {
return fmt.Errorf("unable to SetsockoptInt on socket fd: %v", err)
}
_, _, err = conn.(*net.UnixConn).WriteMsgUnix([]byte(socketMessageToGetPID), nil, nil)
if err != nil {
return fmt.Errorf("unable to WriteMsgUnix on socket fd: %v", err)
}
oob := make([]byte, 1024)
_, oobn, _, _, err := conn.(*net.UnixConn).ReadMsgUnix(nil, oob)
if err != nil {
return fmt.Errorf("unable to ReadMsgUnix on socket fd: %v", err)
}
oob = oob[:oobn]
scm, err := syscall.ParseSocketControlMessage(oob)
if err != nil {
return fmt.Errorf("unable to ParseSocketControlMessage from message received on socket fd: %v", err)
}
ucred, err := syscall.ParseUnixCredentials(&scm[0])
if err != nil {
return fmt.Errorf("unable to ParseUnixCredentials from message received on socket fd: %v", err)
}
err = syscall.Kill(int(ucred.Pid), syscall.SIGHUP)
if err != nil {
return fmt.Errorf("unable to send SIGHUP to 'containerd' process: %v", err)
}
return nil
}
// Try to send a SIGHUP up to maxReloadAttempts times
var err error
for i := 0; i < maxReloadAttempts; i++ {
err = retriable()
if err == nil {
break
}
if i == maxReloadAttempts-1 {
break
}
log.Warningf("Error signaling containerd, attempt %v/%v: %v", i+1, maxReloadAttempts, err)
time.Sleep(reloadBackoff)
}
if err != nil {
log.Warningf("Max retries reached %v/%v, aborting", maxReloadAttempts, maxReloadAttempts)
return err
}
log.Infof("Successfully signaled containerd")
return nil
}

View File

@@ -1,29 +0,0 @@
//go:build !linux
// +build !linux
/**
# Copyright 2023 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package containerd
import (
"errors"
)
// SignalContainerd is unsupported on non-linux platforms.
func SignalContainerd(socket string) error {
return errors.New("SignalContainerd is unsupported on non-linux platforms")
}

View File

@@ -1,72 +0,0 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package containerd
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestRuntimeOptions(t *testing.T) {
testCases := []struct {
description string
options Options
expected map[string]interface{}
expectedError error
}{
{
description: "empty is nil",
},
{
description: "empty json",
options: Options{
runtimeConfigOverrideJSON: "{}",
},
expected: map[string]interface{}{},
expectedError: nil,
},
{
description: "SystemdCgroup is true",
options: Options{
runtimeConfigOverrideJSON: "{\"SystemdCgroup\": true}",
},
expected: map[string]interface{}{
"SystemdCgroup": true,
},
expectedError: nil,
},
{
description: "SystemdCgroup is false",
options: Options{
runtimeConfigOverrideJSON: "{\"SystemdCgroup\": false}",
},
expected: map[string]interface{}{
"SystemdCgroup": false,
},
expectedError: nil,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
runtimeOptions, err := tc.options.runtimeConfigOverride()
require.ErrorIs(t, tc.expectedError, err)
require.EqualValues(t, tc.expected, runtimeOptions)
})
}
}

View File

@@ -1,210 +0,0 @@
/**
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package crio
import (
"fmt"
"os"
"path/filepath"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
)
const (
Name = "crio"
defaultConfigMode = "hook"
// Hook-based settings
defaultHooksDir = "/usr/share/containers/oci/hooks.d"
defaultHookFilename = "oci-nvidia-hook.json"
// Config-based settings
DefaultConfig = "/etc/crio/crio.conf"
DefaultSocket = "/var/run/crio/crio.sock"
DefaultRestartMode = "systemd"
)
// Options defines the cri-o specific options.
type Options struct {
configMode string
// hook-specific options
hooksDir string
hookFilename string
}
func Flags(opts *Options) []cli.Flag {
flags := []cli.Flag{
&cli.StringFlag{
Name: "hooks-dir",
Usage: "path to the cri-o hooks directory",
Value: defaultHooksDir,
Destination: &opts.hooksDir,
EnvVars: []string{"CRIO_HOOKS_DIR"},
DefaultText: defaultHooksDir,
},
&cli.StringFlag{
Name: "hook-filename",
Usage: "filename of the cri-o hook that will be created / removed in the hooks directory",
Value: defaultHookFilename,
Destination: &opts.hookFilename,
EnvVars: []string{"CRIO_HOOK_FILENAME"},
DefaultText: defaultHookFilename,
},
&cli.StringFlag{
Name: "config-mode",
Usage: "the configuration mode to use. One of [hook | config]",
Value: defaultConfigMode,
Destination: &opts.configMode,
EnvVars: []string{"CRIO_CONFIG_MODE"},
},
}
return flags
}
// Setup installs the prestart hook required to launch GPU-enabled containers
func Setup(c *cli.Context, o *container.Options, co *Options) error {
log.Infof("Starting 'setup' for %v", c.App.Name)
switch co.configMode {
case "hook":
return setupHook(o, co)
case "config":
return setupConfig(o)
default:
return fmt.Errorf("invalid config-mode '%v'", co.configMode)
}
}
// setupHook installs the prestart hook required to launch GPU-enabled containers
func setupHook(o *container.Options, co *Options) error {
log.Infof("Installing prestart hook")
hookPath := filepath.Join(co.hooksDir, co.hookFilename)
err := ocihook.CreateHook(hookPath, filepath.Join(o.RuntimeDir, config.NVIDIAContainerRuntimeHookExecutable))
if err != nil {
return fmt.Errorf("error creating hook: %v", err)
}
return nil
}
// setupConfig updates the cri-o config for the NVIDIA container runtime
func setupConfig(o *container.Options) error {
log.Infof("Updating config file")
cfg, err := getRuntimeConfig(o)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
err = o.Configure(cfg)
if err != nil {
return fmt.Errorf("unable to configure cri-o: %v", err)
}
err = RestartCrio(o)
if err != nil {
return fmt.Errorf("unable to restart crio: %v", err)
}
return nil
}
// Cleanup removes the specified prestart hook
func Cleanup(c *cli.Context, o *container.Options, co *Options) error {
log.Infof("Starting 'cleanup' for %v", c.App.Name)
switch co.configMode {
case "hook":
return cleanupHook(co)
case "config":
return cleanupConfig(o)
default:
return fmt.Errorf("invalid config-mode '%v'", co.configMode)
}
}
// cleanupHook removes the prestart hook
func cleanupHook(co *Options) error {
log.Infof("Removing prestart hook")
hookPath := filepath.Join(co.hooksDir, co.hookFilename)
err := os.Remove(hookPath)
if err != nil {
return fmt.Errorf("error removing hook '%v': %v", hookPath, err)
}
return nil
}
// cleanupConfig removes the NVIDIA container runtime from the cri-o config
func cleanupConfig(o *container.Options) error {
log.Infof("Reverting config file modifications")
cfg, err := getRuntimeConfig(o)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
err = o.Unconfigure(cfg)
if err != nil {
return fmt.Errorf("unable to unconfigure cri-o: %v", err)
}
err = RestartCrio(o)
if err != nil {
return fmt.Errorf("unable to restart crio: %v", err)
}
return nil
}
// RestartCrio restarts crio depending on the value of restartModeFlag
func RestartCrio(o *container.Options) error {
return o.Restart("crio", func(string) error { return fmt.Errorf("supporting crio via signal is unsupported") })
}
func GetLowlevelRuntimePaths(o *container.Options) ([]string, error) {
cfg, err := getRuntimeConfig(o)
if err != nil {
return nil, fmt.Errorf("unable to load crio config: %w", err)
}
return engine.GetBinaryPathsForRuntimes(cfg), nil
}
func getRuntimeConfig(o *container.Options) (engine.Interface, error) {
return crio.New(
crio.WithPath(o.Config),
crio.WithConfigSource(
toml.LoadFirst(
crio.CommandLineSource(o.HostRootMount, o.ExecutablePath),
toml.FromFile(o.Config),
),
),
)
}

View File

@@ -1,111 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package docker
import (
"fmt"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
)
const (
Name = "docker"
DefaultConfig = "/etc/docker/daemon.json"
DefaultSocket = "/var/run/docker.sock"
DefaultRestartMode = "signal"
)
type Options struct{}
func Flags(opts *Options) []cli.Flag {
return nil
}
// Setup updates docker configuration to include the nvidia runtime and reloads it
func Setup(c *cli.Context, o *container.Options) error {
log.Infof("Starting 'setup' for %v", c.App.Name)
cfg, err := getRuntimeConfig(o)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
err = o.Configure(cfg)
if err != nil {
return fmt.Errorf("unable to configure docker: %v", err)
}
err = RestartDocker(o)
if err != nil {
return fmt.Errorf("unable to restart docker: %v", err)
}
log.Infof("Completed 'setup' for %v", c.App.Name)
return nil
}
// Cleanup reverts docker configuration to remove the nvidia runtime and reloads it
func Cleanup(c *cli.Context, o *container.Options) error {
log.Infof("Starting 'cleanup' for %v", c.App.Name)
cfg, err := getRuntimeConfig(o)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
err = o.Unconfigure(cfg)
if err != nil {
return fmt.Errorf("unable to unconfigure docker: %v", err)
}
err = RestartDocker(o)
if err != nil {
return fmt.Errorf("unable to signal docker: %v", err)
}
log.Infof("Completed 'cleanup' for %v", c.App.Name)
return nil
}
// RestartDocker restarts docker depending on the value of restartModeFlag
func RestartDocker(o *container.Options) error {
return o.Restart("docker", SignalDocker)
}
func GetLowlevelRuntimePaths(o *container.Options) ([]string, error) {
cfg, err := docker.New(
docker.WithPath(o.Config),
)
if err != nil {
return nil, fmt.Errorf("unable to load docker config: %w", err)
}
return engine.GetBinaryPathsForRuntimes(cfg), nil
}
func getRuntimeConfig(o *container.Options) (engine.Interface, error) {
return docker.New(
docker.WithPath(o.Config),
)
}

View File

@@ -1,113 +0,0 @@
/**
# Copyright 2021-2023 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package docker
import (
"fmt"
"net"
"syscall"
"time"
log "github.com/sirupsen/logrus"
)
const (
reloadBackoff = 5 * time.Second
maxReloadAttempts = 6
socketMessageToGetPID = "GET /info HTTP/1.0\r\n\r\n"
)
// SignalDocker sends a SIGHUP signal to docker daemon
func SignalDocker(socket string) error {
log.Infof("Sending SIGHUP signal to docker")
// Wrap the logic to perform the SIGHUP in a function so we can retry it on failure
retriable := func() error {
conn, err := net.Dial("unix", socket)
if err != nil {
return fmt.Errorf("unable to dial: %v", err)
}
defer conn.Close()
sconn, err := conn.(*net.UnixConn).SyscallConn()
if err != nil {
return fmt.Errorf("unable to get syscall connection: %v", err)
}
err1 := sconn.Control(func(fd uintptr) {
err = syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1)
})
if err1 != nil {
return fmt.Errorf("unable to issue call on socket fd: %v", err1)
}
if err != nil {
return fmt.Errorf("unable to SetsockoptInt on socket fd: %v", err)
}
_, _, err = conn.(*net.UnixConn).WriteMsgUnix([]byte(socketMessageToGetPID), nil, nil)
if err != nil {
return fmt.Errorf("unable to WriteMsgUnix on socket fd: %v", err)
}
oob := make([]byte, 1024)
_, oobn, _, _, err := conn.(*net.UnixConn).ReadMsgUnix(nil, oob)
if err != nil {
return fmt.Errorf("unable to ReadMsgUnix on socket fd: %v", err)
}
oob = oob[:oobn]
scm, err := syscall.ParseSocketControlMessage(oob)
if err != nil {
return fmt.Errorf("unable to ParseSocketControlMessage from message received on socket fd: %v", err)
}
ucred, err := syscall.ParseUnixCredentials(&scm[0])
if err != nil {
return fmt.Errorf("unable to ParseUnixCredentials from message received on socket fd: %v", err)
}
err = syscall.Kill(int(ucred.Pid), syscall.SIGHUP)
if err != nil {
return fmt.Errorf("unable to send SIGHUP to 'docker' process: %v", err)
}
return nil
}
// Try to send a SIGHUP up to maxReloadAttempts times
var err error
for i := 0; i < maxReloadAttempts; i++ {
err = retriable()
if err == nil {
break
}
if i == maxReloadAttempts-1 {
break
}
log.Warningf("Error signaling docker, attempt %v/%v: %v", i+1, maxReloadAttempts, err)
time.Sleep(reloadBackoff)
}
if err != nil {
log.Warningf("Max retries reached %v/%v, aborting", maxReloadAttempts, maxReloadAttempts)
return err
}
log.Infof("Successfully signaled docker")
return nil
}

View File

@@ -1,204 +0,0 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package runtime
import (
"fmt"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/crio"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/docker"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/toolkit"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
const (
defaultSetAsDefault = true
// defaultRuntimeName specifies the NVIDIA runtime to be use as the default runtime if setting the default runtime is enabled
defaultRuntimeName = "nvidia"
defaultHostRootMount = "/host"
runtimeSpecificDefault = "RUNTIME_SPECIFIC_DEFAULT"
)
type Options struct {
container.Options
containerdOptions containerd.Options
crioOptions crio.Options
}
func Flags(opts *Options) []cli.Flag {
flags := []cli.Flag{
&cli.StringFlag{
Name: "config",
Usage: "Path to the runtime config file",
Value: runtimeSpecificDefault,
Destination: &opts.Config,
EnvVars: []string{"RUNTIME_CONFIG", "CONTAINERD_CONFIG", "DOCKER_CONFIG"},
},
&cli.StringFlag{
Name: "executable-path",
Usage: "The path to the runtime executable. This is used to extract the current config",
Destination: &opts.ExecutablePath,
EnvVars: []string{"RUNTIME_EXECUTABLE_PATH"},
},
&cli.StringFlag{
Name: "socket",
Usage: "Path to the runtime socket file",
Value: runtimeSpecificDefault,
Destination: &opts.Socket,
EnvVars: []string{"RUNTIME_SOCKET", "CONTAINERD_SOCKET", "DOCKER_SOCKET"},
},
&cli.StringFlag{
Name: "restart-mode",
Usage: "Specify how the runtime should be restarted; If 'none' is selected it will not be restarted [signal | systemd | none ]",
Value: runtimeSpecificDefault,
Destination: &opts.RestartMode,
EnvVars: []string{"RUNTIME_RESTART_MODE"},
},
&cli.BoolFlag{
Name: "enable-cdi-in-runtime",
Usage: "Enable CDI in the configured runt ime",
Destination: &opts.EnableCDI,
EnvVars: []string{"RUNTIME_ENABLE_CDI"},
},
&cli.StringFlag{
Name: "host-root",
Usage: "Specify the path to the host root to be used when restarting the runtime using systemd",
Value: defaultHostRootMount,
Destination: &opts.HostRootMount,
EnvVars: []string{"HOST_ROOT_MOUNT"},
},
&cli.StringFlag{
Name: "runtime-name",
Aliases: []string{"nvidia-runtime-name", "runtime-class"},
Usage: "Specify the name of the `nvidia` runtime. If set-as-default is selected, the runtime is used as the default runtime.",
Value: defaultRuntimeName,
Destination: &opts.RuntimeName,
EnvVars: []string{"NVIDIA_RUNTIME_NAME", "CONTAINERD_RUNTIME_CLASS", "DOCKER_RUNTIME_NAME"},
},
&cli.BoolFlag{
Name: "set-as-default",
Usage: "Set the `nvidia` runtime as the default runtime.",
Value: defaultSetAsDefault,
Destination: &opts.SetAsDefault,
EnvVars: []string{"NVIDIA_RUNTIME_SET_AS_DEFAULT", "CONTAINERD_SET_AS_DEFAULT", "DOCKER_SET_AS_DEFAULT"},
Hidden: true,
},
}
flags = append(flags, containerd.Flags(&opts.containerdOptions)...)
flags = append(flags, crio.Flags(&opts.crioOptions)...)
return flags
}
// Validate checks whether the specified options are valid
func (opts *Options) Validate(logger logger.Interface, c *cli.Context, runtime string, toolkitRoot string, to *toolkit.Options) error {
// We set this option here to ensure that it is available in future calls.
opts.RuntimeDir = toolkitRoot
if !c.IsSet("enable-cdi-in-runtime") {
opts.EnableCDI = to.CDI.Enabled
}
if opts.ExecutablePath != "" && opts.RuntimeName == docker.Name {
logger.Warningf("Ignoring executable-path=%q flag for %v", opts.ExecutablePath, opts.RuntimeName)
opts.ExecutablePath = ""
}
// Apply the runtime-specific config changes.
switch runtime {
case containerd.Name:
if opts.Config == runtimeSpecificDefault {
opts.Config = containerd.DefaultConfig
}
if opts.Socket == runtimeSpecificDefault {
opts.Socket = containerd.DefaultSocket
}
if opts.RestartMode == runtimeSpecificDefault {
opts.RestartMode = containerd.DefaultRestartMode
}
case crio.Name:
if opts.Config == runtimeSpecificDefault {
opts.Config = crio.DefaultConfig
}
if opts.Socket == runtimeSpecificDefault {
opts.Socket = crio.DefaultSocket
}
if opts.RestartMode == runtimeSpecificDefault {
opts.RestartMode = crio.DefaultRestartMode
}
case docker.Name:
if opts.Config == runtimeSpecificDefault {
opts.Config = docker.DefaultConfig
}
if opts.Socket == runtimeSpecificDefault {
opts.Socket = docker.DefaultSocket
}
if opts.RestartMode == runtimeSpecificDefault {
opts.RestartMode = docker.DefaultRestartMode
}
default:
return fmt.Errorf("undefined runtime %v", runtime)
}
return nil
}
func Setup(c *cli.Context, opts *Options, runtime string) error {
switch runtime {
case containerd.Name:
return containerd.Setup(c, &opts.Options, &opts.containerdOptions)
case crio.Name:
return crio.Setup(c, &opts.Options, &opts.crioOptions)
case docker.Name:
return docker.Setup(c, &opts.Options)
default:
return fmt.Errorf("undefined runtime %v", runtime)
}
}
func Cleanup(c *cli.Context, opts *Options, runtime string) error {
switch runtime {
case containerd.Name:
return containerd.Cleanup(c, &opts.Options, &opts.containerdOptions)
case crio.Name:
return crio.Cleanup(c, &opts.Options, &opts.crioOptions)
case docker.Name:
return docker.Cleanup(c, &opts.Options)
default:
return fmt.Errorf("undefined runtime %v", runtime)
}
}
func GetLowlevelRuntimePaths(opts *Options, runtime string) ([]string, error) {
switch runtime {
case containerd.Name:
return containerd.GetLowlevelRuntimePaths(&opts.Options, &opts.containerdOptions)
case crio.Name:
return crio.GetLowlevelRuntimePaths(&opts.Options)
case docker.Name:
return docker.GetLowlevelRuntimePaths(&opts.Options)
default:
return nil, fmt.Errorf("undefined runtime %v", runtime)
}
}

View File

@@ -1,279 +0,0 @@
package main
import (
"fmt"
"os"
"os/signal"
"path/filepath"
"syscall"
"github.com/urfave/cli/v2"
"golang.org/x/sys/unix"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/toolkit"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
const (
toolkitPidFilename = "toolkit.pid"
defaultPidFile = "/run/nvidia/toolkit/" + toolkitPidFilename
defaultToolkitInstallDir = "/usr/local/nvidia"
toolkitSubDir = "toolkit"
defaultRuntime = "docker"
)
var availableRuntimes = map[string]struct{}{"docker": {}, "crio": {}, "containerd": {}}
var defaultLowLevelRuntimes = []string{"runc", "crun"}
var waitingForSignal = make(chan bool, 1)
var signalReceived = make(chan bool, 1)
// options stores the command line arguments
type options struct {
toolkitInstallDir string
noDaemon bool
runtime string
pidFile string
sourceRoot string
toolkitOptions toolkit.Options
runtimeOptions runtime.Options
}
func (o options) toolkitRoot() string {
return filepath.Join(o.toolkitInstallDir, toolkitSubDir)
}
func main() {
logger := logger.New()
c := NewApp(logger)
// Run the CLI
logger.Infof("Starting %v", c.Name)
if err := c.Run(os.Args); err != nil {
logger.Errorf("error running %v: %v", c.Name, err)
os.Exit(1)
}
logger.Infof("Completed %v", c.Name)
}
// An app represents the nvidia-ctk-installer.
type app struct {
logger logger.Interface
toolkit *toolkit.Installer
}
// NewApp creates the CLI app fro the specified options.
func NewApp(logger logger.Interface) *cli.App {
a := app{
logger: logger,
}
return a.build()
}
func (a app) build() *cli.App {
options := options{
toolkitOptions: toolkit.Options{},
}
// Create the top-level CLI
c := cli.NewApp()
c.Name = "nvidia-ctk-installer"
c.Usage = "Install the NVIDIA Container Toolkit and configure the specified runtime to use the `nvidia` runtime."
c.Version = info.GetVersionString()
c.Before = func(ctx *cli.Context) error {
return a.Before(ctx, &options)
}
c.Action = func(ctx *cli.Context) error {
return a.Run(ctx, &options)
}
// Setup flags for the CLI
c.Flags = []cli.Flag{
&cli.BoolFlag{
Name: "no-daemon",
Aliases: []string{"n"},
Usage: "terminate immediately after setting up the runtime. Note that no cleanup will be performed",
Destination: &options.noDaemon,
EnvVars: []string{"NO_DAEMON"},
},
&cli.StringFlag{
Name: "runtime",
Aliases: []string{"r"},
Usage: "the runtime to setup on this node. One of {'docker', 'crio', 'containerd'}",
Value: defaultRuntime,
Destination: &options.runtime,
EnvVars: []string{"RUNTIME"},
},
&cli.StringFlag{
Name: "toolkit-install-dir",
Aliases: []string{"root"},
Usage: "The directory where the NVIDIA Container Toolkit is to be installed. " +
"The components of the toolkit will be installed to `ROOT`/toolkit. " +
"Note that in the case of a containerized installer, this is the path in the container and it is " +
"recommended that this match the path on the host.",
Value: defaultToolkitInstallDir,
Destination: &options.toolkitInstallDir,
EnvVars: []string{"TOOLKIT_INSTALL_DIR", "ROOT"},
},
&cli.StringFlag{
Name: "source-root",
Value: "/",
Usage: "The folder where the required toolkit artifacts can be found",
Destination: &options.sourceRoot,
EnvVars: []string{"SOURCE_ROOT"},
},
&cli.StringFlag{
Name: "pid-file",
Value: defaultPidFile,
Usage: "the path to a toolkit.pid file to ensure that only a single configuration instance is running",
Destination: &options.pidFile,
EnvVars: []string{"TOOLKIT_PID_FILE", "PID_FILE"},
},
}
c.Flags = append(c.Flags, toolkit.Flags(&options.toolkitOptions)...)
c.Flags = append(c.Flags, runtime.Flags(&options.runtimeOptions)...)
return c
}
func (a *app) Before(c *cli.Context, o *options) error {
a.toolkit = toolkit.NewInstaller(
toolkit.WithLogger(a.logger),
toolkit.WithSourceRoot(o.sourceRoot),
toolkit.WithToolkitRoot(o.toolkitRoot()),
)
return a.validateFlags(c, o)
}
func (a *app) validateFlags(c *cli.Context, o *options) error {
if o.toolkitInstallDir == "" {
return fmt.Errorf("the install root must be specified")
}
if _, exists := availableRuntimes[o.runtime]; !exists {
return fmt.Errorf("unknown runtime: %v", o.runtime)
}
if filepath.Base(o.pidFile) != toolkitPidFilename {
return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile)
}
if err := a.toolkit.ValidateOptions(&o.toolkitOptions); err != nil {
return err
}
if err := o.runtimeOptions.Validate(a.logger, c, o.runtime, o.toolkitRoot(), &o.toolkitOptions); err != nil {
return err
}
return nil
}
// Run installs the NVIDIA Container Toolkit and updates the requested runtime.
// If the application is run as a daemon, the application waits and unconfigures
// the runtime on termination.
func (a *app) Run(c *cli.Context, o *options) error {
err := a.initialize(o.pidFile)
if err != nil {
return fmt.Errorf("unable to initialize: %v", err)
}
defer a.shutdown(o.pidFile)
if len(o.toolkitOptions.ContainerRuntimeRuntimes.Value()) == 0 {
lowlevelRuntimePaths, err := runtime.GetLowlevelRuntimePaths(&o.runtimeOptions, o.runtime)
if err != nil {
return fmt.Errorf("unable to determine runtime options: %w", err)
}
lowlevelRuntimePaths = append(lowlevelRuntimePaths, defaultLowLevelRuntimes...)
o.toolkitOptions.ContainerRuntimeRuntimes = *cli.NewStringSlice(lowlevelRuntimePaths...)
}
err = a.toolkit.Install(c, &o.toolkitOptions)
if err != nil {
return fmt.Errorf("unable to install toolkit: %v", err)
}
err = runtime.Setup(c, &o.runtimeOptions, o.runtime)
if err != nil {
return fmt.Errorf("unable to setup runtime: %v", err)
}
if !o.noDaemon {
err = a.waitForSignal()
if err != nil {
return fmt.Errorf("unable to wait for signal: %v", err)
}
err = runtime.Cleanup(c, &o.runtimeOptions, o.runtime)
if err != nil {
return fmt.Errorf("unable to cleanup runtime: %v", err)
}
}
return nil
}
func (a *app) initialize(pidFile string) error {
a.logger.Infof("Initializing")
if dir := filepath.Dir(pidFile); dir != "" {
err := os.MkdirAll(dir, 0755)
if err != nil {
return fmt.Errorf("unable to create folder for pidfile: %w", err)
}
}
f, err := os.Create(pidFile)
if err != nil {
return fmt.Errorf("unable to create pidfile: %v", err)
}
err = unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB)
if err != nil {
a.logger.Warningf("Unable to get exclusive lock on '%v'", pidFile)
a.logger.Warningf("This normally means an instance of the NVIDIA toolkit Container is already running, aborting")
return fmt.Errorf("unable to get flock on pidfile: %v", err)
}
_, err = fmt.Fprintf(f, "%v\n", os.Getpid())
if err != nil {
return fmt.Errorf("unable to write PID to pidfile: %v", err)
}
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGHUP, syscall.SIGINT, syscall.SIGQUIT, syscall.SIGPIPE, syscall.SIGTERM)
go func() {
<-sigs
select {
case <-waitingForSignal:
signalReceived <- true
default:
a.logger.Infof("Signal received, exiting early")
a.shutdown(pidFile)
os.Exit(0)
}
}()
return nil
}
func (a *app) waitForSignal() error {
a.logger.Infof("Waiting for signal")
waitingForSignal <- true
<-signalReceived
return nil
}
func (a *app) shutdown(pidFile string) {
a.logger.Infof("Shutting Down")
err := os.Remove(pidFile)
if err != nil {
a.logger.Warningf("Unable to remove pidfile: %v", err)
}
}

View File

@@ -1,455 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"path/filepath"
"strings"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
func TestApp(t *testing.T) {
t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true")
logger, _ := testlog.NewNullLogger()
moduleRoot, err := test.GetModuleRoot()
require.NoError(t, err)
artifactRoot := filepath.Join(moduleRoot, "testdata", "installer", "artifacts")
hostRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1")
testCases := []struct {
description string
args []string
expectedToolkitConfig string
expectedRuntimeConfig string
}{
{
description: "no args",
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
swarm-resource = ""
[nvidia-container-cli]
debug = ""
environment = []
ldcache = ""
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
load-kmods = true
no-cgroups = false
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
root = "/run/nvidia/driver"
user = ""
[nvidia-container-runtime]
debug = "/dev/null"
log-level = "info"
mode = "auto"
runtimes = ["runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime.modes.legacy]
cuda-compat-mode = "ldconfig"
[nvidia-container-runtime-hook]
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
skip-mode-detection = true
[nvidia-ctk]
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
`,
expectedRuntimeConfig: `{
"default-runtime": "nvidia",
"runtimes": {
"nvidia": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
},
"nvidia-cdi": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
},
"nvidia-legacy": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
}
}
}`,
},
{
description: "CDI enabled enables CDI in docker",
args: []string{"--cdi-enabled", "--create-device-nodes=none"},
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
swarm-resource = ""
[nvidia-container-cli]
debug = ""
environment = []
ldcache = ""
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
load-kmods = true
no-cgroups = false
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
root = "/run/nvidia/driver"
user = ""
[nvidia-container-runtime]
debug = "/dev/null"
log-level = "info"
mode = "auto"
runtimes = ["runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime.modes.legacy]
cuda-compat-mode = "ldconfig"
[nvidia-container-runtime-hook]
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
skip-mode-detection = true
[nvidia-ctk]
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
`,
expectedRuntimeConfig: `{
"default-runtime": "nvidia",
"features": {
"cdi": true
},
"runtimes": {
"nvidia": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
},
"nvidia-cdi": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
},
"nvidia-legacy": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
}
}
}`,
},
{
description: "--enable-cdi-in-runtime=false overrides --cdi-enabled in Docker",
args: []string{"--cdi-enabled", "--create-device-nodes=none", "--enable-cdi-in-runtime=false"},
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
swarm-resource = ""
[nvidia-container-cli]
debug = ""
environment = []
ldcache = ""
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
load-kmods = true
no-cgroups = false
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
root = "/run/nvidia/driver"
user = ""
[nvidia-container-runtime]
debug = "/dev/null"
log-level = "info"
mode = "auto"
runtimes = ["runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime.modes.legacy]
cuda-compat-mode = "ldconfig"
[nvidia-container-runtime-hook]
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
skip-mode-detection = true
[nvidia-ctk]
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
`,
expectedRuntimeConfig: `{
"default-runtime": "nvidia",
"runtimes": {
"nvidia": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
},
"nvidia-cdi": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
},
"nvidia-legacy": {
"args": [],
"path": "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
}
}
}`,
},
{
description: "CDI enabled enables CDI in containerd",
args: []string{"--cdi-enabled", "--runtime=containerd"},
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
swarm-resource = ""
[nvidia-container-cli]
debug = ""
environment = []
ldcache = ""
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
load-kmods = true
no-cgroups = false
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
root = "/run/nvidia/driver"
user = ""
[nvidia-container-runtime]
debug = "/dev/null"
log-level = "info"
mode = "auto"
runtimes = ["runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime.modes.legacy]
cuda-compat-mode = "ldconfig"
[nvidia-container-runtime-hook]
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
skip-mode-detection = true
[nvidia-ctk]
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
`,
expectedRuntimeConfig: `version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
enable_cdi = true
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
`,
},
{
description: "--enable-cdi-in-runtime=false overrides --cdi-enabled in containerd",
args: []string{"--cdi-enabled", "--create-device-nodes=none", "--enable-cdi-in-runtime=false", "--runtime=containerd"},
expectedToolkitConfig: `accept-nvidia-visible-devices-as-volume-mounts = false
accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
swarm-resource = ""
[nvidia-container-cli]
debug = ""
environment = []
ldcache = ""
ldconfig = "@/run/nvidia/driver/sbin/ldconfig"
load-kmods = true
no-cgroups = false
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-cli"
root = "/run/nvidia/driver"
user = ""
[nvidia-container-runtime]
debug = "/dev/null"
log-level = "info"
mode = "auto"
runtimes = ["runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime.modes.legacy]
cuda-compat-mode = "ldconfig"
[nvidia-container-runtime-hook]
path = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime-hook"
skip-mode-detection = true
[nvidia-ctk]
path = "{{ .toolkitRoot }}/toolkit/nvidia-ctk"
`,
expectedRuntimeConfig: `version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-cdi.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.cdi"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-legacy.options]
BinaryName = "{{ .toolkitRoot }}/toolkit/nvidia-container-runtime.legacy"
`,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
testRoot := t.TempDir()
cdiOutputDir := filepath.Join(testRoot, "/var/run/cdi")
runtimeConfigFile := filepath.Join(testRoot, "config.file")
toolkitRoot := filepath.Join(testRoot, "toolkit-test")
toolkitConfigFile := filepath.Join(toolkitRoot, "toolkit/.config/nvidia-container-runtime/config.toml")
app := NewApp(logger)
testArgs := []string{
"nvidia-ctk-installer",
"--toolkit-install-dir=" + toolkitRoot,
"--no-daemon",
"--cdi-output-dir=" + cdiOutputDir,
"--config=" + runtimeConfigFile,
"--create-device-nodes=none",
"--driver-root-ctr-path=" + hostRoot,
"--pid-file=" + filepath.Join(testRoot, "toolkit.pid"),
"--restart-mode=none",
"--source-root=" + filepath.Join(artifactRoot, "deb"),
}
err := app.Run(append(testArgs, tc.args...))
require.NoError(t, err)
require.FileExists(t, toolkitConfigFile)
toolkitConfigFileContents, err := os.ReadFile(toolkitConfigFile)
require.NoError(t, err)
require.EqualValues(t, strings.ReplaceAll(tc.expectedToolkitConfig, "{{ .toolkitRoot }}", toolkitRoot), string(toolkitConfigFileContents))
require.FileExists(t, runtimeConfigFile)
runtimeConfigFileContents, err := os.ReadFile(runtimeConfigFile)
require.NoError(t, err)
require.EqualValues(t, strings.ReplaceAll(tc.expectedRuntimeConfig, "{{ .toolkitRoot }}", toolkitRoot), string(runtimeConfigFileContents))
})
}
}

View File

@@ -1,85 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package installer
import (
"fmt"
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
// An artifactRoot is used as a source for installed artifacts.
// It is refined by a directory path, a library locator, and an executable locator.
type artifactRoot struct {
path string
libraries lookup.Locator
executables lookup.Locator
}
func newArtifactRoot(logger logger.Interface, rootDirectoryPath string) (*artifactRoot, error) {
relativeLibrarySearchPaths := []string{
"/usr/lib64",
"/usr/lib/x86_64-linux-gnu",
"/usr/lib/aarch64-linux-gnu",
}
var librarySearchPaths []string
for _, l := range relativeLibrarySearchPaths {
librarySearchPaths = append(librarySearchPaths, filepath.Join(rootDirectoryPath, l))
}
a := artifactRoot{
path: rootDirectoryPath,
libraries: lookup.NewLibraryLocator(
lookup.WithLogger(logger),
lookup.WithCount(1),
lookup.WithSearchPaths(librarySearchPaths...),
),
executables: lookup.NewExecutableLocator(
logger,
rootDirectoryPath,
),
}
return &a, nil
}
func (r *artifactRoot) findLibrary(name string) (string, error) {
candidates, err := r.libraries.Locate(name)
if err != nil {
return "", fmt.Errorf("error locating library: %w", err)
}
if len(candidates) == 0 {
return "", fmt.Errorf("library %v not found", name)
}
return candidates[0], nil
}
func (r *artifactRoot) findExecutable(name string) (string, error) {
candidates, err := r.executables.Locate(name)
if err != nil {
return "", fmt.Errorf("error locating executable: %w", err)
}
if len(candidates) == 0 {
return "", fmt.Errorf("executable %v not found", name)
}
return candidates[0], nil
}

View File

@@ -1,47 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package installer
import (
"fmt"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
type createDirectory struct {
logger logger.Interface
}
func (t *toolkitInstaller) createDirectory() Installer {
return &createDirectory{
logger: t.logger,
}
}
func (d *createDirectory) Install(dir string) error {
if dir == "" {
return nil
}
d.logger.Infof("Creating directory '%v'", dir)
err := os.MkdirAll(dir, 0755)
if err != nil {
return fmt.Errorf("error creating directory: %v", err)
}
return nil
}

View File

@@ -1,184 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package installer
import (
"bytes"
"fmt"
"html/template"
"io"
"path/filepath"
"strings"
log "github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/operator"
)
type executable struct {
requiresKernelModule bool
path string
symlink string
args []string
env map[string]string
}
func (t *toolkitInstaller) collectExecutables(destDir string) ([]Installer, error) {
configHome := filepath.Join(destDir, ".config")
configDir := filepath.Join(configHome, "nvidia-container-runtime")
configPath := filepath.Join(configDir, "config.toml")
executables := []executable{
{
path: "nvidia-ctk",
},
{
path: "nvidia-cdi-hook",
},
}
for _, runtime := range operator.GetRuntimes() {
e := executable{
path: runtime.Path,
requiresKernelModule: true,
env: map[string]string{
"XDG_CONFIG_HOME": configHome,
},
}
executables = append(executables, e)
}
executables = append(executables,
executable{
path: "nvidia-container-cli",
env: map[string]string{"LD_LIBRARY_PATH": destDir + ":$LD_LIBRARY_PATH"},
},
)
executables = append(executables,
executable{
path: "nvidia-container-runtime-hook",
symlink: "nvidia-container-toolkit",
args: []string{fmt.Sprintf("-config %s", configPath)},
},
)
var installers []Installer
for _, executable := range executables {
executablePath, err := t.artifactRoot.findExecutable(executable.path)
if err != nil {
if t.ignoreErrors {
log.Errorf("Ignoring error: %v", err)
continue
}
return nil, err
}
wrappedExecutableFilename := filepath.Base(executablePath)
dotRealFilename := wrappedExecutableFilename + ".real"
w := &wrapper{
Source: executablePath,
WrappedExecutable: dotRealFilename,
CheckModules: executable.requiresKernelModule,
Args: executable.args,
Envvars: map[string]string{
"PATH": strings.Join([]string{destDir, "$PATH"}, ":"),
},
}
for k, v := range executable.env {
w.Envvars[k] = v
}
installers = append(installers, w)
if executable.symlink == "" {
continue
}
link := symlink{
linkname: executable.symlink,
target: filepath.Base(executablePath),
}
installers = append(installers, link)
}
return installers, nil
}
type wrapper struct {
Source string
Envvars map[string]string
WrappedExecutable string
CheckModules bool
Args []string
}
type render struct {
*wrapper
DestDir string
}
func (w *wrapper) Install(destDir string) error {
// Copy the executable with a .real extension.
mode, err := installFile(w.Source, filepath.Join(destDir, w.WrappedExecutable))
if err != nil {
return err
}
// Create a wrapper file.
r := render{
wrapper: w,
DestDir: destDir,
}
content, err := r.render()
if err != nil {
return fmt.Errorf("failed to render wrapper: %w", err)
}
wrapperFile := filepath.Join(destDir, filepath.Base(w.Source))
return installContent(content, wrapperFile, mode|0111)
}
func (w *render) render() (io.Reader, error) {
wrapperTemplate := `#! /bin/sh
{{- if (.CheckModules) }}
cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
if [ "${?}" != "0" ]; then
echo "nvidia driver modules are not yet loaded, invoking runc directly"
exec runc "$@"
fi
{{- end }}
{{- range $key, $value := .Envvars }}
{{$key}}={{$value}} \
{{- end }}
{{ .DestDir }}/{{ .WrappedExecutable }} \
{{- range $arg := .Args }}
{{$arg}} \
{{- end }}
"$@"
`
var content bytes.Buffer
tmpl, err := template.New("wrapper").Parse(wrapperTemplate)
if err != nil {
return nil, err
}
if err := tmpl.Execute(&content, w); err != nil {
return nil, err
}
return &content, nil
}

View File

@@ -1,104 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package installer
import (
"bytes"
"testing"
"github.com/stretchr/testify/require"
)
func TestWrapperRender(t *testing.T) {
testCases := []struct {
description string
w *wrapper
expected string
}{
{
description: "executable is added",
w: &wrapper{
WrappedExecutable: "some-runtime",
},
expected: `#! /bin/sh
/dest-dir/some-runtime \
"$@"
`,
},
{
description: "module check is added",
w: &wrapper{
WrappedExecutable: "some-runtime",
CheckModules: true,
},
expected: `#! /bin/sh
cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
if [ "${?}" != "0" ]; then
echo "nvidia driver modules are not yet loaded, invoking runc directly"
exec runc "$@"
fi
/dest-dir/some-runtime \
"$@"
`,
},
{
description: "environment is added",
w: &wrapper{
WrappedExecutable: "some-runtime",
Envvars: map[string]string{
"PATH": "/foo/bar/baz",
},
},
expected: `#! /bin/sh
PATH=/foo/bar/baz \
/dest-dir/some-runtime \
"$@"
`,
},
{
description: "args are added",
w: &wrapper{
WrappedExecutable: "some-runtime",
Args: []string{"--config foo", "bar"},
},
expected: `#! /bin/sh
/dest-dir/some-runtime \
--config foo \
bar \
"$@"
`,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
r := render{
wrapper: tc.w,
DestDir: "/dest-dir",
}
reader, err := r.render()
require.NoError(t, err)
var content bytes.Buffer
_, err = content.ReadFrom(reader)
require.NoError(t, err)
require.Equal(t, tc.expected, content.String())
})
}
}

View File

@@ -1,188 +0,0 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package installer
import (
"io"
"os"
"sync"
)
// Ensure, that fileInstallerMock does implement fileInstaller.
// If this is not the case, regenerate this file with moq.
var _ fileInstaller = &fileInstallerMock{}
// fileInstallerMock is a mock implementation of fileInstaller.
//
// func TestSomethingThatUsesfileInstaller(t *testing.T) {
//
// // make and configure a mocked fileInstaller
// mockedfileInstaller := &fileInstallerMock{
// installContentFunc: func(reader io.Reader, s string, v os.FileMode) error {
// panic("mock out the installContent method")
// },
// installFileFunc: func(s1 string, s2 string) (os.FileMode, error) {
// panic("mock out the installFile method")
// },
// installSymlinkFunc: func(s1 string, s2 string) error {
// panic("mock out the installSymlink method")
// },
// }
//
// // use mockedfileInstaller in code that requires fileInstaller
// // and then make assertions.
//
// }
type fileInstallerMock struct {
// installContentFunc mocks the installContent method.
installContentFunc func(reader io.Reader, s string, v os.FileMode) error
// installFileFunc mocks the installFile method.
installFileFunc func(s1 string, s2 string) (os.FileMode, error)
// installSymlinkFunc mocks the installSymlink method.
installSymlinkFunc func(s1 string, s2 string) error
// calls tracks calls to the methods.
calls struct {
// installContent holds details about calls to the installContent method.
installContent []struct {
// Reader is the reader argument value.
Reader io.Reader
// S is the s argument value.
S string
// V is the v argument value.
V os.FileMode
}
// installFile holds details about calls to the installFile method.
installFile []struct {
// S1 is the s1 argument value.
S1 string
// S2 is the s2 argument value.
S2 string
}
// installSymlink holds details about calls to the installSymlink method.
installSymlink []struct {
// S1 is the s1 argument value.
S1 string
// S2 is the s2 argument value.
S2 string
}
}
lockinstallContent sync.RWMutex
lockinstallFile sync.RWMutex
lockinstallSymlink sync.RWMutex
}
// installContent calls installContentFunc.
func (mock *fileInstallerMock) installContent(reader io.Reader, s string, v os.FileMode) error {
if mock.installContentFunc == nil {
panic("fileInstallerMock.installContentFunc: method is nil but fileInstaller.installContent was just called")
}
callInfo := struct {
Reader io.Reader
S string
V os.FileMode
}{
Reader: reader,
S: s,
V: v,
}
mock.lockinstallContent.Lock()
mock.calls.installContent = append(mock.calls.installContent, callInfo)
mock.lockinstallContent.Unlock()
return mock.installContentFunc(reader, s, v)
}
// installContentCalls gets all the calls that were made to installContent.
// Check the length with:
//
// len(mockedfileInstaller.installContentCalls())
func (mock *fileInstallerMock) installContentCalls() []struct {
Reader io.Reader
S string
V os.FileMode
} {
var calls []struct {
Reader io.Reader
S string
V os.FileMode
}
mock.lockinstallContent.RLock()
calls = mock.calls.installContent
mock.lockinstallContent.RUnlock()
return calls
}
// installFile calls installFileFunc.
func (mock *fileInstallerMock) installFile(s1 string, s2 string) (os.FileMode, error) {
if mock.installFileFunc == nil {
panic("fileInstallerMock.installFileFunc: method is nil but fileInstaller.installFile was just called")
}
callInfo := struct {
S1 string
S2 string
}{
S1: s1,
S2: s2,
}
mock.lockinstallFile.Lock()
mock.calls.installFile = append(mock.calls.installFile, callInfo)
mock.lockinstallFile.Unlock()
return mock.installFileFunc(s1, s2)
}
// installFileCalls gets all the calls that were made to installFile.
// Check the length with:
//
// len(mockedfileInstaller.installFileCalls())
func (mock *fileInstallerMock) installFileCalls() []struct {
S1 string
S2 string
} {
var calls []struct {
S1 string
S2 string
}
mock.lockinstallFile.RLock()
calls = mock.calls.installFile
mock.lockinstallFile.RUnlock()
return calls
}
// installSymlink calls installSymlinkFunc.
func (mock *fileInstallerMock) installSymlink(s1 string, s2 string) error {
if mock.installSymlinkFunc == nil {
panic("fileInstallerMock.installSymlinkFunc: method is nil but fileInstaller.installSymlink was just called")
}
callInfo := struct {
S1 string
S2 string
}{
S1: s1,
S2: s2,
}
mock.lockinstallSymlink.Lock()
mock.calls.installSymlink = append(mock.calls.installSymlink, callInfo)
mock.lockinstallSymlink.Unlock()
return mock.installSymlinkFunc(s1, s2)
}
// installSymlinkCalls gets all the calls that were made to installSymlink.
// Check the length with:
//
// len(mockedfileInstaller.installSymlinkCalls())
func (mock *fileInstallerMock) installSymlinkCalls() []struct {
S1 string
S2 string
} {
var calls []struct {
S1 string
S2 string
}
mock.lockinstallSymlink.RLock()
calls = mock.calls.installSymlink
mock.lockinstallSymlink.RUnlock()
return calls
}

View File

@@ -1,168 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package installer
import (
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
//go:generate moq -rm -fmt=goimports -out installer_mock.go . Installer
type Installer interface {
Install(string) error
}
type toolkitInstaller struct {
logger logger.Interface
ignoreErrors bool
sourceRoot string
artifactRoot *artifactRoot
ensureTargetDirectory Installer
}
var _ Installer = (*toolkitInstaller)(nil)
// New creates a toolkit installer with the specified options.
func New(opts ...Option) (Installer, error) {
t := &toolkitInstaller{}
for _, opt := range opts {
opt(t)
}
if t.logger == nil {
t.logger = logger.New()
}
if t.sourceRoot == "" {
t.sourceRoot = "/"
}
if t.artifactRoot == nil {
artifactRoot, err := newArtifactRoot(t.logger, t.sourceRoot)
if err != nil {
return nil, err
}
t.artifactRoot = artifactRoot
}
if t.ensureTargetDirectory == nil {
t.ensureTargetDirectory = t.createDirectory()
}
return t, nil
}
// Install ensures that the required toolkit files are installed in the specified directory.
func (t *toolkitInstaller) Install(destDir string) error {
var installers []Installer
installers = append(installers, t.ensureTargetDirectory)
libraries, err := t.collectLibraries()
if err != nil {
return fmt.Errorf("failed to collect libraries: %w", err)
}
installers = append(installers, libraries...)
executables, err := t.collectExecutables(destDir)
if err != nil {
return fmt.Errorf("failed to collect executables: %w", err)
}
installers = append(installers, executables...)
var errs error
for _, i := range installers {
errs = errors.Join(errs, i.Install(destDir))
}
return errs
}
type symlink struct {
linkname string
target string
}
func (s symlink) Install(destDir string) error {
symlinkPath := filepath.Join(destDir, s.linkname)
return installSymlink(s.target, symlinkPath)
}
//go:generate moq -rm -fmt=goimports -out file-installer_mock.go . fileInstaller
type fileInstaller interface {
installContent(io.Reader, string, os.FileMode) error
installFile(string, string) (os.FileMode, error)
installSymlink(string, string) error
}
var installSymlink = installSymlinkStub
func installSymlinkStub(target string, link string) error {
err := os.Symlink(target, link)
if err != nil {
return fmt.Errorf("error creating symlink '%v' => '%v': %v", link, target, err)
}
return nil
}
var installFile = installFileStub
func installFileStub(src string, dest string) (os.FileMode, error) {
sourceInfo, err := os.Stat(src)
if err != nil {
return 0, fmt.Errorf("error getting file info for '%v': %v", src, err)
}
source, err := os.Open(src)
if err != nil {
return 0, fmt.Errorf("error opening source: %w", err)
}
defer source.Close()
mode := sourceInfo.Mode()
if err := installContent(source, dest, mode); err != nil {
return 0, err
}
return mode, nil
}
var installContent = installContentStub
func installContentStub(content io.Reader, dest string, mode fs.FileMode) error {
destination, err := os.Create(dest)
if err != nil {
return fmt.Errorf("error creating destination: %w", err)
}
defer destination.Close()
_, err = io.Copy(destination, content)
if err != nil {
return fmt.Errorf("error copying file: %w", err)
}
err = os.Chmod(dest, mode)
if err != nil {
return fmt.Errorf("error setting mode for '%v': %v", dest, err)
}
return nil
}

View File

@@ -1,74 +0,0 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package installer
import (
"sync"
)
// Ensure, that InstallerMock does implement Installer.
// If this is not the case, regenerate this file with moq.
var _ Installer = &InstallerMock{}
// InstallerMock is a mock implementation of Installer.
//
// func TestSomethingThatUsesInstaller(t *testing.T) {
//
// // make and configure a mocked Installer
// mockedInstaller := &InstallerMock{
// InstallFunc: func(s string) error {
// panic("mock out the Install method")
// },
// }
//
// // use mockedInstaller in code that requires Installer
// // and then make assertions.
//
// }
type InstallerMock struct {
// InstallFunc mocks the Install method.
InstallFunc func(s string) error
// calls tracks calls to the methods.
calls struct {
// Install holds details about calls to the Install method.
Install []struct {
// S is the s argument value.
S string
}
}
lockInstall sync.RWMutex
}
// Install calls InstallFunc.
func (mock *InstallerMock) Install(s string) error {
if mock.InstallFunc == nil {
panic("InstallerMock.InstallFunc: method is nil but Installer.Install was just called")
}
callInfo := struct {
S string
}{
S: s,
}
mock.lockInstall.Lock()
mock.calls.Install = append(mock.calls.Install, callInfo)
mock.lockInstall.Unlock()
return mock.InstallFunc(s)
}
// InstallCalls gets all the calls that were made to Install.
// Check the length with:
//
// len(mockedInstaller.InstallCalls())
func (mock *InstallerMock) InstallCalls() []struct {
S string
} {
var calls []struct {
S string
}
mock.lockInstall.RLock()
calls = mock.calls.Install
mock.lockInstall.RUnlock()
return calls
}

View File

@@ -1,251 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package installer
import (
"bytes"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
func TestToolkitInstaller(t *testing.T) {
logger, _ := testlog.NewNullLogger()
type contentCall struct {
wrapper string
path string
mode fs.FileMode
}
var contentCalls []contentCall
installer := &fileInstallerMock{
installFileFunc: func(s1, s2 string) (os.FileMode, error) {
return 0666, nil
},
installContentFunc: func(reader io.Reader, s string, fileMode fs.FileMode) error {
var b bytes.Buffer
if _, err := b.ReadFrom(reader); err != nil {
return err
}
contents := contentCall{
wrapper: b.String(),
path: s,
mode: fileMode,
}
contentCalls = append(contentCalls, contents)
return nil
},
installSymlinkFunc: func(s1, s2 string) error {
return nil
},
}
installFile = installer.installFile
installContent = installer.installContent
installSymlink = installer.installSymlink
root := "/artifacts/test"
libraries := &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
switch s {
case "libnvidia-container.so.1":
return []string{filepath.Join(root, "libnvidia-container.so.987.65.43")}, nil
case "libnvidia-container-go.so.1":
return []string{filepath.Join(root, "libnvidia-container-go.so.1.23.4")}, nil
}
return nil, fmt.Errorf("%v not found", s)
},
}
executables := &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
switch s {
case "nvidia-container-runtime.cdi":
fallthrough
case "nvidia-container-runtime.legacy":
fallthrough
case "nvidia-container-runtime":
fallthrough
case "nvidia-ctk":
fallthrough
case "nvidia-container-cli":
fallthrough
case "nvidia-container-runtime-hook":
fallthrough
case "nvidia-cdi-hook":
return []string{filepath.Join(root, "usr/bin", s)}, nil
}
return nil, fmt.Errorf("%v not found", s)
},
}
r := &artifactRoot{
libraries: libraries,
executables: executables,
}
createDirectory := &InstallerMock{
InstallFunc: func(c string) error {
return nil
},
}
i := toolkitInstaller{
logger: logger,
artifactRoot: r,
ensureTargetDirectory: createDirectory,
}
err := i.Install("/foo/bar/baz")
require.NoError(t, err)
require.ElementsMatch(t,
[]struct {
S string
}{
{"/foo/bar/baz"},
},
createDirectory.InstallCalls(),
)
require.ElementsMatch(t,
installer.installFileCalls(),
[]struct {
S1 string
S2 string
}{
{"/artifacts/test/libnvidia-container-go.so.1.23.4", "/foo/bar/baz/libnvidia-container-go.so.1.23.4"},
{"/artifacts/test/libnvidia-container.so.987.65.43", "/foo/bar/baz/libnvidia-container.so.987.65.43"},
{"/artifacts/test/usr/bin/nvidia-container-runtime.cdi", "/foo/bar/baz/nvidia-container-runtime.cdi.real"},
{"/artifacts/test/usr/bin/nvidia-container-runtime.legacy", "/foo/bar/baz/nvidia-container-runtime.legacy.real"},
{"/artifacts/test/usr/bin/nvidia-container-runtime", "/foo/bar/baz/nvidia-container-runtime.real"},
{"/artifacts/test/usr/bin/nvidia-ctk", "/foo/bar/baz/nvidia-ctk.real"},
{"/artifacts/test/usr/bin/nvidia-cdi-hook", "/foo/bar/baz/nvidia-cdi-hook.real"},
{"/artifacts/test/usr/bin/nvidia-container-cli", "/foo/bar/baz/nvidia-container-cli.real"},
{"/artifacts/test/usr/bin/nvidia-container-runtime-hook", "/foo/bar/baz/nvidia-container-runtime-hook.real"},
},
)
require.ElementsMatch(t,
installer.installSymlinkCalls(),
[]struct {
S1 string
S2 string
}{
{"libnvidia-container-go.so.1.23.4", "/foo/bar/baz/libnvidia-container-go.so.1"},
{"libnvidia-container.so.987.65.43", "/foo/bar/baz/libnvidia-container.so.1"},
{"nvidia-container-runtime-hook", "/foo/bar/baz/nvidia-container-toolkit"},
},
)
require.ElementsMatch(t,
contentCalls,
[]contentCall{
{
path: "/foo/bar/baz/nvidia-container-runtime",
mode: 0777,
wrapper: `#! /bin/sh
cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
if [ "${?}" != "0" ]; then
echo "nvidia driver modules are not yet loaded, invoking runc directly"
exec runc "$@"
fi
PATH=/foo/bar/baz:$PATH \
XDG_CONFIG_HOME=/foo/bar/baz/.config \
/foo/bar/baz/nvidia-container-runtime.real \
"$@"
`,
},
{
path: "/foo/bar/baz/nvidia-container-runtime.cdi",
mode: 0777,
wrapper: `#! /bin/sh
cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
if [ "${?}" != "0" ]; then
echo "nvidia driver modules are not yet loaded, invoking runc directly"
exec runc "$@"
fi
PATH=/foo/bar/baz:$PATH \
XDG_CONFIG_HOME=/foo/bar/baz/.config \
/foo/bar/baz/nvidia-container-runtime.cdi.real \
"$@"
`,
},
{
path: "/foo/bar/baz/nvidia-container-runtime.legacy",
mode: 0777,
wrapper: `#! /bin/sh
cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
if [ "${?}" != "0" ]; then
echo "nvidia driver modules are not yet loaded, invoking runc directly"
exec runc "$@"
fi
PATH=/foo/bar/baz:$PATH \
XDG_CONFIG_HOME=/foo/bar/baz/.config \
/foo/bar/baz/nvidia-container-runtime.legacy.real \
"$@"
`,
},
{
path: "/foo/bar/baz/nvidia-ctk",
mode: 0777,
wrapper: `#! /bin/sh
PATH=/foo/bar/baz:$PATH \
/foo/bar/baz/nvidia-ctk.real \
"$@"
`,
},
{
path: "/foo/bar/baz/nvidia-cdi-hook",
mode: 0777,
wrapper: `#! /bin/sh
PATH=/foo/bar/baz:$PATH \
/foo/bar/baz/nvidia-cdi-hook.real \
"$@"
`,
},
{
path: "/foo/bar/baz/nvidia-container-cli",
mode: 0777,
wrapper: `#! /bin/sh
LD_LIBRARY_PATH=/foo/bar/baz:$LD_LIBRARY_PATH \
PATH=/foo/bar/baz:$PATH \
/foo/bar/baz/nvidia-container-cli.real \
"$@"
`,
},
{
path: "/foo/bar/baz/nvidia-container-runtime-hook",
mode: 0777,
wrapper: `#! /bin/sh
PATH=/foo/bar/baz:$PATH \
/foo/bar/baz/nvidia-container-runtime-hook.real \
-config /foo/bar/baz/.config/nvidia-container-runtime/config.toml \
"$@"
`,
},
},
)
}

View File

@@ -1,73 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package installer
import (
"path/filepath"
log "github.com/sirupsen/logrus"
)
// collectLibraries locates and installs the libraries that are part of
// the nvidia-container-toolkit.
// A predefined set of library candidates are considered, with the first one
// resulting in success being installed to the toolkit folder. The install process
// resolves the symlink for the library and copies the versioned library itself.
func (t *toolkitInstaller) collectLibraries() ([]Installer, error) {
requiredLibraries := []string{
"libnvidia-container.so.1",
"libnvidia-container-go.so.1",
}
var installers []Installer
for _, l := range requiredLibraries {
libraryPath, err := t.artifactRoot.findLibrary(l)
if err != nil {
if t.ignoreErrors {
log.Errorf("Ignoring error: %v", err)
continue
}
return nil, err
}
installers = append(installers, library(libraryPath))
if filepath.Base(libraryPath) == l {
continue
}
link := symlink{
linkname: l,
target: filepath.Base(libraryPath),
}
installers = append(installers, link)
}
return installers, nil
}
type library string
// Install copies the library l to the destination folder.
// The same basename is used in the destination folder.
func (l library) Install(destinationDir string) error {
dest := filepath.Join(destinationDir, filepath.Base(string(l)))
_, err := installFile(string(l), dest)
return err
}

View File

@@ -1,47 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package installer
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
type Option func(*toolkitInstaller)
func WithLogger(logger logger.Interface) Option {
return func(ti *toolkitInstaller) {
ti.logger = logger
}
}
func WithArtifactRoot(artifactRoot *artifactRoot) Option {
return func(ti *toolkitInstaller) {
ti.artifactRoot = artifactRoot
}
}
func WithIgnoreErrors(ignoreErrors bool) Option {
return func(ti *toolkitInstaller) {
ti.ignoreErrors = ignoreErrors
}
}
// WithSourceRoot sets the root directory for locating artifacts to be installed.
func WithSourceRoot(sourceRoot string) Option {
return func(ti *toolkitInstaller) {
ti.sourceRoot = sourceRoot
}
}

View File

@@ -1,40 +0,0 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package toolkit
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
// An Option provides a mechanism to configure an Installer.
type Option func(*Installer)
func WithLogger(logger logger.Interface) Option {
return func(i *Installer) {
i.logger = logger
}
}
func WithToolkitRoot(toolkitRoot string) Option {
return func(i *Installer) {
i.toolkitRoot = toolkitRoot
}
}
func WithSourceRoot(sourceRoot string) Option {
return func(i *Installer) {
i.sourceRoot = sourceRoot
}
}

View File

@@ -1,537 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package toolkit
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/urfave/cli/v2"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/pkg/parser"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/toolkit/installer"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
)
const (
// DefaultNvidiaDriverRoot specifies the default NVIDIA driver run directory
DefaultNvidiaDriverRoot = "/run/nvidia/driver"
configFilename = "config.toml"
)
type cdiOptions struct {
Enabled bool
outputDir string
kind string
vendor string
class string
}
type Options struct {
DriverRoot string
DevRoot string
DriverRootCtrPath string
DevRootCtrPath string
ContainerRuntimeMode string
ContainerRuntimeDebug string
ContainerRuntimeLogLevel string
ContainerRuntimeModesCdiDefaultKind string
ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice
ContainerRuntimeRuntimes cli.StringSlice
ContainerRuntimeHookSkipModeDetection bool
ContainerCLIDebug string
// CDI stores the CDI options for the toolkit.
CDI cdiOptions
createDeviceNodes cli.StringSlice
acceptNVIDIAVisibleDevicesWhenUnprivileged bool
acceptNVIDIAVisibleDevicesAsVolumeMounts bool
ignoreErrors bool
optInFeatures cli.StringSlice
}
func Flags(opts *Options) []cli.Flag {
flags := []cli.Flag{
&cli.StringFlag{
Name: "driver-root",
Aliases: []string{"nvidia-driver-root"},
Value: DefaultNvidiaDriverRoot,
Destination: &opts.DriverRoot,
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
},
&cli.StringFlag{
Name: "driver-root-ctr-path",
Value: DefaultNvidiaDriverRoot,
Destination: &opts.DriverRootCtrPath,
EnvVars: []string{"DRIVER_ROOT_CTR_PATH"},
},
&cli.StringFlag{
Name: "dev-root",
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
Destination: &opts.DevRoot,
EnvVars: []string{"NVIDIA_DEV_ROOT", "DEV_ROOT"},
},
&cli.StringFlag{
Name: "dev-root-ctr-path",
Usage: "Specify the root where `/dev` is located in the container. If this is not specified, the driver-root-ctr-path is assumed.",
Destination: &opts.DevRootCtrPath,
EnvVars: []string{"DEV_ROOT_CTR_PATH"},
},
&cli.StringFlag{
Name: "nvidia-container-runtime.debug",
Aliases: []string{"nvidia-container-runtime-debug"},
Usage: "Specify the location of the debug log file for the NVIDIA Container Runtime",
Destination: &opts.ContainerRuntimeDebug,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_DEBUG"},
},
&cli.StringFlag{
Name: "nvidia-container-runtime.log-level",
Aliases: []string{"nvidia-container-runtime-debug-log-level"},
Destination: &opts.ContainerRuntimeLogLevel,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"},
},
&cli.StringFlag{
Name: "nvidia-container-runtime.mode",
Aliases: []string{"nvidia-container-runtime-mode"},
Destination: &opts.ContainerRuntimeMode,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODE"},
},
&cli.StringFlag{
Name: "nvidia-container-runtime.modes.cdi.default-kind",
Destination: &opts.ContainerRuntimeModesCdiDefaultKind,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND"},
},
&cli.StringSliceFlag{
Name: "nvidia-container-runtime.modes.cdi.annotation-prefixes",
Destination: &opts.ContainerRuntimeModesCDIAnnotationPrefixes,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"},
},
&cli.StringSliceFlag{
Name: "nvidia-container-runtime.runtimes",
Destination: &opts.ContainerRuntimeRuntimes,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_RUNTIMES"},
},
&cli.BoolFlag{
Name: "nvidia-container-runtime-hook.skip-mode-detection",
Value: true,
Destination: &opts.ContainerRuntimeHookSkipModeDetection,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_HOOK_SKIP_MODE_DETECTION"},
},
&cli.StringFlag{
Name: "nvidia-container-cli.debug",
Aliases: []string{"nvidia-container-cli-debug"},
Usage: "Specify the location of the debug log file for the NVIDIA Container CLI",
Destination: &opts.ContainerCLIDebug,
EnvVars: []string{"NVIDIA_CONTAINER_CLI_DEBUG"},
},
&cli.BoolFlag{
Name: "accept-nvidia-visible-devices-envvar-when-unprivileged",
Usage: "Set the accept-nvidia-visible-devices-envvar-when-unprivileged config option",
Value: true,
Destination: &opts.acceptNVIDIAVisibleDevicesWhenUnprivileged,
EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_ENVVAR_WHEN_UNPRIVILEGED"},
},
&cli.BoolFlag{
Name: "accept-nvidia-visible-devices-as-volume-mounts",
Usage: "Set the accept-nvidia-visible-devices-as-volume-mounts config option",
Destination: &opts.acceptNVIDIAVisibleDevicesAsVolumeMounts,
EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS"},
},
&cli.BoolFlag{
Name: "cdi-enabled",
Aliases: []string{"enable-cdi"},
Usage: "enable the generation of a CDI specification",
Destination: &opts.CDI.Enabled,
EnvVars: []string{"CDI_ENABLED", "ENABLE_CDI"},
},
&cli.StringFlag{
Name: "cdi-output-dir",
Usage: "the directory where the CDI output files are to be written. If this is set to '', no CDI specification is generated.",
Value: "/var/run/cdi",
Destination: &opts.CDI.outputDir,
EnvVars: []string{"CDI_OUTPUT_DIR"},
},
&cli.StringFlag{
Name: "cdi-kind",
Usage: "the vendor string to use for the generated CDI specification",
Value: "management.nvidia.com/gpu",
Destination: &opts.CDI.kind,
EnvVars: []string{"CDI_KIND"},
},
&cli.BoolFlag{
Name: "ignore-errors",
Usage: "ignore errors when installing the NVIDIA Container toolkit. This is used for testing purposes only.",
Hidden: true,
Destination: &opts.ignoreErrors,
},
&cli.StringSliceFlag{
Name: "create-device-nodes",
Usage: "(Only applicable with --cdi-enabled) specifies which device nodes should be created. If any one of the options is set to '' or 'none', no device nodes will be created.",
Value: cli.NewStringSlice("control"),
Destination: &opts.createDeviceNodes,
EnvVars: []string{"CREATE_DEVICE_NODES"},
},
&cli.StringSliceFlag{
Name: "opt-in-features",
Hidden: true,
Destination: &opts.optInFeatures,
EnvVars: []string{"NVIDIA_CONTAINER_TOOLKIT_OPT_IN_FEATURES"},
},
}
return flags
}
// An Installer is used to install the NVIDIA Container Toolkit from the toolkit container.
type Installer struct {
logger logger.Interface
sourceRoot string
// toolkitRoot specifies the destination path at which the toolkit is installed.
toolkitRoot string
}
// NewInstaller creates an installer for the NVIDIA Container Toolkit.
func NewInstaller(opts ...Option) *Installer {
i := &Installer{}
for _, opt := range opts {
opt(i)
}
if i.logger == nil {
i.logger = logger.New()
}
return i
}
// ValidateOptions checks whether the specified options are valid
func (t *Installer) ValidateOptions(opts *Options) error {
if t == nil {
return fmt.Errorf("toolkit installer is not initilized")
}
if t.toolkitRoot == "" {
return fmt.Errorf("invalid --toolkit-root option: %v", t.toolkitRoot)
}
vendor, class := parser.ParseQualifier(opts.CDI.kind)
if err := parser.ValidateVendorName(vendor); err != nil {
return fmt.Errorf("invalid CDI vendor name: %v", err)
}
if err := parser.ValidateClassName(class); err != nil {
return fmt.Errorf("invalid CDI class name: %v", err)
}
opts.CDI.vendor = vendor
opts.CDI.class = class
if opts.CDI.Enabled && opts.CDI.outputDir == "" {
t.logger.Warning("Skipping CDI spec generation (no output directory specified)")
opts.CDI.Enabled = false
}
isDisabled := false
for _, mode := range opts.createDeviceNodes.Value() {
if mode != "" && mode != "none" && mode != "control" {
return fmt.Errorf("invalid --create-device-nodes value: %v", mode)
}
if mode == "" || mode == "none" {
isDisabled = true
break
}
}
if !opts.CDI.Enabled && !isDisabled {
t.logger.Info("disabling device node creation since --cdi-enabled=false")
isDisabled = true
}
if isDisabled {
opts.createDeviceNodes = *cli.NewStringSlice()
}
return nil
}
// Install installs the components of the NVIDIA container toolkit.
// Any existing installation is removed.
func (t *Installer) Install(cli *cli.Context, opts *Options) error {
if t == nil {
return fmt.Errorf("toolkit installer is not initilized")
}
t.logger.Infof("Installing NVIDIA container toolkit to '%v'", t.toolkitRoot)
t.logger.Infof("Removing existing NVIDIA container toolkit installation")
err := os.RemoveAll(t.toolkitRoot)
if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error removing toolkit directory: %v", err)
} else if err != nil {
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err))
}
// Create a toolkit installer to actually install the toolkit components.
toolkit, err := installer.New(
installer.WithLogger(t.logger),
installer.WithSourceRoot(t.sourceRoot),
installer.WithIgnoreErrors(opts.ignoreErrors),
)
if err != nil {
if !opts.ignoreErrors {
return fmt.Errorf("could not create toolkit installer: %w", err)
}
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not create toolkit installer: %w", err))
}
if err := toolkit.Install(t.toolkitRoot); err != nil {
if !opts.ignoreErrors {
return fmt.Errorf("could not install toolkit components: %w", err)
}
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not install toolkit components: %w", err))
}
err = t.installToolkitConfig(cli, opts)
if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)
} else if err != nil {
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err))
}
err = t.createDeviceNodes(opts)
if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error creating device nodes: %v", err)
} else if err != nil {
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error creating device nodes: %v", err))
}
nvidiaCDIHookPath := filepath.Join(t.toolkitRoot, "nvidia-cdi-hook")
err = t.generateCDISpec(opts, nvidiaCDIHookPath)
if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error generating CDI specification: %v", err)
} else if err != nil {
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error generating CDI specification: %v", err))
}
return nil
}
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
// that the settings are updated to match the desired install and nvidia driver directories.
func (t *Installer) installToolkitConfig(c *cli.Context, opts *Options) error {
toolkitConfigDir := filepath.Join(t.toolkitRoot, ".config", "nvidia-container-runtime")
toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename)
t.logger.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)
err := t.createDirectories(toolkitConfigDir)
if err != nil && !opts.ignoreErrors {
return fmt.Errorf("could not create required directories: %v", err)
} else if err != nil {
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err))
}
nvidiaContainerCliExecutablePath := filepath.Join(t.toolkitRoot, "nvidia-container-cli")
nvidiaCTKPath := filepath.Join(t.toolkitRoot, "nvidia-ctk")
nvidiaContainerRuntimeHookPath := filepath.Join(t.toolkitRoot, "nvidia-container-runtime-hook")
cfg, err := config.New()
if err != nil {
return fmt.Errorf("could not open source config file: %v", err)
}
targetConfig, err := os.Create(toolkitConfigPath)
if err != nil {
return fmt.Errorf("could not create target config file: %v", err)
}
defer targetConfig.Close()
// Read the ldconfig path from the config as this may differ per platform
// On ubuntu-based systems this ends in `.real`
ldconfigPath := fmt.Sprintf("%s", cfg.GetDefault("nvidia-container-cli.ldconfig", "/sbin/ldconfig"))
// Use the driver run root as the root:
driverLdconfigPath := config.NormalizeLDConfigPath("@" + filepath.Join(opts.DriverRoot, strings.TrimPrefix(ldconfigPath, "@/")))
configValues := map[string]interface{}{
// Set the options in the root toml table
"accept-nvidia-visible-devices-envvar-when-unprivileged": opts.acceptNVIDIAVisibleDevicesWhenUnprivileged,
"accept-nvidia-visible-devices-as-volume-mounts": opts.acceptNVIDIAVisibleDevicesAsVolumeMounts,
// Set the nvidia-container-cli options
"nvidia-container-cli.root": opts.DriverRoot,
"nvidia-container-cli.path": nvidiaContainerCliExecutablePath,
"nvidia-container-cli.ldconfig": driverLdconfigPath,
// Set nvidia-ctk options
"nvidia-ctk.path": nvidiaCTKPath,
// Set the nvidia-container-runtime-hook options
"nvidia-container-runtime-hook.path": nvidiaContainerRuntimeHookPath,
"nvidia-container-runtime-hook.skip-mode-detection": opts.ContainerRuntimeHookSkipModeDetection,
}
toolkitRuntimeList := opts.ContainerRuntimeRuntimes.Value()
if len(toolkitRuntimeList) > 0 {
configValues["nvidia-container-runtime.runtimes"] = toolkitRuntimeList
}
for _, optInFeature := range opts.optInFeatures.Value() {
configValues["features."+optInFeature] = true
}
for key, value := range configValues {
cfg.Set(key, value)
}
// Set the optional config options
optionalConfigValues := map[string]interface{}{
"nvidia-container-runtime.debug": opts.ContainerRuntimeDebug,
"nvidia-container-runtime.log-level": opts.ContainerRuntimeLogLevel,
"nvidia-container-runtime.mode": opts.ContainerRuntimeMode,
"nvidia-container-runtime.modes.cdi.annotation-prefixes": opts.ContainerRuntimeModesCDIAnnotationPrefixes,
"nvidia-container-runtime.modes.cdi.default-kind": opts.ContainerRuntimeModesCdiDefaultKind,
"nvidia-container-runtime.runtimes": opts.ContainerRuntimeRuntimes,
"nvidia-container-cli.debug": opts.ContainerCLIDebug,
}
for key, value := range optionalConfigValues {
if !c.IsSet(key) {
t.logger.Infof("Skipping unset option: %v", key)
continue
}
if value == nil {
t.logger.Infof("Skipping option with nil value: %v", key)
continue
}
switch v := value.(type) {
case string:
if v == "" {
continue
}
case cli.StringSlice:
if len(v.Value()) == 0 {
continue
}
value = v.Value()
default:
t.logger.Warningf("Unexpected type for option %v=%v: %T", key, value, v)
}
cfg.Set(key, value)
}
if _, err := cfg.WriteTo(targetConfig); err != nil {
return fmt.Errorf("error writing config: %v", err)
}
os.Stdout.WriteString("Using config:\n")
if _, err = cfg.WriteTo(os.Stdout); err != nil {
t.logger.Warningf("Failed to output config to STDOUT: %v", err)
}
return nil
}
func (t *Installer) createDirectories(dir ...string) error {
for _, d := range dir {
t.logger.Infof("Creating directory '%v'", d)
err := os.MkdirAll(d, 0755)
if err != nil {
return fmt.Errorf("error creating directory: %v", err)
}
}
return nil
}
func (t *Installer) createDeviceNodes(opts *Options) error {
modes := opts.createDeviceNodes.Value()
if len(modes) == 0 {
return nil
}
devices, err := nvdevices.New(
nvdevices.WithDevRoot(opts.DevRootCtrPath),
)
if err != nil {
return fmt.Errorf("failed to create library: %v", err)
}
for _, mode := range modes {
t.logger.Infof("Creating %v device nodes at %v", mode, opts.DevRootCtrPath)
if mode != "control" {
t.logger.Warningf("Unrecognised device mode: %v", mode)
continue
}
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create control device nodes: %v", err)
}
}
return nil
}
// generateCDISpec generates a CDI spec for use in management containers
func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) error {
if !opts.CDI.Enabled {
return nil
}
t.logger.Info("Generating CDI spec for management containers")
cdilib, err := nvcdi.New(
nvcdi.WithLogger(t.logger),
nvcdi.WithMode(nvcdi.ModeManagement),
nvcdi.WithDriverRoot(opts.DriverRootCtrPath),
nvcdi.WithDevRoot(opts.DevRootCtrPath),
nvcdi.WithNVIDIACDIHookPath(nvidiaCDIHookPath),
nvcdi.WithVendor(opts.CDI.vendor),
nvcdi.WithClass(opts.CDI.class),
)
if err != nil {
return fmt.Errorf("failed to create CDI library for management containers: %v", err)
}
spec, err := cdilib.GetSpec()
if err != nil {
return fmt.Errorf("failed to genereate CDI spec for management containers: %v", err)
}
transformer := transformroot.NewDriverTransformer(
transformroot.WithDriverRoot(opts.DriverRootCtrPath),
transformroot.WithTargetDriverRoot(opts.DriverRoot),
transformroot.WithDevRoot(opts.DevRootCtrPath),
transformroot.WithTargetDevRoot(opts.DevRoot),
)
if err := transformer.Transform(spec.Raw()); err != nil {
return fmt.Errorf("failed to transform driver root in CDI spec: %v", err)
}
name, err := cdi.GenerateNameForSpec(spec.Raw())
if err != nil {
return fmt.Errorf("failed to generate CDI name for management containers: %v", err)
}
err = spec.Save(filepath.Join(opts.CDI.outputDir, name))
if err != nil {
return fmt.Errorf("failed to save CDI spec for management containers: %v", err)
}
return nil
}

View File

@@ -1,222 +0,0 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package toolkit
import (
"fmt"
"os"
"path/filepath"
"strings"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
func TestInstall(t *testing.T) {
t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true")
logger, _ := testlog.NewNullLogger()
moduleRoot, err := test.GetModuleRoot()
require.NoError(t, err)
artifactRoot := filepath.Join(moduleRoot, "testdata", "installer", "artifacts")
testCases := []struct {
description string
hostRoot string
packageType string
cdiEnabled bool
expectedError error
expectedCdiSpec string
}{
{
hostRoot: "rootfs-empty",
packageType: "deb",
},
{
hostRoot: "rootfs-empty",
packageType: "rpm",
},
{
hostRoot: "rootfs-empty",
packageType: "deb",
cdiEnabled: true,
expectedError: fmt.Errorf("no NVIDIA device nodes found"),
},
{
hostRoot: "rootfs-1",
packageType: "deb",
cdiEnabled: true,
expectedCdiSpec: `---
cdiVersion: 0.5.0
kind: example.com/class
devices:
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: /host/driver/root/dev/nvidia0
- path: /dev/nvidiactl
hostPath: /host/driver/root/dev/nvidiactl
- path: /dev/nvidia-caps-imex-channels/channel0
hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel0
- path: /dev/nvidia-caps-imex-channels/channel1
hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel1
- path: /dev/nvidia-caps-imex-channels/channel2047
hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel2047
containerEdits:
env:
- NVIDIA_VISIBLE_DEVICES=void
hooks:
- hookName: createContainer
path: {{ .toolkitRoot }}/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-symlinks
- --link
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: {{ .toolkitRoot }}/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- update-ldcache
- --folder
- /lib/x86_64-linux-gnu
env:
- NVIDIA_CTK_DEBUG=false
mounts:
- hostPath: /host/driver/root/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
}
for _, tc := range testCases {
// hostRoot := filepath.Join(moduleRoot, "testdata", "lookup", tc.hostRoot)
t.Run(tc.description, func(t *testing.T) {
testRoot := t.TempDir()
toolkitRoot := filepath.Join(testRoot, "toolkit-test")
cdiOutputDir := filepath.Join(moduleRoot, "toolkit-test", "/var/cdi")
sourceRoot := filepath.Join(artifactRoot, tc.packageType)
options := Options{
DriverRoot: "/host/driver/root",
DriverRootCtrPath: filepath.Join(moduleRoot, "testdata", "lookup", tc.hostRoot),
CDI: cdiOptions{
Enabled: tc.cdiEnabled,
outputDir: cdiOutputDir,
kind: "example.com/class",
},
}
ti := NewInstaller(
WithLogger(logger),
WithToolkitRoot(toolkitRoot),
WithSourceRoot(sourceRoot),
)
require.NoError(t, ti.ValidateOptions(&options))
err := ti.Install(&cli.Context{}, &options)
if tc.expectedError == nil {
require.NoError(t, err)
} else {
require.Contains(t, err.Error(), tc.expectedError.Error())
}
require.DirExists(t, toolkitRoot)
requireSymlink(t, toolkitRoot, "libnvidia-container.so.1", "libnvidia-container.so.99.88.77")
requireSymlink(t, toolkitRoot, "libnvidia-container-go.so.1", "libnvidia-container-go.so.99.88.77")
requireWrappedExecutable(t, toolkitRoot, "nvidia-cdi-hook")
requireWrappedExecutable(t, toolkitRoot, "nvidia-container-cli")
requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime")
requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime-hook")
requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime.cdi")
requireWrappedExecutable(t, toolkitRoot, "nvidia-container-runtime.legacy")
requireWrappedExecutable(t, toolkitRoot, "nvidia-ctk")
requireSymlink(t, toolkitRoot, "nvidia-container-toolkit", "nvidia-container-runtime-hook")
// TODO: Add checks for wrapper contents
// grep -q -E "nvidia driver modules are not yet loaded, invoking runc directly" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime"
// grep -q -E "exec runc \".@\"" "${shared_dir}/usr/local/nvidia/toolkit/nvidia-container-runtime"
require.DirExists(t, filepath.Join(toolkitRoot, ".config"))
require.DirExists(t, filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime"))
require.FileExists(t, filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime", "config.toml"))
cfgToml, err := config.New(config.WithConfigFile(filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime", "config.toml")))
require.NoError(t, err)
cfg, err := cfgToml.Config()
require.NoError(t, err)
// Ensure that the config file has the required contents.
// TODO: Add checks for additional config options.
require.Equal(t, "/host/driver/root", cfg.NVIDIAContainerCLIConfig.Root)
require.Equal(t, "@/host/driver/root/sbin/ldconfig", string(cfg.NVIDIAContainerCLIConfig.Ldconfig))
require.EqualValues(t, filepath.Join(toolkitRoot, "nvidia-container-cli"), cfg.NVIDIAContainerCLIConfig.Path)
require.EqualValues(t, filepath.Join(toolkitRoot, "nvidia-ctk"), cfg.NVIDIACTKConfig.Path)
if len(tc.expectedCdiSpec) > 0 {
cdiSpecFile := filepath.Join(cdiOutputDir, "example.com-class.yaml")
require.FileExists(t, cdiSpecFile)
info, err := os.Stat(cdiSpecFile)
require.NoError(t, err)
require.NotZero(t, info.Mode()&0004)
contents, err := os.ReadFile(cdiSpecFile)
require.NoError(t, err)
require.Equal(t, strings.ReplaceAll(tc.expectedCdiSpec, "{{ .toolkitRoot }}", toolkitRoot), string(contents))
}
})
}
}
func requireWrappedExecutable(t *testing.T, toolkitRoot string, expectedExecutable string) {
requireExecutable(t, toolkitRoot, expectedExecutable)
requireExecutable(t, toolkitRoot, expectedExecutable+".real")
}
func requireExecutable(t *testing.T, toolkitRoot string, expectedExecutable string) {
executable := filepath.Join(toolkitRoot, expectedExecutable)
require.FileExists(t, executable)
info, err := os.Lstat(executable)
require.NoError(t, err)
require.Zero(t, info.Mode()&os.ModeSymlink)
require.NotZero(t, info.Mode()&0111)
}
func requireSymlink(t *testing.T, toolkitRoot string, expectedLink string, expectedTarget string) {
link := filepath.Join(toolkitRoot, expectedLink)
require.FileExists(t, link)
target, err := symlinks.Resolve(link)
require.NoError(t, err)
require.Equal(t, expectedTarget, target)
}

View File

@@ -43,7 +43,7 @@ By default, all commands output to `STDOUT`, but specifying the `--output` flag
### Generate CDI specifications
The [Container Device Interface (CDI)](https://tags.cncf.io/container-device-interface) provides
The [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface) provides
a vendor-agnostic mechanism to make arbitrary devices accessible in containerized environments. To allow NVIDIA devices to be
used in these environments, the NVIDIA Container Toolkit CLI includes functionality to generate a CDI specification for the
available NVIDIA GPUs in a system.

View File

@@ -17,12 +17,11 @@
package cdi
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/list"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {

View File

@@ -23,9 +23,7 @@ import (
"strings"
"github.com/urfave/cli/v2"
cdi "tags.cncf.io/container-device-interface/pkg/parser"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
@@ -44,28 +42,21 @@ type command struct {
}
type options struct {
output string
format string
deviceNameStrategies cli.StringSlice
driverRoot string
devRoot string
nvidiaCDIHookPath string
ldconfigPath string
mode string
vendor string
class string
output string
format string
deviceNameStrategy string
driverRoot string
nvidiaCTKPath string
mode string
vendor string
class string
configSearchPaths cli.StringSlice
librarySearchPaths cli.StringSlice
disabledHooks cli.StringSlice
csv struct {
files cli.StringSlice
ignorePatterns cli.StringSlice
}
// the following are used for dependency injection during spec generation.
nvmllib nvml.Interface
}
// NewCommand constructs a generate-cdi command with the specified logger
@@ -93,74 +84,44 @@ func (m command) build() *cli.Command {
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "config-search-path",
Usage: "Specify the path to search for config files when discovering the entities that should be included in the CDI specification.",
Destination: &opts.configSearchPaths,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CONFIG_SEARCH_PATHS"},
},
&cli.StringFlag{
Name: "output",
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
Destination: &opts.output,
EnvVars: []string{"NVIDIA_CTK_CDI_OUTPUT_FILE_PATH"},
},
&cli.StringFlag{
Name: "format",
Usage: "The output format for the generated spec [json | yaml]. This overrides the format defined by the output file extension (if specified).",
Value: spec.FormatYAML,
Destination: &opts.format,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_OUTPUT_FORMAT"},
},
&cli.StringFlag{
Name: "mode",
Aliases: []string{"discovery-mode"},
Usage: "The mode to use when discovering the available entities. " +
"One of [" + strings.Join(nvcdi.AllModes[string](), " | ") + "]. " +
"If mode is set to 'auto' the mode will be determined based on the system configuration.",
Value: string(nvcdi.ModeAuto),
Name: "mode",
Aliases: []string{"discovery-mode"},
Usage: "The mode to use when discovering the available entities. One of [auto | nvml | wsl]. If mode is set to 'auto' the mode will be determined based on the system configuration.",
Value: nvcdi.ModeAuto,
Destination: &opts.mode,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_MODE"},
},
&cli.StringFlag{
Name: "dev-root",
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
Destination: &opts.devRoot,
EnvVars: []string{"NVIDIA_CTK_DEV_ROOT"},
},
&cli.StringSliceFlag{
Name: "device-name-strategy",
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID),
Destination: &opts.deviceNameStrategies,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_DEVICE_NAME_STRATEGIES"},
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
Value: nvcdi.DeviceNameStrategyIndex,
Destination: &opts.deviceNameStrategy,
},
&cli.StringFlag{
Name: "driver-root",
Usage: "Specify the NVIDIA GPU driver root to use when discovering the entities that should be included in the CDI specification.",
Destination: &opts.driverRoot,
EnvVars: []string{"NVIDIA_CTK_DRIVER_ROOT"},
},
&cli.StringSliceFlag{
Name: "library-search-path",
Usage: "Specify the path to search for libraries when discovering the entities that should be included in the CDI specification.\n\tNote: This option only applies to CSV mode.",
Destination: &opts.librarySearchPaths,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_LIBRARY_SEARCH_PATHS"},
},
&cli.StringFlag{
Name: "nvidia-cdi-hook-path",
Aliases: []string{"nvidia-ctk-path"},
Usage: "Specify the path to use for the nvidia-cdi-hook in the generated CDI specification. " +
"If not specified, the PATH will be searched for `nvidia-cdi-hook`. " +
"NOTE: That if this is specified as `nvidia-ctk`, the PATH will be searched for `nvidia-ctk` instead.",
Destination: &opts.nvidiaCDIHookPath,
EnvVars: []string{"NVIDIA_CTK_CDI_HOOK_PATH"},
},
&cli.StringFlag{
Name: "ldconfig-path",
Usage: "Specify the path to use for ldconfig in the generated CDI specification",
Destination: &opts.ldconfigPath,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_LDCONFIG_PATH"},
Name: "nvidia-ctk-path",
Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.",
Destination: &opts.nvidiaCTKPath,
},
&cli.StringFlag{
Name: "vendor",
@@ -168,7 +129,6 @@ func (m command) build() *cli.Command {
Usage: "the vendor string to use for the generated CDI specification.",
Value: "nvidia.com",
Destination: &opts.vendor,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_VENDOR"},
},
&cli.StringFlag{
Name: "class",
@@ -176,30 +136,17 @@ func (m command) build() *cli.Command {
Usage: "the class string to use for the generated CDI specification.",
Value: "gpu",
Destination: &opts.class,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CLASS"},
},
&cli.StringSliceFlag{
Name: "csv.file",
Usage: "The path to the list of CSV files to use when generating the CDI specification in CSV mode.",
Value: cli.NewStringSlice(csv.DefaultFileList()...),
Destination: &opts.csv.files,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CSV_FILES"},
},
&cli.StringSliceFlag{
Name: "csv.ignore-pattern",
Usage: "specify a pattern the CSV mount specifications.",
Usage: "Specify a pattern the CSV mount specifications.",
Destination: &opts.csv.ignorePatterns,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CSV_IGNORE_PATTERNS"},
},
&cli.StringSliceFlag{
Name: "disable-hook",
Aliases: []string{"disable-hooks"},
Usage: "specify a specific hook to skip when generating CDI " +
"specifications. This can be specified multiple times and the " +
"special hook name 'all' can be used ensure that the generated " +
"CDI specification does not include any hooks.",
Destination: &opts.disabledHooks,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_DISABLED_HOOKS"},
},
}
@@ -216,18 +163,22 @@ func (m command) validateFlags(c *cli.Context, opts *options) error {
}
opts.mode = strings.ToLower(opts.mode)
if !nvcdi.IsValidMode(opts.mode) {
switch opts.mode {
case nvcdi.ModeAuto:
case nvcdi.ModeCSV:
case nvcdi.ModeNvml:
case nvcdi.ModeWsl:
case nvcdi.ModeManagement:
default:
return fmt.Errorf("invalid discovery mode: %v", opts.mode)
}
for _, strategy := range opts.deviceNameStrategies.Value() {
_, err := nvcdi.NewDeviceNamer(strategy)
if err != nil {
return err
}
_, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
if err != nil {
return err
}
opts.nvidiaCDIHookPath = config.ResolveNVIDIACDIHookPath(m.logger, opts.nvidiaCDIHookPath)
opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath)
if outputFileFormat := formatFromFilename(opts.output); outputFileFormat != "" {
m.logger.Debugf("Inferred output format as %q from output file name", outputFileFormat)
@@ -278,36 +229,21 @@ func formatFromFilename(filename string) string {
}
func (m command) generateSpec(opts *options) (spec.Interface, error) {
var deviceNamers []nvcdi.DeviceNamer
for _, strategy := range opts.deviceNameStrategies.Value() {
deviceNamer, err := nvcdi.NewDeviceNamer(strategy)
if err != nil {
return nil, fmt.Errorf("failed to create device namer: %v", err)
}
deviceNamers = append(deviceNamers, deviceNamer)
deviceNamer, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
if err != nil {
return nil, fmt.Errorf("failed to create device namer: %v", err)
}
cdiOptions := []nvcdi.Option{
cdilib, err := nvcdi.New(
nvcdi.WithLogger(m.logger),
nvcdi.WithDriverRoot(opts.driverRoot),
nvcdi.WithDevRoot(opts.devRoot),
nvcdi.WithNVIDIACDIHookPath(opts.nvidiaCDIHookPath),
nvcdi.WithLdconfigPath(opts.ldconfigPath),
nvcdi.WithDeviceNamers(deviceNamers...),
nvcdi.WithMode(opts.mode),
nvcdi.WithConfigSearchPaths(opts.configSearchPaths.Value()),
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
nvcdi.WithDeviceNamer(deviceNamer),
nvcdi.WithMode(string(opts.mode)),
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
nvcdi.WithCSVFiles(opts.csv.files.Value()),
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
// We set the following to allow for dependency injection:
nvcdi.WithNvmlLib(opts.nvmllib),
}
for _, hook := range opts.disabledHooks.Value() {
cdiOptions = append(cdiOptions, nvcdi.WithDisabledHook(hook))
}
cdilib, err := nvcdi.New(cdiOptions...)
)
if err != nil {
return nil, fmt.Errorf("failed to create CDI library: %v", err)
}

View File

@@ -1,371 +0,0 @@
/**
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"bytes"
"path/filepath"
"strings"
"testing"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
func TestGenerateSpec(t *testing.T) {
t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true")
moduleRoot, err := test.GetModuleRoot()
require.NoError(t, err)
driverRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1")
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
options options
expectedValidateError error
expectedOptions options
expectedError error
expectedSpec string
}{
{
description: "default",
options: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
driverRoot: driverRoot,
},
expectedOptions: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
driverRoot: driverRoot,
},
expectedSpec: `---
cdiVersion: 0.5.0
kind: example.com/device
devices:
- name: "0"
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits:
env:
- NVIDIA_VISIBLE_DEVICES=void
deviceNodes:
- path: /dev/nvidiactl
hostPath: {{ .driverRoot }}/dev/nvidiactl
hooks:
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-symlinks
- --link
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- enable-cuda-compat
- --host-driver-version=999.88.77
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- update-ldcache
- --folder
- /lib/x86_64-linux-gnu
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- disable-device-node-modification
env:
- NVIDIA_CTK_DEBUG=false
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
{
description: "disableHooks1",
options: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat")),
},
expectedOptions: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat")),
},
expectedSpec: `---
cdiVersion: 0.5.0
kind: example.com/device
devices:
- name: "0"
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits:
env:
- NVIDIA_VISIBLE_DEVICES=void
deviceNodes:
- path: /dev/nvidiactl
hostPath: {{ .driverRoot }}/dev/nvidiactl
hooks:
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-symlinks
- --link
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- update-ldcache
- --folder
- /lib/x86_64-linux-gnu
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- disable-device-node-modification
env:
- NVIDIA_CTK_DEBUG=false
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
{
description: "disableHooks2",
options: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat", "update-ldcache")),
},
expectedOptions: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat", "update-ldcache")),
},
expectedSpec: `---
cdiVersion: 0.5.0
kind: example.com/device
devices:
- name: "0"
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits:
env:
- NVIDIA_VISIBLE_DEVICES=void
deviceNodes:
- path: /dev/nvidiactl
hostPath: {{ .driverRoot }}/dev/nvidiactl
hooks:
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-symlinks
- --link
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- disable-device-node-modification
env:
- NVIDIA_CTK_DEBUG=false
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
{
description: "disableHooksAll",
options: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("all")),
},
expectedOptions: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("all")),
},
expectedSpec: `---
cdiVersion: 0.5.0
kind: example.com/device
devices:
- name: "0"
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits:
env:
- NVIDIA_VISIBLE_DEVICES=void
deviceNodes:
- path: /dev/nvidiactl
hostPath: {{ .driverRoot }}/dev/nvidiactl
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
c := command{
logger: logger,
}
err := c.validateFlags(nil, &tc.options)
require.ErrorIs(t, err, tc.expectedValidateError)
require.EqualValues(t, tc.expectedOptions, tc.options)
// Set up a mock server, reusing the DGX A100 mock.
server := dgxa100.New()
// Override the driver version to match the version in our mock filesystem.
server.SystemGetDriverVersionFunc = func() (string, nvml.Return) {
return "999.88.77", nvml.SUCCESS
}
// Set the device count to 1 explicitly since we only have a single device node.
server.DeviceGetCountFunc = func() (int, nvml.Return) {
return 1, nvml.SUCCESS
}
for _, d := range server.Devices {
// TODO: This is not implemented in the mock.
(d.(*dgxa100.Device)).GetMaxMigDeviceCountFunc = func() (int, nvml.Return) {
return 0, nvml.SUCCESS
}
}
tc.options.nvmllib = server
spec, err := c.generateSpec(&tc.options)
require.ErrorIs(t, err, tc.expectedError)
var buf bytes.Buffer
_, err = spec.WriteTo(&buf)
require.NoError(t, err)
require.Equal(t, strings.ReplaceAll(tc.expectedSpec, "{{ .driverRoot }}", driverRoot), buf.String())
})
}
}
// valueOf returns the value of a pointer.
// Note that this does not check for a nil pointer and is only used for testing.
func valueOf[T any](v *T) T {
return *v
}

View File

@@ -17,7 +17,6 @@
package list
import (
"errors"
"fmt"
"github.com/urfave/cli/v2"
@@ -30,9 +29,7 @@ type command struct {
logger logger.Interface
}
type config struct {
cdiSpecDirs cli.StringSlice
}
type config struct{}
// NewCommand constructs a cdi list command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
@@ -58,45 +55,30 @@ func (m command) build() *cli.Command {
},
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "spec-dir",
Usage: "specify the directories to scan for CDI specifications",
Value: cli.NewStringSlice(cdi.DefaultSpecDirs...),
Destination: &cfg.cdiSpecDirs,
EnvVars: []string{"NVIDIA_CTK_CDI_SPEC_DIRS"},
},
}
c.Flags = []cli.Flag{}
return &c
}
func (m command) validateFlags(c *cli.Context, cfg *config) error {
if len(cfg.cdiSpecDirs.Value()) == 0 {
return errors.New("at least one CDI specification directory must be specified")
}
return nil
}
func (m command) run(c *cli.Context, cfg *config) error {
registry, err := cdi.NewCache(
cdi.WithAutoRefresh(false),
cdi.WithSpecDirs(cfg.cdiSpecDirs.Value()...),
cdi.WithSpecDirs(cdi.DefaultSpecDirs...),
)
if err != nil {
return fmt.Errorf("failed to create CDI cache: %v", err)
}
_ = registry.Refresh()
if errors := registry.GetErrors(); len(errors) > 0 {
m.logger.Warningf("The following registry errors were reported:")
for k, err := range errors {
m.logger.Warningf("%v: %v", k, err)
}
}
refreshErr := registry.Refresh()
devices := registry.ListDevices()
m.logger.Infof("Found %d CDI devices", len(devices))
if refreshErr != nil {
m.logger.Warningf("Refreshing the CDI registry returned the following error(s): %v", refreshErr)
}
for _, device := range devices {
fmt.Printf("%s\n", device)
}

View File

@@ -26,9 +26,14 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
)
type loadSaver interface {
Load() (spec.Interface, error)
Save(spec.Interface) error
}
type command struct {
logger logger.Interface
}
@@ -40,9 +45,8 @@ type transformOptions struct {
type options struct {
transformOptions
from string
to string
relativeTo string
from string
to string
}
// NewCommand constructs a generate-cdi command with the specified logger
@@ -69,11 +73,6 @@ func (m command) build() *cli.Command {
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "from",
Usage: "specify the root to be transformed",
Destination: &opts.from,
},
&cli.StringFlag{
Name: "input",
Usage: "Specify the file to read the CDI specification from. If this is '-' the specification is read from STDIN",
@@ -86,10 +85,9 @@ func (m command) build() *cli.Command {
Destination: &opts.output,
},
&cli.StringFlag{
Name: "relative-to",
Usage: "specify whether the transform is relative to the host or to the container. One of [ host | container ]",
Value: "host",
Destination: &opts.relativeTo,
Name: "from",
Usage: "specify the root to be transformed",
Destination: &opts.from,
},
&cli.StringFlag{
Name: "to",
@@ -103,12 +101,6 @@ func (m command) build() *cli.Command {
}
func (m command) validateFlags(c *cli.Context, opts *options) error {
switch opts.relativeTo {
case "host":
case "container":
default:
return fmt.Errorf("invalid --relative-to value: %v", opts.relativeTo)
}
return nil
}
@@ -118,10 +110,9 @@ func (m command) run(c *cli.Context, opts *options) error {
return fmt.Errorf("failed to load CDI specification: %w", err)
}
err = transformroot.New(
transformroot.WithRoot(opts.from),
transformroot.WithTargetRoot(opts.to),
transformroot.WithRelativeTo(opts.relativeTo),
err = transform.NewRootTransformer(
opts.from,
opts.to,
).Transform(spec.Raw())
if err != nil {
return fmt.Errorf("failed to transform CDI specification: %w", err)

View File

@@ -17,10 +17,9 @@
package transform
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {

View File

@@ -38,8 +38,7 @@ type command struct {
// options stores the subcommand options
type options struct {
flags.Options
setListSeparator string
sets cli.StringSlice
sets cli.StringSlice
}
// NewCommand constructs an config command with the specified logger
@@ -58,9 +57,6 @@ func (m command) build() *cli.Command {
c := cli.Command{
Name: "config",
Usage: "Interact with the NVIDIA Container Toolkit configuration",
Before: func(ctx *cli.Context) error {
return validateFlags(ctx, &opts)
},
Action: func(ctx *cli.Context) error {
return run(ctx, &opts)
},
@@ -75,21 +71,10 @@ func (m command) build() *cli.Command {
Destination: &opts.Config,
},
&cli.StringSliceFlag{
Name: "set",
Usage: "Set a config value using the pattern 'key[=value]'. " +
"Specifying only 'key' is equivalent to 'key=true' for boolean settings. " +
"This flag can be specified multiple times, but only the last value for a specific " +
"config option is applied. " +
"If the setting represents a list, the elements are colon-separated.",
Name: "set",
Usage: "Set a config value using the pattern key=value. If value is empty, this is equivalent to specifying the same key in unset. This flag can be specified multiple times",
Destination: &opts.sets,
},
&cli.StringFlag{
Name: "set-list-separator",
Usage: "Specify a separator for lists applied using the set command.",
Hidden: true,
Value: ":",
Destination: &opts.setListSeparator,
},
&cli.BoolFlag{
Name: "in-place",
Aliases: []string{"i"},
@@ -111,13 +96,6 @@ func (m command) build() *cli.Command {
return &c
}
func validateFlags(c *cli.Context, opts *options) error {
if opts.setListSeparator == "" {
return fmt.Errorf("set-list-separator must be set")
}
return nil
}
func run(c *cli.Context, opts *options) error {
cfgToml, err := config.New(
config.WithConfigFile(opts.Config),
@@ -127,15 +105,11 @@ func run(c *cli.Context, opts *options) error {
}
for _, set := range opts.sets.Value() {
key, value, err := setFlagToKeyValue(set, opts.setListSeparator)
key, value, err := setFlagToKeyValue(set)
if err != nil {
return fmt.Errorf("invalid --set option %v: %w", set, err)
}
if value == nil {
_ = cfgToml.Delete(key)
} else {
cfgToml.Set(key, value)
}
cfgToml.Set(key, value)
}
if err := opts.EnsureOutputFolder(); err != nil {
@@ -147,10 +121,10 @@ func run(c *cli.Context, opts *options) error {
}
defer output.Close()
if _, err := cfgToml.Save(output); err != nil {
return fmt.Errorf("failed to save config: %v", err)
if err != nil {
return err
}
cfgToml.Save(output)
return nil
}
@@ -161,7 +135,7 @@ var errInvalidFormat = errors.New("invalid format")
// setFlagToKeyValue converts a --set flag to a key-value pair.
// The set flag is of the form key[=value], with the value being optional if key refers to a
// boolean config option.
func setFlagToKeyValue(setFlag string, setListSeparator string) (string, interface{}, error) {
func setFlagToKeyValue(setFlag string) (string, interface{}, error) {
setParts := strings.SplitN(setFlag, "=", 2)
key := setParts[0]
@@ -172,36 +146,24 @@ func setFlagToKeyValue(setFlag string, setListSeparator string) (string, interfa
kind := field.Kind()
if len(setParts) != 2 {
if kind == reflect.Bool || (kind == reflect.Pointer && field.Elem().Kind() == reflect.Bool) {
if kind == reflect.Bool {
return key, true, nil
}
return key, nil, fmt.Errorf("%w: expected key=value; got %v", errInvalidFormat, setFlag)
}
value := setParts[1]
if kind == reflect.Pointer && value != "nil" {
kind = field.Elem().Kind()
}
switch kind {
case reflect.Pointer:
return key, nil, nil
case reflect.Bool:
b, err := strconv.ParseBool(value)
if err != nil {
return key, value, fmt.Errorf("%w: %w", errInvalidFormat, err)
}
return key, b, nil
return key, b, err
case reflect.String:
return key, value, nil
case reflect.Slice:
valueParts := []string{value}
for _, sep := range []string{setListSeparator, ","} {
if !strings.Contains(value, sep) {
continue
}
valueParts = strings.Split(value, sep)
break
}
valueParts := strings.Split(value, ",")
switch field.Elem().Kind() {
case reflect.String:
return key, valueParts, nil
@@ -239,7 +201,7 @@ func getStruct(current reflect.Type, paths ...string) (reflect.StructField, erro
if !ok {
continue
}
if strings.SplitN(v, ",", 2)[0] != tomlField {
if v != tomlField {
continue
}
if len(paths) == 1 {

View File

@@ -25,12 +25,11 @@ import (
func TestSetFlagToKeyValue(t *testing.T) {
// TODO: We need to enable this test again since switching to reflect.
testCases := []struct {
description string
setFlag string
setListSeparator string
expectedKey string
expectedValue interface{}
expectedError error
description string
setFlag string
expectedKey string
expectedValue interface{}
expectedError error
}{
{
description: "option not present returns an error",
@@ -107,34 +106,22 @@ func TestSetFlagToKeyValue(t *testing.T) {
expectedValue: []string{"string-value"},
},
{
description: "[]string option returns multiple values",
setFlag: "nvidia-container-cli.environment=first,second",
setListSeparator: ",",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first", "second"},
description: "[]string option returns multiple values",
setFlag: "nvidia-container-cli.environment=first,second",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first", "second"},
},
{
description: "[]string option returns values with equals",
setFlag: "nvidia-container-cli.environment=first=1,second=2",
setListSeparator: ",",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first=1", "second=2"},
},
{
description: "[]string option returns multiple values semi-colon",
setFlag: "nvidia-container-cli.environment=first;second",
setListSeparator: ";",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first", "second"},
description: "[]string option returns values with equals",
setFlag: "nvidia-container-cli.environment=first=1,second=2",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first=1", "second=2"},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
if tc.setListSeparator == "" {
tc.setListSeparator = ","
}
k, v, err := setFlagToKeyValue(tc.setFlag, tc.setListSeparator)
k, v, err := setFlagToKeyValue(tc.setFlag)
require.ErrorIs(t, err, tc.expectedError)
require.EqualValues(t, tc.expectedKey, k)
require.EqualValues(t, tc.expectedValue, v)

View File

@@ -19,11 +19,10 @@ package defaultsubcommand
import (
"fmt"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
@@ -86,7 +85,8 @@ func (m command) run(c *cli.Context, opts *flags.Options) error {
}
defer output.Close()
if _, err = cfgToml.Save(output); err != nil {
_, err = cfgToml.Save(output)
if err != nil {
return fmt.Errorf("failed to write output: %v", err)
}

View File

@@ -17,18 +17,16 @@
package chmod
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/urfave/cli/v2"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/urfave/cli/v2"
)
type command struct {
@@ -37,8 +35,7 @@ type command struct {
type config struct {
paths cli.StringSlice
modeStr string
mode fs.FileMode
mode string
containerSpec string
}
@@ -69,13 +66,13 @@ func (m command) build() *cli.Command {
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "path",
Usage: "Specify a path to apply the specified mode to",
Usage: "Specifiy a path to apply the specified mode to",
Destination: &cfg.paths,
},
&cli.StringFlag{
Name: "mode",
Usage: "Specify the file mode",
Destination: &cfg.modeStr,
Destination: &cfg.mode,
},
&cli.StringFlag{
Name: "container-spec",
@@ -88,16 +85,10 @@ func (m command) build() *cli.Command {
}
func validateFlags(c *cli.Context, cfg *config) error {
if strings.TrimSpace(cfg.modeStr) == "" {
if strings.TrimSpace(cfg.mode) == "" {
return fmt.Errorf("a non-empty mode must be specified")
}
modeInt, err := strconv.ParseUint(cfg.modeStr, 8, 32)
if err != nil {
return fmt.Errorf("failed to parse mode as octal: %v", err)
}
cfg.mode = fs.FileMode(modeInt)
for _, p := range cfg.paths.Value() {
if strings.TrimSpace(p) == "" {
return fmt.Errorf("paths must not be empty")
@@ -121,38 +112,33 @@ func (m command) run(c *cli.Context, cfg *config) error {
return fmt.Errorf("empty container root detected")
}
paths := m.getPaths(containerRoot, cfg.paths.Value(), cfg.mode)
paths := m.getPaths(containerRoot, cfg.paths.Value())
if len(paths) == 0 {
m.logger.Debugf("No paths specified; exiting")
return nil
}
for _, path := range paths {
err = os.Chmod(path, cfg.mode)
// in some cases this is not an issue (e.g. whole /dev mounted), see #143
if errors.Is(err, fs.ErrPermission) {
m.logger.Debugf("Ignoring permission error with chmod: %v", err)
err = nil
}
locator := lookup.NewExecutableLocator(m.logger, "")
targets, err := locator.Locate("chmod")
if err != nil {
return fmt.Errorf("failed to locate chmod: %v", err)
}
chmodPath := targets[0]
return err
args := append([]string{filepath.Base(chmodPath), cfg.mode}, paths...)
return syscall.Exec(chmodPath, args, nil)
}
// getPaths updates the specified paths relative to the root.
func (m command) getPaths(root string, paths []string, desiredMode fs.FileMode) []string {
func (m command) getPaths(root string, paths []string) []string {
var pathsInRoot []string
for _, f := range paths {
path := filepath.Join(root, f)
stat, err := os.Stat(path)
if err != nil {
if _, err := os.Stat(path); err != nil {
m.logger.Debugf("Skipping path %q: %v", path, err)
continue
}
if (stat.Mode()&(fs.ModePerm|fs.ModeSetuid|fs.ModeSetgid|fs.ModeSticky))^desiredMode == 0 {
m.logger.Debugf("Skipping path %q: already desired mode", path)
continue
}
pathsInRoot = append(pathsInRoot, path)
}

View File

@@ -0,0 +1,230 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package symlinks
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
type config struct {
hostRoot string
filenames cli.StringSlice
links cli.StringSlice
containerSpec string
}
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build
func (m command) build() *cli.Command {
cfg := config{}
// Create the '' command
c := cli.Command{
Name: "create-symlinks",
Usage: "A hook to create symlinks in the container. This can be used to proces CSV mount specs",
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "host-root",
Usage: "The root on the host filesystem to use to resolve symlinks",
Destination: &cfg.hostRoot,
},
&cli.StringSliceFlag{
Name: "csv-filename",
Usage: "Specify a (CSV) filename to process",
Destination: &cfg.filenames,
},
&cli.StringSliceFlag{
Name: "link",
Usage: "Specify a specific link to create. The link is specified as target::link",
Destination: &cfg.links,
},
&cli.StringFlag{
Name: "container-spec",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func (m command) run(c *cli.Context, cfg *config) error {
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %v", err)
}
containerRoot, err := s.GetContainerRoot()
if err != nil {
return fmt.Errorf("failed to determined container root: %v", err)
}
csvFiles := cfg.filenames.Value()
chainLocator := lookup.NewSymlinkChainLocator(
lookup.WithLogger(m.logger),
lookup.WithRoot(cfg.hostRoot),
)
var candidates []string
for _, file := range csvFiles {
mountSpecs, err := csv.NewCSVFileParser(m.logger, file).Parse()
if err != nil {
m.logger.Debugf("Skipping CSV file %v: %v", file, err)
continue
}
for _, ms := range mountSpecs {
if ms.Type != csv.MountSpecSym {
continue
}
targets, err := chainLocator.Locate(ms.Path)
if err != nil {
m.logger.Warningf("Failed to locate symlink %v", ms.Path)
}
candidates = append(candidates, targets...)
}
}
created := make(map[string]bool)
// candidates is a list of absolute paths to symlinks in a chain, or the final target of the chain.
for _, candidate := range candidates {
target, err := symlinks.Resolve(candidate)
if err != nil {
m.logger.Debugf("Skipping invalid link: %v", err)
continue
} else if target == candidate {
m.logger.Debugf("%v is not a symlink", candidate)
continue
}
err = m.createLink(created, cfg.hostRoot, containerRoot, target, candidate)
if err != nil {
m.logger.Warningf("Failed to create link %v: %v", []string{target, candidate}, err)
}
}
links := cfg.links.Value()
for _, l := range links {
parts := strings.Split(l, "::")
if len(parts) != 2 {
m.logger.Warningf("Invalid link specification %v", l)
continue
}
err := m.createLink(created, cfg.hostRoot, containerRoot, parts[0], parts[1])
if err != nil {
m.logger.Warningf("Failed to create link %v: %v", parts, err)
}
}
return nil
}
func (m command) createLink(created map[string]bool, hostRoot string, containerRoot string, target string, link string) error {
linkPath, err := changeRoot(hostRoot, containerRoot, link)
if err != nil {
m.logger.Warningf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
}
if created[linkPath] {
m.logger.Debugf("Link %v already created", linkPath)
return nil
}
targetPath, err := changeRoot(hostRoot, "/", target)
if err != nil {
m.logger.Warningf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
}
m.logger.Infof("Symlinking %v to %v", linkPath, targetPath)
err = os.MkdirAll(filepath.Dir(linkPath), 0755)
if err != nil {
return fmt.Errorf("failed to create directory: %v", err)
}
err = os.Symlink(target, linkPath)
if err != nil {
return fmt.Errorf("failed to create symlink: %v", err)
}
return nil
}
func changeRoot(current string, new string, path string) (string, error) {
if !filepath.IsAbs(path) {
return path, nil
}
relative := path
if current != "" {
r, err := filepath.Rel(current, path)
if err != nil {
return "", err
}
relative = r
}
return filepath.Join(new, relative), nil
}
// Locate returns the link target of the specified filename or an empty slice if the
// specified filename is not a symlink.
func (m command) Locate(filename string) ([]string, error) {
info, err := os.Lstat(filename)
if err != nil {
return nil, fmt.Errorf("failed to get file info: %v", info)
}
if info.Mode()&os.ModeSymlink == 0 {
m.logger.Debugf("%v is not a symlink", filename)
return nil, nil
}
target, err := os.Readlink(filename)
if err != nil {
return nil, fmt.Errorf("error checking symlink: %v", err)
}
m.logger.Debugf("Resolved link: '%v' => '%v'", filename, target)
return []string{target}, nil
}

View File

@@ -17,9 +17,11 @@
package hook
import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/commands"
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
"github.com/urfave/cli/v2"
)
@@ -27,7 +29,7 @@ type hookCommand struct {
logger logger.Interface
}
// NewCommand constructs CLI subcommand for handling CDI hooks.
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := hookCommand{
logger: logger,
@@ -37,24 +39,17 @@ func NewCommand(logger logger.Interface) *cli.Command {
// build
func (m hookCommand) build() *cli.Command {
// Create the 'hook' subcommand
// Create the 'hook' command
hook := cli.Command{
Name: "hook",
Usage: "A collection of hooks that may be injected into an OCI spec",
// We set the default action for the `hook` subcommand to issue a
// warning and exit with no error.
// This means that if an unsupported hook is run, a container will not fail
// to launch. An unsupported hook could be the result of a CDI specification
// referring to a new hook that is not yet supported by an older NVIDIA
// Container Toolkit version or a hook that has been removed in newer
// version.
Action: func(ctx *cli.Context) error {
commands.IssueUnsupportedHookWarning(m.logger, ctx)
return nil
},
}
hook.Subcommands = commands.New(m.logger)
hook.Subcommands = []*cli.Command{
ldcache.NewCommand(m.logger),
symlinks.NewCommand(m.logger),
chmod.NewCommand(m.logger),
}
return &hook
}

View File

@@ -0,0 +1,144 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
type config struct {
folders cli.StringSlice
containerSpec string
}
// NewCommand constructs an update-ldcache command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build the update-ldcache command
func (m command) build() *cli.Command {
cfg := config{}
// Create the 'update-ldcache' command
c := cli.Command{
Name: "update-ldcache",
Usage: "Update ldcache in a container by running ldconfig",
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "folder",
Usage: "Specifiy a folder to add to /etc/ld.so.conf before updating the ld cache",
Destination: &cfg.folders,
},
&cli.StringFlag{
Name: "container-spec",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func (m command) run(c *cli.Context, cfg *config) error {
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %v", err)
}
containerRoot, err := s.GetContainerRoot()
if err != nil {
return fmt.Errorf("failed to determined container root: %v", err)
}
_, err = os.Stat(filepath.Join(containerRoot, "/etc/ld.so.cache"))
if err != nil && os.IsNotExist(err) {
m.logger.Debugf("No ld.so.cache found, skipping update")
return nil
}
err = m.createConfig(containerRoot, cfg.folders.Value())
if err != nil {
return fmt.Errorf("failed to update ld.so.conf: %v", err)
}
args := []string{"/sbin/ldconfig"}
if containerRoot != "" {
args = append(args, "-r", containerRoot)
}
return syscall.Exec(args[0], args, nil)
}
// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf in the container
// to include the required paths.
func (m command) createConfig(root string, folders []string) error {
if len(folders) == 0 {
m.logger.Debugf("No folders to add to /etc/ld.so.conf")
return nil
}
if err := os.MkdirAll(filepath.Join(root, "/etc/ld.so.conf.d"), 0755); err != nil {
return fmt.Errorf("failed to create ld.so.conf.d: %v", err)
}
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf")
if err != nil {
return fmt.Errorf("failed to create config file: %v", err)
}
defer configFile.Close()
m.logger.Debugf("Adding folders %v to %v", folders, configFile.Name())
configured := make(map[string]bool)
for _, folder := range folders {
if configured[folder] {
continue
}
_, err = configFile.WriteString(fmt.Sprintf("%s\n", folder))
if err != nil {
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
}
configured[folder] = true
}
// The created file needs to be world readable for the cases where the container is run as a non-root user.
if err := os.Chmod(configFile.Name(), 0644); err != nil {
return fmt.Errorf("failed to chmod config file: %v", err)
}
return nil
}

View File

@@ -17,9 +17,8 @@
package info
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {

View File

@@ -19,8 +19,6 @@ package main
import (
"os"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
@@ -28,6 +26,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
)
@@ -49,7 +48,6 @@ func main() {
// Create the top-level CLI
c := cli.NewApp()
c.DisableSliceFlagSeparator = true
c.Name = "NVIDIA Container Toolkit CLI"
c.UseShortOptionHandling = true
c.EnableBashCompletion = true

View File

@@ -28,7 +28,6 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml"
)
const (
@@ -44,17 +43,13 @@ const (
defaultContainerdConfigFilePath = "/etc/containerd/config.toml"
defaultCrioConfigFilePath = "/etc/crio/crio.conf"
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
defaultConfigSource = configSourceFile
configSourceCommand = "command"
configSourceFile = "file"
)
type command struct {
logger logger.Interface
}
// NewCommand constructs a configure command with the specified logger
// NewCommand constructs an configure command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
@@ -68,8 +63,6 @@ type config struct {
dryRun bool
runtime string
configFilePath string
executablePath string
configSource string
mode string
hookFilePath string
@@ -119,22 +112,11 @@ func (m command) build() *cli.Command {
Usage: "path to the config file for the target runtime",
Destination: &config.configFilePath,
},
&cli.StringFlag{
Name: "executable-path",
Usage: "The path to the runtime executable. This is used to extract the current config",
Destination: &config.executablePath,
},
&cli.StringFlag{
Name: "config-mode",
Usage: "the config mode for runtimes that support multiple configuration mechanisms",
Destination: &config.mode,
},
&cli.StringFlag{
Name: "config-source",
Usage: "the source to retrieve the container runtime configuration; one of [command, file]\"",
Destination: &config.configSource,
Value: defaultConfigSource,
},
&cli.StringFlag{
Name: "oci-hook-path",
Usage: "the path to the OCI runtime hook to create if --config-mode=oci-hook is specified. If no path is specified, the generated hook is output to STDOUT.\n\tNote: The use of OCI hooks is deprecated.",
@@ -167,7 +149,7 @@ func (m command) build() *cli.Command {
},
&cli.BoolFlag{
Name: "cdi.enabled",
Aliases: []string{"cdi.enable", "enable-cdi"},
Aliases: []string{"cdi.enable"},
Usage: "Enable CDI in the configured runtime",
Destination: &config.cdi.enabled,
},
@@ -212,34 +194,6 @@ func (m command) validateFlags(c *cli.Context, config *config) error {
config.cdi.enabled = false
}
if config.executablePath != "" && config.runtime == "docker" {
m.logger.Warningf("Ignoring executable-path=%q flag for %v", config.executablePath, config.runtime)
config.executablePath = ""
}
switch config.configSource {
case configSourceCommand:
if config.runtime == "docker" {
m.logger.Warningf("A %v Config Source is not supported for %v; using %v", config.configSource, config.runtime, configSourceFile)
config.configSource = configSourceFile
}
case configSourceFile:
break
default:
return fmt.Errorf("unrecognized Config Source: %v", config.configSource)
}
if config.configFilePath == "" {
switch config.runtime {
case "containerd":
config.configFilePath = defaultContainerdConfigFilePath
case "crio":
config.configFilePath = defaultCrioConfigFilePath
case "docker":
config.configFilePath = defaultDockerConfigFilePath
}
}
return nil
}
@@ -256,29 +210,25 @@ func (m command) configureWrapper(c *cli.Context, config *config) error {
// configureConfigFile updates the specified container engine config file to enable the NVIDIA runtime.
func (m command) configureConfigFile(c *cli.Context, config *config) error {
configSource, err := config.resolveConfigSource()
if err != nil {
return err
}
configFilePath := config.resolveConfigFilePath()
var cfg engine.Interface
var err error
switch config.runtime {
case "containerd":
cfg, err = containerd.New(
containerd.WithLogger(m.logger),
containerd.WithPath(config.configFilePath),
containerd.WithConfigSource(configSource),
containerd.WithPath(configFilePath),
)
case "crio":
cfg, err = crio.New(
crio.WithLogger(m.logger),
crio.WithPath(config.configFilePath),
crio.WithConfigSource(configSource),
crio.WithPath(configFilePath),
)
case "docker":
cfg, err = docker.New(
docker.WithLogger(m.logger),
docker.WithPath(config.configFilePath),
docker.WithPath(configFilePath),
)
default:
err = fmt.Errorf("unrecognized runtime '%v'", config.runtime)
@@ -296,11 +246,12 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
return fmt.Errorf("unable to update config: %v", err)
}
if config.cdi.enabled {
cfg.EnableCDI()
err = enableCDI(config, cfg)
if err != nil {
return fmt.Errorf("failed to enable CDI in %s: %w", config.runtime, err)
}
outputPath := config.getOutputConfigPath()
outputPath := config.getOuputConfigPath()
n, err := cfg.Save(outputPath)
if err != nil {
return fmt.Errorf("unable to flush config: %v", err)
@@ -318,35 +269,28 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
return nil
}
// resolveConfigSource returns the default config source or the user provided config source
func (c *config) resolveConfigSource() (toml.Loader, error) {
switch c.configSource {
case configSourceCommand:
return c.getCommandConfigSource(), nil
case configSourceFile:
return toml.FromFile(c.configFilePath), nil
default:
return nil, fmt.Errorf("unrecognized config source: %s", c.configSource)
// resolveConfigFilePath returns the default config file path for the configured container engine
func (c *config) resolveConfigFilePath() string {
if c.configFilePath != "" {
return c.configFilePath
}
}
// getConfigSourceCommand returns the default cli command to fetch the current runtime config
func (c *config) getCommandConfigSource() toml.Loader {
switch c.runtime {
case "containerd":
return containerd.CommandLineSource("", c.executablePath)
return defaultContainerdConfigFilePath
case "crio":
return crio.CommandLineSource("", c.executablePath)
return defaultCrioConfigFilePath
case "docker":
return defaultDockerConfigFilePath
}
return toml.Empty
return ""
}
// getOutputConfigPath returns the configured config path or "" if dry-run is enabled
func (c *config) getOutputConfigPath() string {
// getOuputConfigPath returns the configured config path or "" if dry-run is enabled
func (c *config) getOuputConfigPath() string {
if c.dryRun {
return ""
}
return c.configFilePath
return c.resolveConfigFilePath()
}
// configureOCIHook creates and configures the OCI hook for the NVIDIA runtime
@@ -357,3 +301,17 @@ func (m *command) configureOCIHook(c *cli.Context, config *config) error {
}
return nil
}
// enableCDI enables the use of CDI in the corresponding container engine
func enableCDI(config *config, cfg engine.Interface) error {
if !config.cdi.enabled {
return nil
}
switch config.runtime {
case "containerd":
return cfg.Set("enable_cdi", true)
case "docker":
return cfg.Set("features", map[string]bool{"cdi": true})
}
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
}

View File

@@ -17,10 +17,9 @@
package runtime
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type runtimeCommand struct {

View File

@@ -19,13 +19,16 @@ package devchar
import (
"fmt"
"os"
"os/signal"
"path/filepath"
"github.com/urfave/cli/v2"
"strings"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/fsnotify/fsnotify"
"github.com/urfave/cli/v2"
)
const (
@@ -40,6 +43,7 @@ type config struct {
devCharPath string
driverRoot string
dryRun bool
watch bool
createAll bool
createDeviceNodes bool
loadKernelModules bool
@@ -82,7 +86,14 @@ func (m command) build() *cli.Command {
Usage: "The path to the driver root. `DRIVER_ROOT`/dev is searched for NVIDIA device nodes.",
Value: "/",
Destination: &cfg.driverRoot,
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
EnvVars: []string{"DRIVER_ROOT"},
},
&cli.BoolFlag{
Name: "watch",
Usage: "If set, the command will watch for changes to the driver root and recreate the symlinks when changes are detected.",
Value: false,
Destination: &cfg.watch,
EnvVars: []string{"WATCH"},
},
&cli.BoolFlag{
Name: "create-all",
@@ -115,7 +126,7 @@ func (m command) build() *cli.Command {
}
func (m command) validateFlags(r *cli.Context, cfg *config) error {
if cfg.createAll {
if cfg.createAll && cfg.watch {
return fmt.Errorf("create-all and watch are mutually exclusive")
}
@@ -133,6 +144,19 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error {
}
func (m command) run(c *cli.Context, cfg *config) error {
var watcher *fsnotify.Watcher
var sigs chan os.Signal
if cfg.watch {
watcher, err := newFSWatcher(filepath.Join(cfg.driverRoot, "dev"))
if err != nil {
return fmt.Errorf("failed to create FS watcher: %v", err)
}
defer watcher.Close()
sigs = newOSWatcher(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
}
l, err := NewSymlinkCreator(
WithLogger(m.logger),
WithDevCharPath(cfg.devCharPath),
@@ -146,11 +170,47 @@ func (m command) run(c *cli.Context, cfg *config) error {
return fmt.Errorf("failed to create symlink creator: %v", err)
}
create:
err = l.CreateLinks()
if err != nil {
return fmt.Errorf("failed to create links: %v", err)
}
return nil
if !cfg.watch {
return nil
}
for {
select {
case event := <-watcher.Events:
deviceNode := filepath.Base(event.Name)
if !strings.HasPrefix(deviceNode, "nvidia") {
continue
}
if event.Op&fsnotify.Create == fsnotify.Create {
m.logger.Infof("%s created, restarting.", event.Name)
goto create
}
if event.Op&fsnotify.Create == fsnotify.Remove {
m.logger.Infof("%s removed. Ignoring", event.Name)
}
// Watch for any other fs errors and log them.
case err := <-watcher.Errors:
m.logger.Errorf("inotify: %s", err)
// React to signals
case s := <-sigs:
switch s {
case syscall.SIGHUP:
m.logger.Infof("Received SIGHUP, recreating symlinks.")
goto create
default:
m.logger.Infof("Received signal %q, shutting down.", s)
return nil
}
}
}
}
type linkCreator struct {
@@ -338,3 +398,27 @@ type deviceNode struct {
func (d deviceNode) devCharName() string {
return fmt.Sprintf("%d:%d", d.major, d.minor)
}
func newFSWatcher(files ...string) (*fsnotify.Watcher, error) {
watcher, err := fsnotify.NewWatcher()
if err != nil {
return nil, err
}
for _, f := range files {
err = watcher.Add(f)
if err != nil {
watcher.Close()
return nil, err
}
}
return watcher, nil
}
func newOSWatcher(sigs ...os.Signal) chan os.Signal {
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, sigs...)
return sigChan
}

View File

@@ -20,10 +20,9 @@ import (
"path/filepath"
"strings"
"golang.org/x/sys/unix"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"golang.org/x/sys/unix"
)
type nodeLister interface {
@@ -64,13 +63,20 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
if m.nodeIsBlocked(d) {
continue
}
var stat unix.Stat_t
err := unix.Stat(d, &stat)
if err != nil {
m.logger.Warningf("Could not stat device: %v", err)
continue
}
deviceNodes = append(deviceNodes, newDeviceNode(d, stat))
deviceNode := deviceNode{
path: d,
major: unix.Major(uint64(stat.Rdev)),
minor: unix.Minor(uint64(stat.Rdev)),
}
deviceNodes = append(deviceNodes, deviceNode)
}
return deviceNodes, nil

View File

@@ -1,28 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package devchar
import "golang.org/x/sys/unix"
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
deviceNode := deviceNode{
path: d,
major: unix.Major(stat.Rdev),
minor: unix.Minor(stat.Rdev),
}
return deviceNode
}

View File

@@ -1,30 +0,0 @@
//go:build !linux
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package devchar
import "golang.org/x/sys/unix"
func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
deviceNode := deviceNode{
path: d,
major: unix.Major(uint64(stat.Rdev)),
minor: unix.Minor(uint64(stat.Rdev)),
}
return deviceNode
}

View File

@@ -19,11 +19,10 @@ package createdevicenodes
import (
"fmt"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/urfave/cli/v2"
)
type command struct {
@@ -31,8 +30,7 @@ type command struct {
}
type options struct {
root string
devRoot string
driverRoot string
dryRun bool
@@ -66,21 +64,11 @@ func (m command) build() *cli.Command {
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "root",
// TODO: Remove this alias
Aliases: []string{"driver-root"},
Usage: "the path to to the root to use to load the kernel modules. This root must be a chrootable path. " +
"If device nodes to be created these will be created at `ROOT`/dev unless an alternative path is specified",
Name: "driver-root",
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
Value: "/",
Destination: &opts.root,
// TODO: Remove the NVIDIA_DRIVER_ROOT and DRIVER_ROOT envvars.
EnvVars: []string{"ROOT", "NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
},
&cli.StringFlag{
Name: "dev-root",
Usage: "specify the root where `/dev` is located. If this is not specified, the root is assumed.",
Destination: &opts.devRoot,
EnvVars: []string{"NVIDIA_DEV_ROOT", "DEV_ROOT"},
Destination: &opts.driverRoot,
EnvVars: []string{"DRIVER_ROOT"},
},
&cli.BoolFlag{
Name: "control-devices",
@@ -94,7 +82,7 @@ func (m command) build() *cli.Command {
},
&cli.BoolFlag{
Name: "dry-run",
Usage: "if set, the command will not perform any operations",
Usage: "if set, the command will not create any symlinks.",
Value: false,
Destination: &opts.dryRun,
EnvVars: []string{"DRY_RUN"},
@@ -105,10 +93,6 @@ func (m command) build() *cli.Command {
}
func (m command) validateFlags(r *cli.Context, opts *options) error {
if opts.devRoot == "" && opts.root != "" {
m.logger.Infof("Using dev-root %q", opts.root)
opts.devRoot = opts.root
}
return nil
}
@@ -117,7 +101,7 @@ func (m command) run(c *cli.Context, opts *options) error {
modules := nvmodules.New(
nvmodules.WithLogger(m.logger),
nvmodules.WithDryRun(opts.dryRun),
nvmodules.WithRoot(opts.root),
nvmodules.WithRoot(opts.driverRoot),
)
if err := modules.LoadAll(); err != nil {
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
@@ -128,12 +112,12 @@ func (m command) run(c *cli.Context, opts *options) error {
devices, err := nvdevices.New(
nvdevices.WithLogger(m.logger),
nvdevices.WithDryRun(opts.dryRun),
nvdevices.WithDevRoot(opts.devRoot),
nvdevices.WithDevRoot(opts.driverRoot),
)
if err != nil {
return err
}
m.logger.Infof("Creating control device nodes at %s", opts.devRoot)
m.logger.Infof("Creating control device nodes at %s", opts.driverRoot)
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create NVIDIA control device nodes: %v", err)
}

View File

@@ -0,0 +1,101 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package createdevicenodes
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
type options struct {
driverRoot string
}
// NewCommand constructs a command sub-command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build
func (m command) build() *cli.Command {
opts := options{}
c := cli.Command{
Name: "print-ldcache",
Usage: "A utility to print the contents of the ldcache",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &opts)
},
Action: func(c *cli.Context) error {
return m.run(c, &opts)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "driver-root",
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
Value: "/",
Destination: &opts.driverRoot,
EnvVars: []string{"DRIVER_ROOT"},
},
}
return &c
}
func (m command) validateFlags(r *cli.Context, opts *options) error {
return nil
}
func (m command) run(c *cli.Context, opts *options) error {
cache, err := ldcache.New(m.logger, opts.driverRoot)
if err != nil {
return fmt.Errorf("failed to create ldcache: %v", err)
}
lib32, lib64 := cache.List()
if len(lib32) == 0 {
m.logger.Info("No 32-bit libraries found")
} else {
m.logger.Infof("%d 32-bit libraries found", len(lib32))
for _, lib := range lib32 {
m.logger.Infof("%v", lib)
}
}
if len(lib64) == 0 {
m.logger.Info("No 64-bit libraries found")
} else {
m.logger.Infof("%d 64-bit libraries found", len(lib64))
for _, lib := range lib64 {
m.logger.Infof("%v", lib)
}
}
return nil
}

Some files were not shown because too many files have changed in this diff Show More