Compare commits

...

233 Commits

Author SHA1 Message Date
Evan Lezar
f2eb4ea9ba Merge pull request #549 from elezar/bump-v1.16.0-rc.1
Bump v1.16.0 rc.1
2024-06-17 15:35:27 +02:00
Evan Lezar
4686f9499c Add basic release workflow
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-17 15:33:11 +02:00
Evan Lezar
3f481cd20a Update changelog for v1.16.0-rc.1 release
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-17 15:17:28 +02:00
Evan Lezar
cd52be86e6 Merge pull request #529 from elezar/vulkan-location
Support vulkan ICD files in a driver root
2024-06-17 15:00:29 +02:00
Evan Lezar
b5743da52f Merge pull request #526 from elezar/add-dev-root-to-create-device-nodes
Add dev-root option to create-device-nodes
2024-06-17 14:57:26 +02:00
Evan Lezar
03ccd64f33 Merge pull request #512 from NVIDIA/dependabot/go_modules/main/github.com/NVIDIA/go-nvml-0.12.4-0
Bump github.com/NVIDIA/go-nvml from 0.12.0-6 to 0.12.4-0
2024-06-17 14:23:00 +02:00
Evan Lezar
33369861fc Merge pull request #547 from NVIDIA/revert-490-main
Revert "Inject additional libraries required for full display functionality"
2024-06-17 11:47:36 +02:00
Evan Lezar
d9a1106e00 Revert "Inject additional libraries required for full display functionality"
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-17 11:47:15 +02:00
Evan Lezar
f26425d3fd Merge pull request #490 from ehfd/main
Inject additional libraries required for full display functionality
2024-06-17 11:41:38 +02:00
Evan Lezar
272585d261 Merge pull request #544 from elezar/allow-toolkit-pid-file-to-be-specified
Allow toolkit.pid path to be specified
2024-06-17 11:39:44 +02:00
Evan Lezar
fe5a44cb35 Merge pull request #527 from elezar/increase-priority-of-injected-libs
Increase priority of ld.so.conf.d config file
2024-06-17 11:34:43 +02:00
Evan Lezar
5f2be72335 Support vulkan ICD files in a driver root
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-17 11:33:29 +02:00
Evan Lezar
ae074e7ba2 Merge pull request #545 from elezar/bump-version-v1.16.0-rc.1
Bump version to v1.16.0-rc.1
2024-06-17 11:32:07 +02:00
Evan Lezar
876d479308 Allow toolkit.pid path to be specified
This change makes the following changes:
* Allows the toolkit.pid path to be specified
* Creates the toolkit.pid file at /run/nvidia/toolkit/toolkit.pid by default
* Handles failures to remove the /run/nvidia/toolkit folder

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-17 11:26:23 +02:00
dependabot[bot]
abd638add9 Bump github.com/NVIDIA/go-nvml from 0.12.0-6 to 0.12.4-0
Bumps [github.com/NVIDIA/go-nvml](https://github.com/NVIDIA/go-nvml) from 0.12.0-6 to 0.12.4-0.
- [Commits](https://github.com/NVIDIA/go-nvml/compare/v0.12.0-6...v0.12.4-0)

---
updated-dependencies:
- dependency-name: github.com/NVIDIA/go-nvml
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-06-14 13:27:46 +00:00
Evan Lezar
1dd59101c7 Bump version to v1.16.0-rc.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-14 15:25:21 +02:00
Evan Lezar
55630bc2c0 Merge pull request #542 from elezar/remove-provenance
Remove provenance information from image manifests
2024-06-14 15:16:06 +02:00
Evan Lezar
4f0de9f1ef Increase priority of ld.so.conf.d config file
This change ensures that the created /etc/ld.so.conf.d file
has a higher priority to ensure that the injected libraries
take precedence over non-compat libraries.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-13 13:49:14 +02:00
Evan Lezar
bced007f87 Remove provenance information from image manifests
Tools such as oc mirror do not support the provenance metadata
added to the image manifests with newer docker buildx versions.

This change disables the addition of provenance information.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-13 13:36:07 +02:00
Evan Lezar
ac90b7963d Merge pull request #519 from shivakunv/ngc_signing_job
add ngc image signing job for auto signing
2024-06-13 12:01:19 +02:00
shiva kumar
2e947edbe4 add ngc image signing job for auto signing
Signed-off-by: shiva kumar <shivaku@nvidia.com>
2024-06-12 13:20:35 +05:30
Evan Lezar
9fde4b21df Merge pull request #539 from elezar/fix-ppcle64-builds
Fix ppcle64 builds
2024-06-11 14:49:37 +02:00
Evan Lezar
84e0060fe8 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-11 14:31:31 +02:00
Evan Lezar
024dd3126d Use archived package repo for centos:stream8
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-11 14:31:06 +02:00
Evan Lezar
86b272cc7b Merge pull request #533 from NVIDIA/dependabot/go_modules/main/golang.org/x/mod-0.18.0
Bump golang.org/x/mod from 0.17.0 to 0.18.0
2024-06-10 13:43:15 +02:00
dependabot[bot]
2bc24970e0 Bump golang.org/x/mod from 0.17.0 to 0.18.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.17.0 to 0.18.0.
- [Commits](https://github.com/golang/mod/compare/v0.17.0...v0.18.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-06-10 11:35:48 +00:00
Evan Lezar
00dc0daecc Merge pull request #532 from NVIDIA/dependabot/go_modules/main/golang.org/x/sys-0.21.0
Bump golang.org/x/sys from 0.20.0 to 0.21.0
2024-06-10 13:34:42 +02:00
dependabot[bot]
e3120cbe64 Bump golang.org/x/sys from 0.20.0 to 0.21.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.20.0 to 0.21.0.
- [Commits](https://github.com/golang/sys/compare/v0.20.0...v0.21.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-06-10 11:32:41 +00:00
Evan Lezar
00d82dd540 Merge pull request #536 from elezar/bump-github.com/xrash/smetrics
Bump github.com/xrash/smetrics to v0.0.0-20240521201337-686a1a2994c1
2024-06-10 13:31:58 +02:00
Evan Lezar
8fe366683e Bump github.com/xrash/smetrics to v0.0.0-20240521201337-686a1a2994c1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-10 13:30:14 +02:00
Evan Lezar
7320fcd86d Merge pull request #524 from NVIDIA/dependabot/docker/deployments/container/main/nvidia/cuda-12.5.0-base-ubuntu20.04
Bump nvidia/cuda from 12.4.1-base-ubuntu20.04 to 12.5.0-base-ubuntu20.04 in /deployments/container
2024-06-10 13:23:25 +02:00
Evan Lezar
01f212b7a8 Merge pull request #528 from elezar/set-cdi-permissions-644
Set default CDI spec permissions to 644
2024-06-10 13:20:18 +02:00
Evan Lezar
71e0b8590f Set default CDI spec permissions to 644
Although the nvidia-ctk cdi generate command generates
specs with 644 permissions, the nvidia-ctk cdi transform
commands do not. This change sets the default permissions
to 644 instead of 600.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-05 11:27:03 +02:00
tux-rampage
e841c6256a Add driver hooks 2024-06-05 06:42:25 +02:00
tux-rampage
c2411e644e Build xorg lib search paths dynamically 2024-06-04 19:40:05 +02:00
Evan Lezar
dffce25637 Rename driver-root option to root
This change renames the nvidia-ctk system create-device-nodes
flag driver-root to root. This makes it clearer that this is
used to load the kernel modules and is not specific to the
user-mode driver installation.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-04 10:25:49 +02:00
Evan Lezar
f5a4b23041 Add dev-root option to create-device-nodes
This allows for dev nodes to be created in cases
where the driver root and the dev root do not
match.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-04 10:24:26 +02:00
Evan Lezar
dfc8e22e12 Merge pull request #360 from elezar/add-device-root-to-toolkit-container
Add dev-root option to toolkit container
2024-06-04 10:09:37 +02:00
Seungmin Kim
155fe66575 Merge branch 'NVIDIA:main' into main 2024-06-04 16:29:35 +09:00
Evan Lezar
9208159263 Add dev-root option to toolkit container
This change adds an option to the toolkit container to allow
the dev root to be specified. This adds support for driver installations
where the driver files are at one root and the dev nodes are created
elsewhere -- most typically at /. This is the case, for example, for
GKE driver installations.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-03 20:40:30 +02:00
Evan Lezar
9b83d09f18 Merge pull request #440 from elezar/cdi-generation-with-driver-root
Find libnvidia-ml in driver root
2024-06-03 13:31:54 +02:00
Evan Lezar
c5eda7af8e Ensure that libnvidia-ml.so.1 is found in driver root
This change ensures that the driver root is used to locate libnvidia-ml.so.1
if required.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-06-03 12:01:10 +02:00
dependabot[bot]
572b0401a4 Bump nvidia/cuda in /deployments/container
Bumps nvidia/cuda from 12.4.1-base-ubuntu20.04 to 12.5.0-base-ubuntu20.04.

---
updated-dependencies:
- dependency-name: nvidia/cuda
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-06-03 08:21:46 +00:00
Seungmin Kim
0d70052105 Merge branch 'NVIDIA:main' into main 2024-06-02 17:42:21 +09:00
Evan Lezar
bead6f98f3 Merge pull request #518 from elezar/fix-staging-image-repo
Remove trailing slash from staging registry
2024-05-29 16:51:39 +02:00
Evan Lezar
533d7119db Remove trailing slash from staging registry
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-29 16:51:20 +02:00
Evan Lezar
e4b46a09a7 Merge pull request #516 from elezar/update-infolib-construction
Update infolib construction
2024-05-28 13:32:55 +02:00
Evan Lezar
8fc4b9c742 Add WithInfoLib option to CDI package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-28 13:30:31 +02:00
Evan Lezar
ef57c07199 Bump github.com/NVIDIA/go-nvlib to v0.5.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-28 13:28:28 +02:00
Evan Lezar
b407109bdf Merge pull request #515 from elezar/fix-cdi-construction
Ensure consistent construction order for libs
2024-05-28 12:33:37 +02:00
Evan Lezar
abb5abaea4 Ensure consistent construction order for libs
This change ensures that nvmllib and devicelib are constructed
before these are used to construct infolib.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-28 12:05:44 +02:00
Evan Lezar
e55e6abc09 Merge pull request #506 from elezar/set-version-on-transform
Set minimum spec version on save
2024-05-28 11:48:28 +02:00
Evan Lezar
17c044eef8 Set minimum version on save
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-24 15:36:18 +02:00
Evan Lezar
edda11d647 Merge pull request #428 from elezar/fix-cdi-mode-resolution
Fix cdi mode resolution
2024-05-21 13:22:10 +02:00
Evan Lezar
52d0383b47 Bump github.com/NVIDIA/go-nvlib to v0.4.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-21 12:25:54 +02:00
Evan Lezar
3defc6babb Use go-nvlib mode resolution
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-21 12:25:54 +02:00
Evan Lezar
7b988f15ab Merge pull request #474 from deitch/separate-hook-binary
move nvidia-ctk hook command into nvidia-cdi-hook binary
2024-05-21 12:24:56 +02:00
Avi Deitcher
179d8655f9 Move nvidia-ctk hook command into own binary
This change creates an nvidia-cdi-hook binary for implementing
CDI hooks. This allows for these hooks to be separated from the
nvidia-ctk command which may, for example, require libnvidia-ml
to support other functionality.

The nvidia-ctk hook subcommand is maintained as an alias for the
time being to allow for existing CDI specifications referring to
this path to work as expected.

Signed-off-by: Avi Deitcher <avi@deitcher.net>
2024-05-21 12:19:44 +02:00
Evan Lezar
2d7b2360d2 Merge pull request #497 from elezar/systemdcgroup
Add option to set additional containerd configs per runtime
2024-05-21 12:05:51 +02:00
Evan Lezar
a61dc148b2 Merge pull request #501 from NVIDIA/dependabot/go_modules/main/github.com/NVIDIA/go-nvml-0.12.0-6
Bump github.com/NVIDIA/go-nvml from 0.12.0-5 to 0.12.0-6
2024-05-21 11:35:58 +02:00
dependabot[bot]
3f6b916a85 Bump github.com/NVIDIA/go-nvml from 0.12.0-5 to 0.12.0-6
Bumps [github.com/NVIDIA/go-nvml](https://github.com/NVIDIA/go-nvml) from 0.12.0-5 to 0.12.0-6.
- [Commits](https://github.com/NVIDIA/go-nvml/compare/v0.12.0-5...v0.12.0-6)

---
updated-dependencies:
- dependency-name: github.com/NVIDIA/go-nvml
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-05-21 11:28:09 +02:00
Seungmin Kim
cf388e7e63 Update internal/discover/graphics.go
Co-authored-by: Evan Lezar <evanlezar@gmail.com>
Signed-off-by: Seungmin Kim <8457324+ehfd@users.noreply.github.com>
2024-05-17 23:55:36 +09:00
Evan Lezar
b435b797af Add support for adding additional containerd configs
This allows for options such as SystemdCgroup to be optionally set.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-17 12:58:08 +02:00
Evan Lezar
c86c3aeeaf Allow per-runtime config overrides
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-17 12:58:08 +02:00
Evan Lezar
f13f1bdba4 Merge pull request #484 from elezar/fix-config-set
Use : as a config --set slice separator
2024-05-13 12:32:53 +02:00
Seungmin Kim
55440f40b3 Make X11 search paths accurate
Signed-off-by: Seungmin Kim <8457324+ehfd@users.noreply.github.com>
2024-05-11 04:31:11 +09:00
Seungmin Kim
cc34996684 Inject additional libraries required for full X11 functionality and fix paths
Signed-off-by: Seungmin Kim <8457324+ehfd@users.noreply.github.com>
2024-05-11 00:33:41 +09:00
Evan Lezar
5a3eda4cba Use : as a config --set list separator
This allows settings such as:

nvidia-ctk config --set nvidia-container-runtime.runtimes=crun:runc

to be applied correctly.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-08 17:26:59 +02:00
Evan Lezar
973a6633b3 Merge pull request #479 from NVIDIA/dependabot/go_modules/main/github.com/urfave/cli/v2-2.27.2
Bump github.com/urfave/cli/v2 from 2.27.1 to 2.27.2
2024-05-07 22:46:49 +02:00
Evan Lezar
f4d0cfb687 Merge pull request #318 from cdesiniotis/update-func-signature
Get device specs by Identifier
2024-05-07 22:45:38 +02:00
Christopher Desiniotis
35b23c5a2c Accept device.Identifiers for requesting CDI specs
This change moves from using strings to using device.Identifiers
as input for requesting CDI specifications for specific
devices.

Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-05-07 21:50:28 +02:00
Evan Lezar
0dc87e5d69 Merge pull request #488 from NVIDIA/dependabot/github_actions/golangci/golangci-lint-action-6
Bump golangci/golangci-lint-action from 5 to 6
2024-05-07 15:22:24 +02:00
Evan Lezar
edc50f6e49 Merge pull request #485 from NVIDIA/dependabot/go_modules/main/golang.org/x/sys-0.20.0
Bump golang.org/x/sys from 0.19.0 to 0.20.0
2024-05-07 15:21:58 +02:00
dependabot[bot]
7de7444b0f Bump golangci/golangci-lint-action from 5 to 6
Bumps [golangci/golangci-lint-action](https://github.com/golangci/golangci-lint-action) from 5 to 6.
- [Release notes](https://github.com/golangci/golangci-lint-action/releases)
- [Commits](https://github.com/golangci/golangci-lint-action/compare/v5...v6)

---
updated-dependencies:
- dependency-name: golangci/golangci-lint-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-05-07 08:25:32 +00:00
dependabot[bot]
8d3ffcd122 Bump github.com/urfave/cli/v2 from 2.27.1 to 2.27.2
Bumps [github.com/urfave/cli/v2](https://github.com/urfave/cli) from 2.27.1 to 2.27.2.
- [Release notes](https://github.com/urfave/cli/releases)
- [Changelog](https://github.com/urfave/cli/blob/main/docs/CHANGELOG.md)
- [Commits](https://github.com/urfave/cli/compare/v2.27.1...v2.27.2)

---
updated-dependencies:
- dependency-name: github.com/urfave/cli/v2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-05-06 11:45:55 +02:00
dependabot[bot]
d72481cbd7 Bump golang.org/x/sys from 0.19.0 to 0.20.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.19.0 to 0.20.0.
- [Commits](https://github.com/golang/sys/compare/v0.19.0...v0.20.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-05-05 08:27:16 +00:00
Evan Lezar
a442a5ed1f Merge pull request #478 from NVIDIA/dependabot/go_modules/main/github.com/NVIDIA/go-nvml-0.12.0-5
Bump github.com/NVIDIA/go-nvml from 0.12.0-4 to 0.12.0-5
2024-05-03 13:14:38 +02:00
dependabot[bot]
7de58b4af4 Bump github.com/NVIDIA/go-nvml from 0.12.0-4 to 0.12.0-5
Bumps [github.com/NVIDIA/go-nvml](https://github.com/NVIDIA/go-nvml) from 0.12.0-4 to 0.12.0-5.
- [Commits](https://github.com/NVIDIA/go-nvml/compare/v0.12.0-4...v0.12.0-5)

---
updated-dependencies:
- dependency-name: github.com/NVIDIA/go-nvml
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-28 08:29:54 +00:00
Carlos Eduardo Arango Gutierrez
fde099d25b Merge pull request #476 from NVIDIA/dependabot/github_actions/golangci/golangci-lint-action-5
Bump golangci/golangci-lint-action from 4 to 5
2024-04-25 12:31:18 +02:00
dependabot[bot]
0a3eb67df8 Bump golangci/golangci-lint-action from 4 to 5
Bumps [golangci/golangci-lint-action](https://github.com/golangci/golangci-lint-action) from 4 to 5.
- [Release notes](https://github.com/golangci/golangci-lint-action/releases)
- [Commits](https://github.com/golangci/golangci-lint-action/compare/v4...v5)

---
updated-dependencies:
- dependency-name: golangci/golangci-lint-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-25 08:31:16 +00:00
Carlos Eduardo Arango Gutierrez
78f250a6b0 Merge pull request #469 from elezar/switch-to-ghimages
Switch to ghcr.io/nvidia/container-toolkit staging images
2024-04-22 10:19:56 +02:00
Evan Lezar
0aed9a16ad Merge pull request #460 from NVIDIA/dependabot/go_modules/main/github.com/NVIDIA/go-nvml-0.12.0-4
Bump github.com/NVIDIA/go-nvml from 0.12.0-3 to 0.12.0-4
2024-04-19 11:46:20 +02:00
Evan Lezar
f46b99c2f7 Switch to ghcr.io/nvidia/container-toolkit staging images
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-18 15:04:59 +02:00
Evan Lezar
d5f6e6f868 Use nvml/mock package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-18 14:53:37 +02:00
Evan Lezar
082ce066ed Replace go-nvlib/pkg/nvml with go-nvml/pkg/nvml
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-18 14:53:37 +02:00
Evan Lezar
bbaf543537 Update github.com/NVIDIA/go-nvlib to v0.3.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-18 14:53:37 +02:00
dependabot[bot]
50dd460eaa Bump github.com/NVIDIA/go-nvml from 0.12.0-3 to 0.12.0-4
Bumps [github.com/NVIDIA/go-nvml](https://github.com/NVIDIA/go-nvml) from 0.12.0-3 to 0.12.0-4.
- [Commits](https://github.com/NVIDIA/go-nvml/compare/v0.12.0-3...v0.12.0-4)

---
updated-dependencies:
- dependency-name: github.com/NVIDIA/go-nvml
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-18 14:53:37 +02:00
Evan Lezar
b3af77166b Merge pull request #467 from NVIDIA/dependabot/go_modules/main/tags.cncf.io/container-device-interface-0.7.2
Bump tags.cncf.io/container-device-interface from 0.7.1 to 0.7.2
2024-04-18 14:50:33 +02:00
dependabot[bot]
d8cb812c8e Bump tags.cncf.io/container-device-interface from 0.7.1 to 0.7.2
Bumps [tags.cncf.io/container-device-interface](https://github.com/cncf-tags/container-device-interface) from 0.7.1 to 0.7.2.
- [Release notes](https://github.com/cncf-tags/container-device-interface/releases)
- [Commits](https://github.com/cncf-tags/container-device-interface/compare/v0.7.1...v0.7.2)

---
updated-dependencies:
- dependency-name: tags.cncf.io/container-device-interface
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-18 10:12:16 +00:00
Carlos Eduardo Arango Gutierrez
80386a7fb2 Merge pull request #463 from elezar/switch-maintenance-branch
Update maintenance dependabot rule
2024-04-18 12:11:04 +02:00
Evan Lezar
c0a5bbe7db Update maintenance dependabot rule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-16 15:36:20 +02:00
Evan Lezar
ddeeca392c Merge pull request #462 from elezar/bump-version-v1.15.0
Bump version to v1.15.0
2024-04-15 15:21:52 +02:00
Evan Lezar
9944feee45 Bump version to v1.15.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-15 14:37:16 +02:00
Evan Lezar
762b14b6cd Merge pull request #459 from elezar/remove-runtime-docker
Remove runtime and docker packages
2024-04-15 11:51:32 +02:00
Evan Lezar
e76e10fb36 Remove third_party package folders
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-12 14:35:03 +02:00
Evan Lezar
fcdf565586 Remove tooling to build packages
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-12 14:32:46 +02:00
Evan Lezar
7a9bc14d98 Merge pull request #425 from jmbaur/discover-use-xdg
Use XDG_DATA_DIRS instead of hardcoding /usr/share
2024-04-11 19:09:53 +02:00
Jared Baur
5788e622f4 Use XDG_DATA_DIRS instead of hardcoding /usr/share
When running nvidia-ctk on a system that uses a custom XDG_DATA_DIRS
environment variable value, the configuration files for `glvnd`,
`vulkan`, and `egl` fail to get passed through from the host to the
container. Reading from XDG_DATA_DIRS instead of hardcoding the default
value allows for finding said files so they can be mounted in the
container.

Signed-off-by: Jared Baur <jaredbaur@fastmail.com>
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-11 17:13:02 +02:00
Evan Lezar
29c0f82ed2 Merge pull request #327 from elezar/add-driver-config
Add config search path option to driver root
2024-04-11 16:58:33 +02:00
Evan Lezar
e1417bee64 Merge pull request #456 from elezar/fix-ubi8-image-build
Remove unneeded repo manipulation
2024-04-11 16:54:31 +02:00
Evan Lezar
5f9e49705c Remove unneeded repo manipulation
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-11 16:44:42 +02:00
Evan Lezar
1d2b61ee11 Merge pull request #455 from elezar/fix-ubi8-image-build
Fix typo in dockerfile
2024-04-11 15:10:04 +02:00
Evan Lezar
271987d448 Fix typo in dockerfile
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-11 15:06:22 +02:00
Evan Lezar
6cac2f5848 Merge pull request #446 from NVIDIA/dependabot/go_modules/main/golang.org/x/sys-0.19.0
Bump golang.org/x/sys from 0.18.0 to 0.19.0
2024-04-11 11:41:51 +02:00
dependabot[bot]
ef4eb0d3c6 Bump golang.org/x/sys from 0.18.0 to 0.19.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.18.0 to 0.19.0.
- [Commits](https://github.com/golang/sys/compare/v0.18.0...v0.19.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-11 09:39:33 +00:00
Evan Lezar
04ab0595fa Merge pull request #449 from NVIDIA/dependabot/go_modules/main/golang.org/x/mod-0.17.0
Bump golang.org/x/mod from 0.16.0 to 0.17.0
2024-04-11 11:38:52 +02:00
Evan Lezar
9d3418d603 Merge pull request #454 from NVIDIA/dependabot/docker/deployments/container/nvidia/cuda-12.4.1-base-ubuntu20.04
Bump nvidia/cuda from 12.3.2-base-ubuntu20.04 to 12.4.1-base-ubuntu20.04 in /deployments/container
2024-04-11 11:38:18 +02:00
dependabot[bot]
57acd85fb1 Bump nvidia/cuda in /deployments/container
Bumps nvidia/cuda from 12.3.2-base-ubuntu20.04 to 12.4.1-base-ubuntu20.04.

---
updated-dependencies:
- dependency-name: nvidia/cuda
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-10 14:11:46 +00:00
Evan Lezar
6d69ca81de Merge pull request #453 from elezar/bump-cuda-version-in-containers
Add dependabot update for CUDA base images
2024-04-10 16:11:26 +02:00
Evan Lezar
be73581489 Add dependabot update for Dockerfiles
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-10 16:06:27 +02:00
Evan Lezar
5682ce3149 Specify CUDA base images directly in Dockerfile
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-10 16:06:27 +02:00
dependabot[bot]
cb2b000ddc Bump golang.org/x/mod from 0.16.0 to 0.17.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.16.0 to 0.17.0.
- [Commits](https://github.com/golang/mod/compare/v0.16.0...v0.17.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-08 11:01:21 +00:00
Evan Lezar
cbc6ff73a4 Merge pull request #441 from elezar/bump-cdi-version
Update tags.cncf.io/container-device-interface to v0.7.1
2024-04-08 13:00:33 +02:00
Evan Lezar
4cd86caf67 Use NewCache instead of GetRegistry
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-05 17:09:17 +02:00
Evan Lezar
885313af3b Bump tags.cncf.io/container-device-interface to v0.7.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-05 17:05:40 +02:00
Evan Lezar
26e52b8013 Merge pull request #438 from elezar/refactor-driver-root
Create root.Driver instance at first usage
2024-04-03 15:11:45 +02:00
Evan Lezar
011c658945 Create root.Driver instance at first usage
This allows for testing through injection of the driver root.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-03 15:03:33 +02:00
Evan Lezar
413da20838 Merge pull request #362 from elezar/add-feature-flags
Add support for feature flags
2024-04-03 12:04:18 +02:00
Evan Lezar
09341a0934 Add support for feature flags
This change adds a features config that allows
individual features to be toggled at a global level. Each feature can (by default)
be controlled by an environment variable.

The GDS, MOFED, NVSWITCH, and GDRCOPY features are examples of such features.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-03 11:58:37 +02:00
Evan Lezar
2a9e3537ec Add config search paths option to driver root.
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-02 23:03:05 +02:00
Evan Lezar
c374520b64 Merge pull request #436 from elezar/remove-verbose-from-tests
Remove verbose from tests
2024-04-02 17:46:33 +02:00
Evan Lezar
e982b9798c Remove verbose from tests
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-04-02 17:45:25 +02:00
Evan Lezar
7eb031919c Merge pull request #430 from lengrongfu/main
fix doc link
2024-04-02 17:41:19 +02:00
Evan Lezar
97950d6b8d Merge pull request #418 from elezar/bump-golang-version
Bump golang version to v1.22.1
2024-04-02 10:59:45 +02:00
Evan Lezar
1613f35bf5 Merge pull request #419 from elezar/rename-build-deployments
Rename build folder deployments
2024-04-02 10:57:54 +02:00
rongfu.leng
a78a7f866f fix doc
Signed-off-by: rongfu.leng <lenronfu@gmail.com>
2024-03-28 13:55:25 +08:00
Evan Lezar
643b89e539 Add driver.Config
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-25 20:18:15 +02:00
Evan Lezar
bdfa525a75 Merge pull request #427 from elezar/add-functional-options-to-driver-root
Use functional options to construct driver root
2024-03-25 20:17:54 +02:00
Evan Lezar
93763d25f0 Use functional options to construct driver root
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-25 20:13:25 +02:00
Evan Lezar
5800e55027 Rename build folder deployments
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-19 14:00:24 +02:00
Evan Lezar
c572c3b787 Remove lint-internal make target
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-19 13:54:16 +02:00
Evan Lezar
3f7ed7c8db Rename golangci-lint target to lint
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-19 13:54:16 +02:00
Evan Lezar
cc6cbd4a89 Use versions.mk GOLANG version in CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-19 13:54:16 +02:00
Evan Lezar
98ad835a77 Add vendor and check-vendor make targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-19 13:48:58 +02:00
Evan Lezar
3a1ac85020 Bump golang version to 1.22.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-19 13:46:47 +02:00
Evan Lezar
1ddc859700 Merge pull request #417 from elezar/bump-version-v1.15.0-rc.4
Bump version v1.15.0 rc.4
2024-03-19 10:47:10 +02:00
Evan Lezar
f1f629674e Bump CUDA base image to 12.3.2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-19 10:37:36 +02:00
Evan Lezar
5a6bf02914 Bump version to v1.15.0-rc.4
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-19 10:36:02 +02:00
Evan Lezar
197cbbe0c6 Merge pull request #411 from elezar/bump-go-nvlib
Bump go-nvlib to v0.2.0  and go-nvml v0.12.0-3
2024-03-15 15:12:01 +02:00
Evan Lezar
b9abb44613 Bump go-nvlib to v0.2.0 and go-nvml v0.12.0-3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-15 15:09:12 +02:00
Evan Lezar
c4ec4a01f8 Merge pull request #384 from tariq1890/upd-go
Fix dockerfile arch selection
2024-03-15 08:58:27 +02:00
Tariq Ibrahim
f40f4369a1 Fix dockerfile arch selection
This change includes an arm64 arch check when installing golang.

Signed-off-by: Tariq Ibrahim <tibrahim@nvidia.com>
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-15 08:54:00 +02:00
Evan Lezar
2733661125 Merge pull request #383 from elezar/fix-golangci-lint-in-shell
Add GOLANGCI_LINT_CACHE to docker make targets
2024-03-15 08:51:29 +02:00
Evan Lezar
4806f6e70d Merge pull request #395 from elezar/add-nvidia-visible-devices-void
Add NVIDIA_VISIBLE_DEVICES=void to CDI specs
2024-03-13 10:00:38 +02:00
Evan Lezar
db21f5f9a8 Merge pull request #400 from NVIDIA/dependabot/go_modules/main/golang.org/x/sys-0.18.0
Bump golang.org/x/sys from 0.17.0 to 0.18.0
2024-03-12 15:16:05 +02:00
Evan Lezar
07443a0e86 Merge pull request #405 from NVIDIA/dependabot/submodules/main/third_party/libnvidia-container-6c8f1df
Bump third_party/libnvidia-container from `86f1946` to `6c8f1df`
2024-03-12 15:15:32 +02:00
dependabot[bot]
675db67ebb Bump third_party/libnvidia-container from 86f1946 to 6c8f1df
Bumps [third_party/libnvidia-container](https://github.com/NVIDIA/libnvidia-container) from `86f1946` to `6c8f1df`.
- [Release notes](https://github.com/NVIDIA/libnvidia-container/releases)
- [Commits](86f19460fb...6c8f1df7fd)

---
updated-dependencies:
- dependency-name: third_party/libnvidia-container
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-12 12:10:07 +00:00
Evan Lezar
14ecacf6d1 Merge pull request #403 from elezar/add-github-submodule-update
Add dependabot config to update libnvidia-container submodule
2024-03-12 13:47:17 +02:00
Evan Lezar
9451da1e6d Add dependabot config to update libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-12 13:45:04 +02:00
Evan Lezar
e30ddb398f Merge pull request #397 from jbujak/enumAdapters3
Use D3DKMTEnumAdapters3 for adapter enumeration
2024-03-12 13:33:25 +02:00
dependabot[bot]
375188495e Bump golang.org/x/sys from 0.17.0 to 0.18.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.17.0 to 0.18.0.
- [Commits](https://github.com/golang/sys/compare/v0.17.0...v0.18.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-12 11:14:05 +00:00
Evan Lezar
5ff48a5a89 Merge pull request #401 from NVIDIA/dependabot/go_modules/main/golang.org/x/mod-0.16.0
Bump golang.org/x/mod from 0.15.0 to 0.16.0
2024-03-12 13:13:24 +02:00
Jakub Bujak
44ae31d101 Use D3DKMTEnumAdapters3 for adapter enumeration
D3DKMTEnumAdapters3 is required to enumerate MCDM compute-only adapters

Signed-off-by: Jakub Bujak <jbujak@nvidia.com>
2024-03-12 11:42:08 +01:00
dependabot[bot]
942e5c7224 Bump golang.org/x/mod from 0.15.0 to 0.16.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.15.0 to 0.16.0.
- [Commits](https://github.com/golang/mod/compare/v0.15.0...v0.16.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-10 08:26:44 +00:00
Evan Lezar
88ad42ccd1 Add NVIDIA_VISIBLE_DEVICES=void to CDI specs
This change ensures that NVIDIA_VISIBLE_DEVICES=void is included in
generated CDI specs. This prevents the NVIDIA Container Runtime Hook
from injecting devices if NVIDIA_VISIBLE_DEVICES=all is set.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-04 16:10:06 +02:00
Evan Lezar
df9732dae4 Merge pull request #369 from NVIDIA/dependabot/go_modules/main/github.com/opencontainers/runtime-spec-1.2.0
Bump github.com/opencontainers/runtime-spec from 1.1.0 to 1.2.0
2024-03-01 22:49:29 +02:00
Evan Lezar
e66cc6a7b1 Merge pull request #392 from elezar/fix-non-fork-image-builds
Allow finer control of pushing images
2024-03-01 22:49:01 +02:00
dependabot[bot]
8f5a9a1918 Bump github.com/opencontainers/runtime-spec from 1.1.0 to 1.2.0
Bumps [github.com/opencontainers/runtime-spec](https://github.com/opencontainers/runtime-spec) from 1.1.0 to 1.2.0.
- [Release notes](https://github.com/opencontainers/runtime-spec/releases)
- [Changelog](https://github.com/opencontainers/runtime-spec/blob/main/ChangeLog)
- [Commits](https://github.com/opencontainers/runtime-spec/compare/v1.1.0...v1.2.0)

---
updated-dependencies:
- dependency-name: github.com/opencontainers/runtime-spec
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-01 22:42:29 +02:00
Evan Lezar
6b9dee5b77 Allow finer control of pushing images
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-01 22:42:13 +02:00
Evan Lezar
50bbf32cf0 Add GOLANGCI_LINT_CACHE to docker make targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-01 22:27:06 +02:00
Evan Lezar
413c1264ce Merge pull request #391 from elezar/add-lint-excludes-for-prestart
Add lint exclude for hooks.Prestart deprecation
2024-03-01 22:20:54 +02:00
Evan Lezar
c084756e48 Add lint exclude for hooks.Prestart deprecation
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-01 22:18:27 +02:00
Evan Lezar
6265a2d89e Merge pull request #389 from NVIDIA/dependabot/go_modules/main/github.com/stretchr/testify-1.9.0
Bump github.com/stretchr/testify from 1.8.4 to 1.9.0
2024-03-01 22:16:05 +02:00
dependabot[bot]
72778ee536 Bump github.com/stretchr/testify from 1.8.4 to 1.9.0
Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.8.4 to 1.9.0.
- [Release notes](https://github.com/stretchr/testify/releases)
- [Commits](https://github.com/stretchr/testify/compare/v1.8.4...v1.9.0)

---
updated-dependencies:
- dependency-name: github.com/stretchr/testify
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-01 15:45:26 +00:00
Evan Lezar
2f11a190bf Merge pull request #388 from elezar/add-gh-pages-dependabot
Add dependabot for gh-pages branches
2024-03-01 17:05:02 +02:00
Evan Lezar
2d394f4624 Add dependabot for gh-pages branches
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-03-01 16:55:14 +02:00
Evan Lezar
ea55757bc3 Merge pull request #382 from elezar/remove-centos7-image
Remove centos7 container-toolkit image
2024-02-29 12:01:26 +02:00
Evan Lezar
2a620dc845 Merge pull request #387 from elezar/update-libnvidia-container
Update libnvidia-container
2024-02-29 12:00:34 +02:00
Evan Lezar
bad5369760 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-29 11:53:51 +02:00
Evan Lezar
2623e8a707 Allow finer control of pushing images
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-29 11:45:40 +02:00
Evan Lezar
05dd438489 Remove centos7 container-toolkit image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-28 15:45:09 +02:00
Evan Lezar
6780afbed1 Merge pull request #379 from NVIDIA/exists-fallback
[R550 driver support] add fallback logic to device.Exists(name)
2024-02-27 22:25:33 +02:00
Tariq Ibrahim
f80f4c485d [R550 driver support] add fallback logic to device.Exists(name)
Signed-off-by: Tariq Ibrahim <tibrahim@nvidia.com>
2024-02-27 11:59:35 -08:00
Evan Lezar
ac63063362 Merge pull request #375 from klueska/add-imex-support
Add imex support
2024-02-27 13:24:07 +02:00
Kevin Klues
761a425e0d Update reference to latest libnvidia-container
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2024-02-27 10:17:32 +01:00
Kevin Klues
296d4560b0 Add support for an NVIDIA_IMEX_CHANNELS envvar
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2024-02-26 20:09:43 +01:00
Evan Lezar
0409824106 Merge pull request #370 from elezar/remove-libnvidia-container0-dependency
Remove additional libnvidia-container0 dependency
2024-02-19 13:56:02 +01:00
Evan Lezar
562addc3c6 Remove additional libnvidia-container0 dependency
This change removes the additional libnvidia-container0=0.10.0+jetpack dependency
that was introduced for Tegra-based systems. These have since been migrated to
CDI-based direct injection using the NVIDIA Container Runtime.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-19 13:12:57 +01:00
Evan Lezar
ae82b2d9b6 Merge pull request #345 from elezar/allow-skip-of-device-nodes
Add --create-device-nodes option to toolkit config
2024-02-14 20:28:15 +01:00
Evan Lezar
355997d2d6 Merge pull request #314 from elezar/CNT-4032/mulitple-naming-strategies
Allow multiple naming strategies when generating CDI specification
2024-02-13 16:46:54 +01:00
Evan Lezar
b6efd3091d Use index and uuid as default device-name-strategies
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-13 16:38:18 +01:00
Evan Lezar
52da12cf9a Allow multiple device name strategies to be specified
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-13 16:38:05 +01:00
Evan Lezar
cd7d586afa Also ignore CDI errors if required
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-13 12:37:41 +01:00
Evan Lezar
cc4c2783a3 Add --create-device-nodes option to toolkit config
This change adds a --create-device-nodes option to the toolkit config CLI.
Most notably, this allows the creation of control devices to be skipped
when CDI spec generation is enabled.

Currently values of "", "none", and "control" are supported and can be set
via the command line flag or the CREATE_DEVICE_NODES environment variable.

The default value of CREATE_DEVICE_NODES=control will trigger the creation
of control device nodes. Setting this envvar to include the (comma-separated)
strings of "" or "none" will disable device node creation regardless of
whether other supported strings are included.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-13 12:37:41 +01:00
Evan Lezar
a8d48808d7 Merge pull request #354 from elezar/add-release-1.14-dependabot
Add dependabot for release-1.14
2024-02-12 17:34:38 +01:00
Evan Lezar
aa724f1ac6 Add dependabot for release-1.14
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-12 17:32:18 +01:00
Evan Lezar
519b9f3cc8 Merge pull request #353 from elezar/update-changelog
Update changelog for #330
2024-02-12 17:29:06 +01:00
Evan Lezar
6e1bc0d7fb Update changelog for #330
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-12 17:28:40 +01:00
Evan Lezar
a2a1a78620 Merge pull request #330 from tariq1890/nvidia-dev-maj-num-lookup
add fallback logic when retrieving major number of the nvidia control device
2024-02-12 17:22:32 +01:00
Evan Lezar
ab7693ac9f Merge pull request #347 from NVIDIA/dependabot/go_modules/main/golang.org/x/mod-0.15.0
Bump golang.org/x/mod from 0.14.0 to 0.15.0
2024-02-12 15:47:01 +01:00
dependabot[bot]
f4df5308d0 Bump golang.org/x/mod from 0.14.0 to 0.15.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.14.0 to 0.15.0.
- [Commits](https://github.com/golang/mod/compare/v0.14.0...v0.15.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 13:35:09 +00:00
Evan Lezar
8dcc57c614 Merge pull request #348 from NVIDIA/dependabot/go_modules/main/github.com/sirupsen/logrus-1.9.3
Bump github.com/sirupsen/logrus from 1.9.0 to 1.9.3
2024-02-12 14:35:07 +01:00
Evan Lezar
6594f06e9a Merge pull request #349 from NVIDIA/dependabot/go_modules/main/golang.org/x/sys-0.17.0
Bump golang.org/x/sys from 0.16.0 to 0.17.0
2024-02-12 14:34:27 +01:00
Evan Lezar
8a706a97a0 Merge pull request #350 from NVIDIA/dependabot/github_actions/golangci/golangci-lint-action-4
Bump golangci/golangci-lint-action from 3 to 4
2024-02-12 14:33:43 +01:00
Evan Lezar
39f0bf21ce Merge pull request #346 from elezar/consistent-driver-root
Specify DRIVER_ROOT consistently
2024-02-12 14:33:15 +01:00
dependabot[bot]
0915a12e38 Bump golangci/golangci-lint-action from 3 to 4
Bumps [golangci/golangci-lint-action](https://github.com/golangci/golangci-lint-action) from 3 to 4.
- [Release notes](https://github.com/golangci/golangci-lint-action/releases)
- [Commits](https://github.com/golangci/golangci-lint-action/compare/v3...v4)

---
updated-dependencies:
- dependency-name: golangci/golangci-lint-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 08:13:37 +00:00
dependabot[bot]
e6cd897cc4 Bump golang.org/x/sys from 0.16.0 to 0.17.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.16.0 to 0.17.0.
- [Commits](https://github.com/golang/sys/compare/v0.16.0...v0.17.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-11 08:49:59 +00:00
dependabot[bot]
35600910e0 Bump github.com/sirupsen/logrus from 1.9.0 to 1.9.3
Bumps [github.com/sirupsen/logrus](https://github.com/sirupsen/logrus) from 1.9.0 to 1.9.3.
- [Release notes](https://github.com/sirupsen/logrus/releases)
- [Changelog](https://github.com/sirupsen/logrus/blob/master/CHANGELOG.md)
- [Commits](https://github.com/sirupsen/logrus/compare/v1.9.0...v1.9.3)

---
updated-dependencies:
- dependency-name: github.com/sirupsen/logrus
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-11 08:49:55 +00:00
Evan Lezar
f89cef307d Specify DRIVER_ROOT consistently
This change ensures that CLI tools that require the path to the
driver root accept both the NVIDIA_DRIVER_ROOT and DRIVER_ROOT
environment variables in addition to the --driver-root command
line argument.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-09 14:28:56 +01:00
Evan Lezar
e96edb3f36 Merge pull request #342 from elezar/add-spec-dirs-to-cdi-list
Add spec-dir flag to nvidia-ctk cdi list command
2024-02-09 14:17:45 +01:00
Evan Lezar
bab4ec30af Improve error reporting for cdi list
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-08 14:58:48 +01:00
Evan Lezar
b6ab444529 Add spec-dirs argument to cdi list
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-08 14:50:14 +01:00
Evan Lezar
15d905def0 Merge pull request #309 from NVIDIA/dependabot/go_modules/main/github.com/urfave/cli/v2-2.27.1
Bump github.com/urfave/cli/v2 from 2.3.0 to 2.27.1
2024-02-07 11:01:46 +01:00
Evan Lezar
e64b723b71 Add proc.devices.New constructor
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-06 11:19:43 +01:00
Evan Lezar
f0545dd979 Merge pull request #333 from elezar/test-on-darwin
Fix build and tests targets on darwin
2024-02-06 10:16:04 +01:00
Tariq Ibrahim
f414ac2865 add fallback logic when retrieving major number of the nvidia control device
Signed-off-by: Tariq Ibrahim <tibrahim@nvidia.com>
2024-02-05 22:55:54 -08:00
Evan Lezar
772cf77dcc Fix build and test on darwin
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-05 23:58:28 +01:00
dependabot[bot]
026055a0b7 Bump github.com/urfave/cli/v2 from 2.3.0 to 2.27.1
Bumps [github.com/urfave/cli/v2](https://github.com/urfave/cli) from 2.3.0 to 2.27.1.
- [Release notes](https://github.com/urfave/cli/releases)
- [Changelog](https://github.com/urfave/cli/blob/main/docs/CHANGELOG.md)
- [Commits](https://github.com/urfave/cli/compare/v2.3.0...v2.27.1)

---
updated-dependencies:
- dependency-name: github.com/urfave/cli/v2
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-05 15:04:18 +00:00
Evan Lezar
812e6a2402 Merge pull request #308 from NVIDIA/dependabot/go_modules/main/github.com/pelletier/go-toml-1.9.5
Bump github.com/pelletier/go-toml from 1.9.4 to 1.9.5
2024-02-05 16:03:36 +01:00
Evan Lezar
b56aebb26f Merge pull request #310 from NVIDIA/dependabot/go_modules/main/golang.org/x/sys-0.16.0
Bump golang.org/x/sys from 0.7.0 to 0.16.0
2024-02-05 10:39:58 +01:00
dependabot[bot]
870903e03e Bump golang.org/x/sys from 0.7.0 to 0.16.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.7.0 to 0.16.0.
- [Commits](https://github.com/golang/sys/compare/v0.7.0...v0.16.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-05 09:33:36 +00:00
dependabot[bot]
b233a8b6ba Bump github.com/pelletier/go-toml from 1.9.4 to 1.9.5
Bumps [github.com/pelletier/go-toml](https://github.com/pelletier/go-toml) from 1.9.4 to 1.9.5.
- [Release notes](https://github.com/pelletier/go-toml/releases)
- [Changelog](https://github.com/pelletier/go-toml/blob/v2/.goreleaser.yaml)
- [Commits](https://github.com/pelletier/go-toml/compare/v1.9.4...v1.9.5)

---
updated-dependencies:
- dependency-name: github.com/pelletier/go-toml
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-05 09:32:35 +00:00
Evan Lezar
e96e1baed5 Merge pull request #336 from NVIDIA/dependabot/go_modules/main/github.com/fsnotify/fsnotify-1.7.0
Bump github.com/fsnotify/fsnotify from 1.5.4 to 1.7.0
2024-02-05 10:31:45 +01:00
dependabot[bot]
dce368e308 Bump github.com/fsnotify/fsnotify from 1.5.4 to 1.7.0
Bumps [github.com/fsnotify/fsnotify](https://github.com/fsnotify/fsnotify) from 1.5.4 to 1.7.0.
- [Release notes](https://github.com/fsnotify/fsnotify/releases)
- [Changelog](https://github.com/fsnotify/fsnotify/blob/main/CHANGELOG.md)
- [Commits](https://github.com/fsnotify/fsnotify/compare/v1.5.4...v1.7.0)

---
updated-dependencies:
- dependency-name: github.com/fsnotify/fsnotify
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-04 08:26:11 +00:00
Evan Lezar
15f609a52d Merge pull request #332 from elezar/filter-actions
Enable a subset of package and image builds in PRs
2024-02-02 11:34:10 +01:00
Evan Lezar
0bf08085ce Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-02 09:21:54 +01:00
Evan Lezar
da68ad393c Enable subset of image builds in PRs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-02 09:20:05 +01:00
Evan Lezar
2f3600af9a Merge pull request #329 from elezar/migrate-libnvidia-container-to-github
Update libnvidia-container to github ref
2024-02-01 17:08:55 +01:00
Evan Lezar
0ff28aa21b Update libnvidia-container to github ref
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-01 16:36:10 +01:00
Evan Lezar
b88ff4470c Merge pull request #328 from NVIDIA/revert-323-sheckler/cuda-12.3.2
Revert "chore: Update CUDA base image to 12.3.2"
2024-02-01 16:28:59 +01:00
Evan Lezar
cfb1daee0a Revert "chore: Update CUDA base image to 12.3.2" 2024-02-01 16:27:53 +01:00
Evan Lezar
e3ab55beed Merge pull request #323 from heckler1/sheckler/cuda-12.3.2
chore: Update CUDA base image to 12.3.2
2024-02-01 14:02:33 +01:00
Evan Lezar
9530d9949f Merge pull request #325 from elezar/remove-jenkinsfile
Remove unneeded Jenkinsfile
2024-02-01 08:41:41 +01:00
Evan Lezar
6b2cd487a6 Remove unneeded Jenkinsfile
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-31 22:05:01 +01:00
Stephen Heckler
e5ec408a5c Release 1.15.0-rc4
Signed-off-by: Stephen Heckler <sheckler@cloudflare.com>
2024-01-31 13:49:31 -06:00
Stephen Heckler
301b666790 chore: Update CUDA base image to 12.3.2
Signed-off-by: Stephen Heckler <sheckler@cloudflare.com>
2024-01-31 13:48:05 -06:00
Evan Lezar
e99b519509 Merge pull request #321 from elezar/bump-libnvidia-container
Update libnvidia-container
2024-01-30 16:44:01 +01:00
Evan Lezar
d123273800 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-30 16:11:57 +01:00
Evan Lezar
07136d9ac4 Merge pull request #312 from NVIDIA/dependabot/go_modules/main/golang.org/x/mod-0.14.0
Bump golang.org/x/mod from 0.5.0 to 0.14.0
2024-01-30 16:04:48 +01:00
Evan Lezar
0ef06be477 Merge pull request #320 from elezar/remove-centos8-jobs
Remove centos8 jobs
2024-01-30 16:03:30 +01:00
Evan Lezar
5a70e75547 Use stable/rpm repo for release tests
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-30 12:40:57 +01:00
Evan Lezar
46b4cd7b03 Remove unused centos8 jobs
This change removes the centos8-x86_64 and centos8-aarch64 pipeline jobs.

These packages are no longer used since centos7 packages are used instead.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-30 12:40:16 +01:00
dependabot[bot]
7abbd98ff0 Bump golang.org/x/mod from 0.5.0 to 0.14.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.5.0 to 0.14.0.
- [Commits](https://github.com/golang/mod/compare/v0.5.0...v0.14.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-01-29 14:20:35 +00:00
673 changed files with 62750 additions and 21803 deletions

View File

@@ -33,6 +33,7 @@ stages:
- test
- scan
- release
- sign
.pipeline-trigger-rules:
rules:
@@ -144,7 +145,7 @@ trigger-pipeline:
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
script:
- make -f build/container/Makefile test-${DIST}
- make -f deployments/container/Makefile test-${DIST}
# Define the test targets
test-packaging:
@@ -194,7 +195,7 @@ test-packaging:
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
# Target
- make -f build/container/Makefile push-${DIST}
- make -f deployments/container/Makefile push-${DIST}
# Define a staging release step that pushes an image to an internal "staging" repository
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
@@ -224,13 +225,6 @@ test-packaging:
OUT_IMAGE_VERSION: "${DEVEL_RELEASE_IMAGE_VERSION}"
# Define the release jobs
release:staging-centos7:
extends:
- .release:staging
- .dist-centos7
needs:
- image-centos7
release:staging-ubi8:
extends:
- .release:staging

View File

@@ -14,7 +14,54 @@ updates:
labels:
- dependencies
- package-ecosystem: "docker"
target-branch: main
directory: "/deployments/container"
schedule:
interval: "daily"
- package-ecosystem: "gomod"
# This defines a specific dependabot rule for the latest release-* branch.
target-branch: release-1.15
directory: "/"
schedule:
interval: "weekly"
day: "sunday"
ignore:
- dependency-name: k8s.io/*
labels:
- dependencies
- maintenance
- package-ecosystem: "docker"
target-branch: release-1.15
directory: "/deployments/container"
schedule:
interval: "daily"
labels:
- dependencies
- maintenance
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
- package-ecosystem: "github-actions"
target-branch: gh-pages
directory: "/"
schedule:
interval: "weekly"
day: "monday"
# Allow dependabot to update the libnvidia-container submodule.
- package-ecosystem: "gitsubmodule"
target-branch: main
directory: "/"
allow:
- dependency-name: "third_party/libnvidia-container"
schedule:
interval: "daily"
labels:
- dependencies
- libnvidia-container

View File

@@ -16,6 +16,9 @@ name: Golang
on:
pull_request:
types:
- opened
- synchronize
branches:
- main
- release-*
@@ -29,28 +32,45 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
name: Checkout code
- name: Get Golang version
id: vars
run: |
GOLANG_VERSION=$( grep "GOLANG_VERSION :=" versions.mk )
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GOLANG_VERSION }}
- name: Lint
uses: golangci/golangci-lint-action@v3
uses: golangci/golangci-lint-action@v6
with:
version: latest
args: -v --timeout 5m
skip-cache: true
- name: Check golang modules
run: make check-vendor
test:
name: Unit test
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Get Golang version
id: vars
run: |
GOLANG_VERSION=$( grep "GOLANG_VERSION :=" versions.mk )
echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: '1.20'
go-version: ${{ env.GOLANG_VERSION }}
- run: make test
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
name: Checkout code
- name: Build
run: make docker-build

View File

@@ -16,6 +16,13 @@
name: image
on:
pull_request:
types:
- opened
- synchronize
branches:
- main
- release-*
push:
branches:
- main
@@ -26,13 +33,24 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
package:
target:
- ubuntu18.04-arm64
- ubuntu18.04-amd64
- ubuntu18.04-ppc64le
- centos7-aarch64
- centos7-x86_64
- centos8-ppc64le
ispr:
- ${{github.event_name == 'pull_request'}}
exclude:
- ispr: true
target: ubuntu18.04-arm64
- ispr: true
target: ubuntu18.04-ppc64le
- ispr: true
target: centos7-aarch64
- ispr: true
target: centos8-ppc64le
fail-fast: false
steps:
- uses: actions/checkout@v4
@@ -41,23 +59,31 @@ jobs:
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: build ${{ matrix.package }} packages
- name: build ${{ matrix.target }} packages
run: |
sudo apt-get install -y coreutils build-essential sed git bash make
echo "Building packages"
./scripts/build-packages.sh ${{ matrix.package }}
./scripts/build-packages.sh ${{ matrix.target }}
- name: 'Upload Artifacts'
uses: actions/upload-artifact@v4
with:
compression-level: 0
name: toolkit-container-${{ matrix.package }}-${{ github.run_id }}
name: toolkit-container-${{ matrix.target }}-${{ github.run_id }}
path: ${{ github.workspace }}/dist/*
image:
runs-on: ubuntu-latest
strategy:
matrix:
image: [ubuntu20.04, centos7, ubi8, packaging]
matrix:
dist:
- ubuntu20.04
- ubi8
- packaging
ispr:
- ${{github.event_name == 'pull_request'}}
exclude:
- ispr: true
dist: ubi8
needs: packages
steps:
- uses: actions/checkout@v4
@@ -70,17 +96,21 @@ jobs:
REPO_FULL_NAME="${{ github.event.pull_request.head.repo.full_name }}"
echo "${REPO_FULL_NAME}"
echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> $GITHUB_ENV
GENERATE_ARTIFACTS="false"
if [[ "${{ github.actor }}" == "dependabot[bot]" ]]; then
GENERATE_ARTIFACTS="false"
elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
GENERATE_ARTIFACTS="true"
PUSH_ON_BUILD="false"
BUILD_MULTI_ARCH_IMAGES="false"
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
if [[ "${{ github.actor }}" != "dependabot[bot]" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
# For non-fork PRs that are not created by dependabot we do push images
PUSH_ON_BUILD="true"
fi
elif [[ "${{ github.event_name }}" == "push" ]]; then
GENERATE_ARTIFACTS="true"
# On push events we do generate images and enable multi-arch builds
PUSH_ON_BUILD="true"
BUILD_MULTI_ARCH_IMAGES="true"
fi
echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
echo "PUSH_ON_BUILD=${PUSH_ON_BUILD}" >> $GITHUB_ENV
echo "BUILD_MULTI_ARCH_IMAGES=${BUILD_MULTI_ARCH_IMAGES}" >> $GITHUB_ENV
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
@@ -90,7 +120,7 @@ jobs:
uses: actions/download-artifact@v4
with:
path: ${{ github.workspace }}/dist/
pattern: toolkit-container-*
pattern: toolkit-container-*-${{ github.run_id }}
merge-multiple: true
- name: Login to GitHub Container Registry
@@ -105,4 +135,4 @@ jobs:
VERSION: ${COMMIT_SHORT_SHA}
run: |
echo "${VERSION}"
make -f build/container/Makefile build-${{ matrix.image }}
make -f deployments/container/Makefile build-${{ matrix.dist }}

52
.github/workflows/release.yaml vendored Normal file
View File

@@ -0,0 +1,52 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run this workflow on new tags
name: Release
on:
push:
tags:
- v*
jobs:
release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
name: Check out code
- name: Create Draft Release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OWNER: ${{ github.repository_owner }}
REPO: ${{ github.event.repository.name }}
run: |
GH_EXTRA_ARGS=""
if [[ ${{ github.ref }} == *-rc.* ]]; then
GH_EXTRA_ARGS="--prerelease"
fi
gh release create ${{ github.ref }} \
--draft \
-t "${{ github.ref }}" \
-R $OWNER/$REPO \
--verify-tag \
$GH_EXTRA_ARGS
- name: Upload Release Artifacts
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OWNER: ${{ github.repository_owner }}
REPO: ${{ github.event.repository.name }}
run: |
gh release upload ${{ github.ref }} CHANGELOG.md -R $OWNER/$REPO

View File

@@ -76,24 +76,12 @@ package-centos7-x86_64:
- .dist-centos7
- .arch-x86_64
package-centos8-aarch64:
extends:
- .package-build
- .dist-centos8
- .arch-aarch64
package-centos8-ppc64le:
extends:
- .package-build
- .dist-centos8
- .arch-ppc64le
package-centos8-x86_64:
extends:
- .package-build
- .dist-centos8
- .arch-x86_64
package-ubuntu18.04-amd64:
extends:
- .package-build
@@ -138,15 +126,7 @@ package-ubuntu18.04-ppc64le:
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
script:
- make -f build/container/Makefile build-${DIST}
image-centos7:
extends:
- .image-build
- .package-artifacts
- .dist-centos7
needs:
- package-centos7-x86_64
- make -f deployments/container/Makefile build-${DIST}
image-ubi8:
extends:
@@ -176,8 +156,6 @@ image-packaging:
- .package-artifacts
- .dist-packaging
needs:
- job: package-centos8-aarch64
- job: package-centos8-x86_64
- job: package-ubuntu18.04-amd64
- job: package-ubuntu18.04-arm64
- job: package-amazonlinux2-aarch64

2
.gitmodules vendored
View File

@@ -1,4 +1,4 @@
[submodule "third_party/libnvidia-container"]
path = third_party/libnvidia-container
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
url = https://github.com/NVIDIA/libnvidia-container.git
branch = main

View File

@@ -20,6 +20,9 @@ linters-settings:
local-prefixes: github.com/NVIDIA/nvidia-container-toolkit
issues:
exclude:
# The legacy hook relies on spec.Hooks.Prestart, which is deprecated as of the v1.2.0 OCI runtime spec.
- "SA1019:(.+).Prestart is deprecated(.+)"
exclude-rules:
# Exclude the gocritic dupSubExpr issue for cgo files.
- path: internal/dxcore/dxcore.go

View File

@@ -33,7 +33,7 @@ variables:
# On the multi-arch builder we don't need the qemu setup.
SKIP_QEMU_SETUP: "1"
# Define the public staging registry
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
STAGING_REGISTRY: ghcr.io/nvidia
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
ARTIFACTORY_REPO_BASE: "https://urm.nvidia.com/artifactory/sw-gpu-cloudnative"
KITMAKER_RELEASE_FOLDER: "kitmaker"
@@ -67,12 +67,7 @@ variables:
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
script:
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
- make -f build/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
image-centos7:
extends:
- .dist-centos7
- .image-pull
- make -f deployments/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
image-ubi8:
extends:
@@ -132,14 +127,6 @@ image-packaging:
- policy_evaluation.json
# Define the scan targets
scan-centos7-amd64:
extends:
- .dist-centos7
- .platform-amd64
- .scan
needs:
- image-centos7
scan-ubuntu20.04-amd64:
extends:
- .dist-ubuntu20.04
@@ -243,11 +230,6 @@ release:staging-ubuntu20.04:
# Define the external release targets
# Release to NGC
release:ngc-centos7:
extends:
- .dist-centos7
- .release:ngc
release:ngc-ubuntu20.04:
extends:
- .dist-ubuntu20.04
@@ -262,3 +244,62 @@ release:ngc-packaging:
extends:
- .dist-packaging
- .release:ngc
# Define the external image signing steps for NGC
# Download the ngc cli binary for use in the sign steps
.ngccli-setup:
before_script:
- apt-get update && apt-get install -y curl unzip jq
- |
if [ -z "${NGCCLI_VERSION}" ]; then
NGC_VERSION_URL="https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions"
# Extract the latest version from the JSON data using jq
export NGCCLI_VERSION=$(curl -s $NGC_VERSION_URL | jq -r '.recipe.latestVersionIdStr')
fi
echo "NGCCLI_VERSION ${NGCCLI_VERSION}"
- curl -sSLo ngccli_linux.zip https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/${NGCCLI_VERSION}/files/ngccli_linux.zip
- unzip ngccli_linux.zip
- chmod u+x ngc-cli/ngc
# .sign forms the base of the deployment jobs which sign images in the CI registry.
# This is extended with the image name and version to be deployed.
.sign:ngc:
image: ubuntu:latest
stage: sign
rules:
- if: $CI_COMMIT_TAG
variables:
NGC_CLI_API_KEY: "${NGC_REGISTRY_TOKEN}"
IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
IMAGE_TAG: "${CI_COMMIT_TAG}-${DIST}"
retry:
max: 2
before_script:
- !reference [.ngccli-setup, before_script]
# We ensure that the IMAGE_NAME and IMAGE_TAG are set
- 'echo Image Name: ${IMAGE_NAME} && [[ -n "${IMAGE_NAME}" ]] || exit 1'
- 'echo Image Tag: ${IMAGE_TAG} && [[ -n "${IMAGE_TAG}" ]] || exit 1'
script:
- 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"'
- ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia
sign:ngc-ubuntu20.04:
extends:
- .dist-ubuntu20.04
- .sign:ngc
needs:
- release:ngc-ubuntu20.04
sign:ngc-ubi8:
extends:
- .dist-ubi8
- .sign:ngc
needs:
- release:ngc-ubi8
sign:ngc-packaging:
extends:
- .dist-packaging
- .sign:ngc
needs:
- release:ngc-packaging

View File

@@ -1,5 +1,47 @@
# NVIDIA Container Toolkit Changelog
## v1.16.0-rc.1
- Support vulkan ICD files directly in a driver root. This allows for the discovery of vulkan files in GKE driver installations.
- Increase priority of ld.so.conf.d config file injected into container. This ensures that injected libraries are preferred over libraries present in the container.
- Set default CDI spec permissions to 644. This fixes permission issues when using the `nvidia-ctk cdi transform` functions.
- Add `dev-root` option to `nvidia-ctk system create-device-nodes` command.
- Fix location of `libnvidia-ml.so.1` when a non-standard driver root is used. This enables CDI spec generation when using the driver container on a host.
- Recalculate minimum required CDI spec version on save.
- Move `nvidia-ctk hook` commands to a separate `nvidia-cdi-hook` binary. The same subcommands are supported.
- Use `:` as an `nvidia-ctk config --set` list separator. This fixes a bug when trying to set config options that are lists.
- [toolkit-container] Bump CUDA base image version to 12.5.0
- [toolkit-container] Allow the path to `toolkit.pid` to be specified directly.
- [toolkit-container] Remove provenance information from image manifests.
- [toolkit-container] Add `dev-root` option when configuring the toolkit. This adds support for GKE driver installations.
## v1.15.0
* Remove `nvidia-container-runtime` and `nvidia-docker2` packages.
* Use `XDG_DATA_DIRS` environment variable when locating config files such as graphics config files.
* Add support for v0.7.0 Container Device Interface (CDI) specification.
* Add `--config-search-path` option to `nvidia-ctk cdi generate` command. These paths are used when locating driver files such as graphics config files.
* Use D3DKMTEnumAdapters3 to enumerate adapters on WSL2 if available.
* Add support for v1.2.0 OCI Runtime specification.
* Explicitly set `NVIDIA_VISIBLE_DEVICES=void` in generated CDI specifications. This prevents the NVIDIA Container Runtime from making additional modifications.
* [libnvidia-container] Use D3DKMTEnumAdapters3 to enumerate adapters on WSL2 if available.
* [toolkit-container] Bump CUDA base image version to 12.4.1
## v1.15.0-rc.4
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
* Allow multiple device naming strategies for `nvidia-ctk cdi generate` command. This allows a single
CDI spec to be generated that includes GPUs by index and UUID.
* Set the default `--device-name-strategy` for the `nvidia-ctk cdi generate` command to `[index, uuid]`.
* Remove `libnvidia-container0` jetpack dependency included for legacy Tegra-based systems.
* Add `NVIDIA_VISIBLE_DEVICES=void` to generated CDI specifications.
* [toolkit-container] Remove centos7 image. The ubi8 image can be used on all RPM-based platforms.
* [toolkit-container] Bump CUDA base image version to 12.3.2
## v1.15.0-rc.3
* Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied.

View File

@@ -19,7 +19,7 @@ where `TARGET` is a make target that is valid for each of the sub-components.
These include:
* `ubuntu18.04-amd64`
* `centos8-x86_64`
* `centos7-x86_64`
If no `TARGET` is specified, all valid release targets are built.

142
Jenkinsfile vendored
View File

@@ -1,142 +0,0 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
podTemplate (cloud:'sw-gpu-cloudnative',
containers: [
containerTemplate(name: 'docker', image: 'docker:dind', ttyEnabled: true, privileged: true),
containerTemplate(name: 'golang', image: 'golang:1.16.3', ttyEnabled: true)
]) {
node(POD_LABEL) {
def scmInfo
stage('checkout') {
scmInfo = checkout(scm)
}
stage('dependencies') {
container('golang') {
sh 'GO111MODULE=off go get -u github.com/client9/misspell/cmd/misspell'
sh 'GO111MODULE=off go get -u github.com/gordonklaus/ineffassign'
sh 'GO111MODULE=off go get -u golang.org/x/lint/golint'
}
container('docker') {
sh 'apk add --no-cache make bash git'
}
}
stage('check') {
parallel (
getGolangStages(["assert-fmt", "lint", "vet", "ineffassign", "misspell"])
)
}
stage('test') {
parallel (
getGolangStages(["test"])
)
}
def versionInfo
stage('version') {
container('docker') {
versionInfo = getVersionInfo(scmInfo)
println "versionInfo=${versionInfo}"
}
}
def dist = 'ubuntu20.04'
def arch = 'amd64'
def stageLabel = "${dist}-${arch}"
stage('build-one') {
container('docker') {
stage (stageLabel) {
sh "make ${dist}-${arch}"
}
}
}
stage('release') {
container('docker') {
stage (stageLabel) {
def component = 'main'
def repository = 'sw-gpu-cloudnative-debian-local/pool/main/'
def uploadSpec = """{
"files":
[ {
"pattern": "./dist/${dist}/${arch}/*.deb",
"target": "${repository}",
"props": "deb.distribution=${dist};deb.component=${component};deb.architecture=${arch}"
}
]
}"""
sh "echo starting release with versionInfo=${versionInfo}"
if (versionInfo.isTag) {
// upload to artifactory repository
def server = Artifactory.server 'sw-gpu-artifactory'
server.upload spec: uploadSpec
} else {
sh "echo skipping release for non-tagged build"
}
}
}
}
}
}
// getGolangStages builds a map from each target name to a closure that runs
// that target (see getLintClosure). The resulting map is suitable for passing
// to `parallel`.
def getGolangStages(def targets) {
    // Declared with `def` so the map stays local to this call instead of
    // being stored in the script binding as an implicit global.
    def stages = [:]
    for (t in targets) {
        stages[t] = getLintClosure(t)
    }
    return stages
}
// getLintClosure returns a closure that, when invoked, runs `make <target>`
// inside the 'golang' container as a stage named after the target.
def getLintClosure(def target) {
return {
container('golang') {
stage(target) {
sh "make ${target}"
}
}
}
}
// getVersionInfo returns a map of version info.
// The map holds an `isTag` entry computed from scmInfo.GIT_BRANCH, followed by
// every key/value pair of scmInfo copied in as-is.
def getVersionInfo(def scmInfo) {
def versionInfo = [
isTag: isTag(scmInfo.GIT_BRANCH)
]
// Copy all SCM fields alongside isTag; a key named isTag in scmInfo would overwrite it.
scmInfo.each { k, v -> versionInfo[k] = v }
return versionInfo
}
// isTag reports whether the given branch name corresponds to a tag at the
// current checkout. Names that do not start with 'v' are rejected without
// consulting git.
def isTag(def branch) {
if (!branch.startsWith('v')) {
return false
}
// The build counts as tagged only when git describes the current commit
// exactly as tags/<branch>.
def version = shOutput('git describe --all --exact-match --always')
return version == "tags/${branch}"
}
// shOutput runs the given shell script and returns its standard output with
// leading and trailing whitespace removed.
// NOTE: this helper was previously declared as `shOuptut`, which left the
// `shOutput(...)` call in isTag without a matching definition.
def shOutput(def script) {
    return sh(script: script, returnStdout: true).trim()
}

View File

@@ -38,8 +38,8 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
CHECK_TARGETS := golangci-lint
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
CHECK_TARGETS := lint
MAKE_TARGETS := binaries build check fmt test examples cmds coverage generate licenses vendor check-vendor $(CHECK_TARGETS)
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
@@ -53,22 +53,26 @@ CLI_VERSION = $(VERSION)
endif
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
GOOS ?= linux
binaries: cmds
ifneq ($(PREFIX),)
cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
endif
cmds: $(CMD_TARGETS)
ifneq ($(shell uname),Darwin)
EXTLDFLAGS = -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
else
EXTLDFLAGS = -Wl,-undefined,dynamic_lookup
endif
$(CMD_TARGETS): cmd-%:
GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
go build -ldflags "-s -w '-extldflags=$(EXTLDFLAGS)' -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
build:
GOOS=$(GOOS) go build ./...
go build ./...
examples: $(EXAMPLE_TARGETS)
$(EXAMPLE_TARGETS): example-%:
GOOS=$(GOOS) go build ./examples/$(*)
go build ./examples/$(*)
all: check test build binary
check: $(CHECK_TARGETS)
@@ -83,15 +87,23 @@ goimports:
go list -f {{.Dir}} $(MODULE)/... \
| xargs goimports -local $(MODULE) -w
golangci-lint:
lint:
golangci-lint run ./...
vendor:
go mod tidy
go mod vendor
go mod verify
check-vendor: vendor
git diff --quiet HEAD -- go.mod go.sum vendor
licenses:
go-licenses csv $(MODULE)/...
COVERAGE_FILE := coverage.out
test: build cmds
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
go test -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
coverage: test
cat $(COVERAGE_FILE) | grep -v "_mock.go" > $(COVERAGE_FILE).no-mocks
@@ -100,12 +112,13 @@ coverage: test
generate:
go generate $(MODULE)/...
$(DOCKER_TARGETS): docker-%:
$(DOCKER_TARGETS): docker-%:
@echo "Running 'make $(*)' in container image $(BUILDIMAGE)"
$(DOCKER) run \
--rm \
-e GOCACHE=/tmp/.cache/go \
-e GOMODCACHE=/tmp/.cache/gomod \
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
-v $(PWD):/work \
-w /work \
--user $$(id -u):$$(id -g) \
@@ -120,6 +133,7 @@ PHONY: .shell
-ti \
-e GOCACHE=/tmp/.cache/go \
-e GOMODCACHE=/tmp/.cache/gomod \
-e GOLANGCI_LINT_CACHE=/tmp/.cache/golangci-lint \
-v $(PWD):/work \
-w /work \
--user $$(id -u):$$(id -g) \

View File

@@ -0,0 +1,31 @@
# NVIDIA CDI Hook
The CLI `nvidia-cdi-hook` provides container device runtime hook capabilities when
called by a container runtime, as specified in a
[Container Device Interface](https://tags.cncf.io/container-device-interface/blob/main/SPEC.md)
file.
## Generating a CDI
The CDI itself is created for an NVIDIA-capable device using the
[`nvidia-ctk cdi generate`](../nvidia-ctk/) command.
When `nvidia-ctk cdi generate` is run, the CDI specification is generated as a yaml file.
The CDI specification provides instructions for a container runtime to set up devices, files and
other resources for the container prior to starting it. Those instructions
may include executing command-line tools to prepare the filesystem. The execution
of such command-line tools is called a hook.
`nvidia-cdi-hook` is the CLI tool that is expected to be called by the container runtime,
when specified by the CDI file.
See the [`nvidia-ctk` documentation](../nvidia-ctk/README.md) for more information
on generating a CDI file.
## Functionality
The `nvidia-cdi-hook` CLI provides the following functionality:
* `chmod` - Change the permissions of a file or directory inside the directory path to be mounted into a container.
* `create-symlinks` - Create symlinks inside the directory path to be mounted into a container.
* `update-ldcache` - Update the dynamic linker cache inside the directory path to be mounted into a container.

View File

@@ -0,0 +1,36 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package commands
import (
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// New returns the CDI hook subcommands, each constructed with the supplied
// logger. The same set is shared by the nvidia-cdi-hook and nvidia-ctk hook
// commands.
func New(logger logger.Interface) []*cli.Command {
	var cmds []*cli.Command
	cmds = append(cmds,
		ldcache.NewCommand(logger),
		symlinks.NewCommand(logger),
		chmod.NewCommand(logger),
	)
	return cmds
}

View File

@@ -0,0 +1,93 @@
/**
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
cli "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/commands"
)
// options holds the global settings of the CLI. Each field is the destination
// of a command line flag that can also be set through an environment variable.
type options struct {
	// Debug selects debug-level logging.
	Debug bool
	// Quiet limits logging to errors.
	Quiet bool
}
// main builds the nvidia-cdi-hook CLI, registers the hook subcommands and runs
// it against the process arguments. An error returned by the CLI is logged and
// the process exits with status 1.
func main() {
	logger := logrus.New()

	// opts receives the values parsed from environment variables or command line flags.
	opts := options{}

	// Top-level application.
	app := cli.NewApp()
	app.Name = "NVIDIA CDI Hook"
	app.UseShortOptionHandling = true
	app.EnableBashCompletion = true
	app.Usage = "Command to structure files for usage inside a container, called as hooks from a container runtime, defined in a CDI yaml file"
	app.Version = info.GetVersionString()

	// Global flags.
	debugFlag := &cli.BoolFlag{
		Name:        "debug",
		Aliases:     []string{"d"},
		Usage:       "Enable debug-level logging",
		Destination: &opts.Debug,
		EnvVars:     []string{"NVIDIA_CDI_DEBUG"},
	}
	quietFlag := &cli.BoolFlag{
		Name:        "quiet",
		Usage:       "Suppress all output except for errors; overrides --debug",
		Destination: &opts.Quiet,
		EnvVars:     []string{"NVIDIA_CDI_QUIET"},
	}
	app.Flags = []cli.Flag{debugFlag, quietFlag}

	// The log level is applied before any subcommand runs. Quiet is checked
	// first so that it takes precedence when both flags are set.
	app.Before = func(_ *cli.Context) error {
		switch {
		case opts.Quiet:
			logger.SetLevel(logrus.ErrorLevel)
		case opts.Debug:
			logger.SetLevel(logrus.DebugLevel)
		default:
			logger.SetLevel(logrus.InfoLevel)
		}
		return nil
	}

	// Subcommands.
	app.Commands = commands.New(logger)

	if err := app.Run(os.Args); err != nil {
		logger.Errorf("%v", err)
		os.Exit(1)
	}
}

View File

@@ -153,8 +153,11 @@ func (m command) resolveLDConfigPath(path string) string {
return strings.TrimPrefix(config.NormalizeLDConfigPath("@"+path), "@")
}
// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf in the container
// createConfig creates (or updates) /etc/ld.so.conf.d/00-nvcr-<RANDOM_STRING>.conf in the container
// to include the required paths.
// Note that the 00-nvcr prefix is chosen to ensure that these libraries have
// a higher precedence than other libraries on the system but are applied AFTER
// 00-cuda-compat.conf.
func (m command) createConfig(root string, folders []string) error {
if len(folders) == 0 {
m.logger.Debugf("No folders to add to /etc/ld.so.conf")
@@ -165,7 +168,7 @@ func (m command) createConfig(root string, folders []string) error {
return fmt.Errorf("failed to create ld.so.conf.d: %v", err)
}
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf")
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "00-nvcr-*.conf")
if err != nil {
return fmt.Errorf("failed to create config file: %v", err)
}

View File

@@ -23,6 +23,7 @@ const (
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)
@@ -38,6 +39,7 @@ type nvidiaConfig struct {
Devices string
MigConfigDevices string
MigMonitorDevices string
ImexChannels string
DriverCapabilities string
// Requirements defines the requirements DSL for the container to run.
// This is empty if no specific requirements are needed, or if requirements are
@@ -274,6 +276,14 @@ func getMigDevices(image image.CUDA, envvar string) *string {
return &devices
}
// getImexChannels returns a pointer to the value of the NVIDIA_IMEX_CHANNELS
// environment variable of the image, or nil if the image does not define it.
func getImexChannels(image image.CUDA) *string {
	if image.HasEnvvar(envNVImexChannels) {
		value := image.Getenv(envNVImexChannels)
		return &value
	}
	return nil
}
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
// We use the default driver capabilities by default. This is filtered to only include the
// supported capabilities
@@ -328,6 +338,11 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
var imexChannels string
if c := getImexChannels(image); c != nil {
imexChannels = *c
}
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
requirements, err := image.GetRequirements()
@@ -339,6 +354,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
Devices: devices,
MigConfigDevices: migConfigDevices,
MigMonitorDevices: migMonitorDevices,
ImexChannels: imexChannels,
DriverCapabilities: driverCapabilities,
Requirements: requirements,
}

View File

@@ -126,6 +126,9 @@ func doPrestart() {
if len(nvidia.MigMonitorDevices) > 0 {
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
}
if len(nvidia.ImexChannels) > 0 {
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
}
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
if len(cap) == 0 {

View File

@@ -198,7 +198,7 @@ invoked from the command line as `runc` would. For example:
```sh
# Setup a rootfs based on Ubuntu 16.04
cd $(mktemp -d) && mkdir rootfs
curl -sS http://cdimage.ubuntu.com/ubuntu-base/releases/16.04/release/ubuntu-base-16.04-core-amd64.tar.gz | tar --exclude 'dev/*' -C rootfs -xz
curl -sS http://cdimage.ubuntu.com/ubuntu-base/releases/16.04/release/ubuntu-base-16.04.6-base-amd64.tar.gz | tar --exclude 'dev/*' -C rootfs -xz
# Create an OCI runtime spec
nvidia-container-runtime spec

View File

@@ -42,17 +42,18 @@ type command struct {
}
type options struct {
output string
format string
deviceNameStrategy string
driverRoot string
devRoot string
nvidiaCTKPath string
ldconfigPath string
mode string
vendor string
class string
output string
format string
deviceNameStrategies cli.StringSlice
driverRoot string
devRoot string
nvidiaCDIHookPath string
ldconfigPath string
mode string
vendor string
class string
configSearchPaths cli.StringSlice
librarySearchPaths cli.StringSlice
csv struct {
@@ -86,6 +87,11 @@ func (m command) build() *cli.Command {
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "config-search-path",
Usage: "Specify the path to search for config files when discovering the entities that should be included in the CDI specification.",
Destination: &opts.configSearchPaths,
},
&cli.StringFlag{
Name: "output",
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
@@ -109,11 +115,11 @@ func (m command) build() *cli.Command {
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
Destination: &opts.devRoot,
},
&cli.StringFlag{
&cli.StringSliceFlag{
Name: "device-name-strategy",
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
Value: nvcdi.DeviceNameStrategyIndex,
Destination: &opts.deviceNameStrategy,
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID),
Destination: &opts.deviceNameStrategies,
},
&cli.StringFlag{
Name: "driver-root",
@@ -126,9 +132,12 @@ func (m command) build() *cli.Command {
Destination: &opts.librarySearchPaths,
},
&cli.StringFlag{
Name: "nvidia-ctk-path",
Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.",
Destination: &opts.nvidiaCTKPath,
Name: "nvidia-cdi-hook-path",
Aliases: []string{"nvidia-ctk-path"},
Usage: "Specify the path to use for the nvidia-cdi-hook in the generated CDI specification. " +
"If not specified, the PATH will be searched for `nvidia-cdi-hook`. " +
"NOTE: That if this is specified as `nvidia-ctk`, the PATH will be searched for `nvidia-ctk` instead.",
Destination: &opts.nvidiaCDIHookPath,
},
&cli.StringFlag{
Name: "ldconfig-path",
@@ -185,12 +194,14 @@ func (m command) validateFlags(c *cli.Context, opts *options) error {
return fmt.Errorf("invalid discovery mode: %v", opts.mode)
}
_, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
if err != nil {
return err
for _, strategy := range opts.deviceNameStrategies.Value() {
_, err := nvcdi.NewDeviceNamer(strategy)
if err != nil {
return err
}
}
opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath)
opts.nvidiaCDIHookPath = config.ResolveNVIDIACDIHookPath(m.logger, opts.nvidiaCDIHookPath)
if outputFileFormat := formatFromFilename(opts.output); outputFileFormat != "" {
m.logger.Debugf("Inferred output format as %q from output file name", outputFileFormat)
@@ -241,19 +252,24 @@ func formatFromFilename(filename string) string {
}
func (m command) generateSpec(opts *options) (spec.Interface, error) {
deviceNamer, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
if err != nil {
return nil, fmt.Errorf("failed to create device namer: %v", err)
var deviceNamers []nvcdi.DeviceNamer
for _, strategy := range opts.deviceNameStrategies.Value() {
deviceNamer, err := nvcdi.NewDeviceNamer(strategy)
if err != nil {
return nil, fmt.Errorf("failed to create device namer: %v", err)
}
deviceNamers = append(deviceNamers, deviceNamer)
}
cdilib, err := nvcdi.New(
nvcdi.WithLogger(m.logger),
nvcdi.WithDriverRoot(opts.driverRoot),
nvcdi.WithDevRoot(opts.devRoot),
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
nvcdi.WithNVIDIACDIHookPath(opts.nvidiaCDIHookPath),
nvcdi.WithLdconfigPath(opts.ldconfigPath),
nvcdi.WithDeviceNamer(deviceNamer),
nvcdi.WithDeviceNamers(deviceNamers...),
nvcdi.WithMode(opts.mode),
nvcdi.WithConfigSearchPaths(opts.configSearchPaths.Value()),
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
nvcdi.WithCSVFiles(opts.csv.files.Value()),
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),

View File

@@ -17,6 +17,7 @@
package list
import (
"errors"
"fmt"
"github.com/urfave/cli/v2"
@@ -29,7 +30,9 @@ type command struct {
logger logger.Interface
}
type config struct{}
type config struct {
cdiSpecDirs cli.StringSlice
}
// NewCommand constructs a cdi list command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
@@ -55,30 +58,44 @@ func (m command) build() *cli.Command {
},
}
c.Flags = []cli.Flag{}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "spec-dir",
Usage: "specify the directories to scan for CDI specifications",
Value: cli.NewStringSlice(cdi.DefaultSpecDirs...),
Destination: &cfg.cdiSpecDirs,
},
}
return &c
}
// validateFlags checks that the spec-dir flag resolved to at least one
// directory to scan for CDI specifications.
func (m command) validateFlags(c *cli.Context, cfg *config) error {
	if specDirs := cfg.cdiSpecDirs.Value(); len(specDirs) > 0 {
		return nil
	}
	return errors.New("at least one CDI specification directory must be specified")
}
func (m command) run(c *cli.Context, cfg *config) error {
registry, err := cdi.NewCache(
cdi.WithAutoRefresh(false),
cdi.WithSpecDirs(cdi.DefaultSpecDirs...),
cdi.WithSpecDirs(cfg.cdiSpecDirs.Value()...),
)
if err != nil {
return fmt.Errorf("failed to create CDI cache: %v", err)
}
refreshErr := registry.Refresh()
_ = registry.Refresh()
if errors := registry.GetErrors(); len(errors) > 0 {
m.logger.Warningf("The following registry errors were reported:")
for k, err := range errors {
m.logger.Warningf("%v: %v", k, err)
}
}
devices := registry.ListDevices()
m.logger.Infof("Found %d CDI devices", len(devices))
if refreshErr != nil {
m.logger.Warningf("Refreshing the CDI registry returned the following error(s): %v", refreshErr)
}
for _, device := range devices {
fmt.Printf("%s\n", device)
}

View File

@@ -38,7 +38,8 @@ type command struct {
// options stores the subcommand options
type options struct {
flags.Options
sets cli.StringSlice
setListSeparator string
sets cli.StringSlice
}
// NewCommand constructs a config command with the specified logger
@@ -57,6 +58,9 @@ func (m command) build() *cli.Command {
c := cli.Command{
Name: "config",
Usage: "Interact with the NVIDIA Container Toolkit configuration",
Before: func(ctx *cli.Context) error {
return validateFlags(ctx, &opts)
},
Action: func(ctx *cli.Context) error {
return run(ctx, &opts)
},
@@ -71,10 +75,21 @@ func (m command) build() *cli.Command {
Destination: &opts.Config,
},
&cli.StringSliceFlag{
Name: "set",
Usage: "Set a config value using the pattern key=value. If value is empty, this is equivalent to specifying the same key in unset. This flag can be specified multiple times",
Name: "set",
Usage: "Set a config value using the pattern 'key[=value]'. " +
"Specifying only 'key' is equivalent to 'key=true' for boolean settings. " +
"This flag can be specified multiple times, but only the last value for a specific " +
"config option is applied. " +
"If the setting represents a list, the elements are colon-separated.",
Destination: &opts.sets,
},
&cli.StringFlag{
Name: "set-list-separator",
Usage: "Specify a separator for lists applied using the set command.",
Hidden: true,
Value: ":",
Destination: &opts.setListSeparator,
},
&cli.BoolFlag{
Name: "in-place",
Aliases: []string{"i"},
@@ -96,6 +111,13 @@ func (m command) build() *cli.Command {
return &c
}
// validateFlags rejects an empty set-list-separator; the separator is used to
// split list values supplied through the set flag.
func validateFlags(c *cli.Context, opts *options) error {
	if opts.setListSeparator != "" {
		return nil
	}
	return fmt.Errorf("set-list-separator must be set")
}
func run(c *cli.Context, opts *options) error {
cfgToml, err := config.New(
config.WithConfigFile(opts.Config),
@@ -105,11 +127,15 @@ func run(c *cli.Context, opts *options) error {
}
for _, set := range opts.sets.Value() {
key, value, err := setFlagToKeyValue(set)
key, value, err := setFlagToKeyValue(set, opts.setListSeparator)
if err != nil {
return fmt.Errorf("invalid --set option %v: %w", set, err)
}
cfgToml.Set(key, value)
if value == nil {
_ = cfgToml.Delete(key)
} else {
cfgToml.Set(key, value)
}
}
if err := opts.EnsureOutputFolder(); err != nil {
@@ -135,7 +161,7 @@ var errInvalidFormat = errors.New("invalid format")
// setFlagToKeyValue converts a --set flag to a key-value pair.
// The set flag is of the form key[=value], with the value being optional if key refers to a
// boolean config option.
func setFlagToKeyValue(setFlag string) (string, interface{}, error) {
func setFlagToKeyValue(setFlag string, setListSeparator string) (string, interface{}, error) {
setParts := strings.SplitN(setFlag, "=", 2)
key := setParts[0]
@@ -146,24 +172,29 @@ func setFlagToKeyValue(setFlag string) (string, interface{}, error) {
kind := field.Kind()
if len(setParts) != 2 {
if kind == reflect.Bool {
if kind == reflect.Bool || (kind == reflect.Pointer && field.Elem().Kind() == reflect.Bool) {
return key, true, nil
}
return key, nil, fmt.Errorf("%w: expected key=value; got %v", errInvalidFormat, setFlag)
}
value := setParts[1]
if kind == reflect.Pointer && value != "nil" {
kind = field.Elem().Kind()
}
switch kind {
case reflect.Pointer:
return key, nil, nil
case reflect.Bool:
b, err := strconv.ParseBool(value)
if err != nil {
return key, value, fmt.Errorf("%w: %w", errInvalidFormat, err)
}
return key, b, err
return key, b, nil
case reflect.String:
return key, value, nil
case reflect.Slice:
valueParts := strings.Split(value, ",")
valueParts := strings.Split(value, setListSeparator)
switch field.Elem().Kind() {
case reflect.String:
return key, valueParts, nil
@@ -201,7 +232,7 @@ func getStruct(current reflect.Type, paths ...string) (reflect.StructField, erro
if !ok {
continue
}
if v != tomlField {
if strings.SplitN(v, ",", 2)[0] != tomlField {
continue
}
if len(paths) == 1 {

View File

@@ -25,11 +25,12 @@ import (
func TestSetFlagToKeyValue(t *testing.T) {
// TODO: We need to enable this test again since switching to reflect.
testCases := []struct {
description string
setFlag string
expectedKey string
expectedValue interface{}
expectedError error
description string
setFlag string
setListSeparator string
expectedKey string
expectedValue interface{}
expectedError error
}{
{
description: "option not present returns an error",
@@ -106,22 +107,34 @@ func TestSetFlagToKeyValue(t *testing.T) {
expectedValue: []string{"string-value"},
},
{
description: "[]string option returns multiple values",
setFlag: "nvidia-container-cli.environment=first,second",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first", "second"},
description: "[]string option returns multiple values",
setFlag: "nvidia-container-cli.environment=first,second",
setListSeparator: ",",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first", "second"},
},
{
description: "[]string option returns values with equals",
setFlag: "nvidia-container-cli.environment=first=1,second=2",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first=1", "second=2"},
description: "[]string option returns values with equals",
setFlag: "nvidia-container-cli.environment=first=1,second=2",
setListSeparator: ",",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first=1", "second=2"},
},
{
description: "[]string option returns multiple values semi-colon",
setFlag: "nvidia-container-cli.environment=first;second",
setListSeparator: ";",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first", "second"},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
k, v, err := setFlagToKeyValue(tc.setFlag)
if tc.setListSeparator == "" {
tc.setListSeparator = ","
}
k, v, err := setFlagToKeyValue(tc.setFlag, tc.setListSeparator)
require.ErrorIs(t, err, tc.expectedError)
require.EqualValues(t, tc.expectedKey, k)
require.EqualValues(t, tc.expectedValue, v)

View File

@@ -17,13 +17,10 @@
package hook
import (
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/commands"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
)
type hookCommand struct {
@@ -46,11 +43,7 @@ func (m hookCommand) build() *cli.Command {
Usage: "A collection of hooks that may be injected into an OCI spec",
}
hook.Subcommands = []*cli.Command{
ldcache.NewCommand(m.logger),
symlinks.NewCommand(m.logger),
chmod.NewCommand(m.logger),
}
hook.Subcommands = commands.New(m.logger)
return &hook
}

View File

@@ -17,6 +17,7 @@
package configure
import (
"encoding/json"
"fmt"
"path/filepath"
@@ -66,6 +67,8 @@ type config struct {
mode string
hookFilePath string
runtimeConfigOverrideJSON string
nvidiaRuntime struct {
name string
path string
@@ -153,6 +156,13 @@ func (m command) build() *cli.Command {
Usage: "Enable CDI in the configured runtime",
Destination: &config.cdi.enabled,
},
&cli.StringFlag{
Name: "runtime-config-override",
Destination: &config.runtimeConfigOverrideJSON,
Usage: "specify additional runtime options as a JSON string. The paths are relative to the runtime config.",
Value: "{}",
EnvVars: []string{"RUNTIME_CONFIG_OVERRIDE"},
},
}
return &configure
@@ -194,6 +204,11 @@ func (m command) validateFlags(c *cli.Context, config *config) error {
config.cdi.enabled = false
}
if config.runtimeConfigOverrideJSON != "" && config.runtime != "containerd" {
m.logger.Warningf("Ignoring runtime-config-override flag for %v", config.runtime)
config.runtimeConfigOverrideJSON = ""
}
return nil
}
@@ -237,10 +252,16 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
return fmt.Errorf("unable to load config for runtime %v: %v", config.runtime, err)
}
runtimeConfigOverride, err := config.runtimeConfigOverride()
if err != nil {
return fmt.Errorf("unable to parse config overrides: %w", err)
}
err = cfg.AddRuntime(
config.nvidiaRuntime.name,
config.nvidiaRuntime.path,
config.nvidiaRuntime.setAsDefault,
runtimeConfigOverride,
)
if err != nil {
return fmt.Errorf("unable to update config: %v", err)
@@ -293,6 +314,20 @@ func (c *config) getOuputConfigPath() string {
return c.resolveConfigFilePath()
}
// runtimeConfigOverride decodes the JSON document held in
// runtimeConfigOverrideJSON into a generic string-keyed map.
//
// An empty string means that no override was requested: a nil map and a nil
// error are returned. If the string is not valid JSON for a map, the returned
// error wraps the decoder error and includes the offending input.
func (c *config) runtimeConfigOverride() (map[string]interface{}, error) {
	raw := c.runtimeConfigOverrideJSON
	if raw == "" {
		return nil, nil
	}

	// Start from an allocated map so that a successful decode never yields nil.
	overrides := map[string]interface{}{}
	err := json.Unmarshal([]byte(raw), &overrides)
	if err != nil {
		return nil, fmt.Errorf("failed to read %v as JSON: %w", raw, err)
	}
	return overrides, nil
}
// configureOCIHook creates and configures the OCI hook for the NVIDIA runtime
func (m *command) configureOCIHook(c *cli.Context, config *config) error {
err := ocihook.CreateHook(config.hookFilePath, config.nvidiaRuntime.hookPath)

View File

@@ -87,7 +87,7 @@ func (m command) build() *cli.Command {
Usage: "The path to the driver root. `DRIVER_ROOT`/dev is searched for NVIDIA device nodes.",
Value: "/",
Destination: &cfg.driverRoot,
EnvVars: []string{"DRIVER_ROOT"},
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
},
&cli.BoolFlag{
Name: "watch",

View File

@@ -31,7 +31,8 @@ type command struct {
}
type options struct {
driverRoot string
root string
devRoot string
dryRun bool
@@ -65,11 +66,21 @@ func (m command) build() *cli.Command {
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "driver-root",
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
Name: "root",
// TODO: Remove this alias
Aliases: []string{"driver-root"},
Usage: "the path to to the root to use to load the kernel modules. This root must be a chrootable path. " +
"If device nodes to be created these will be created at `ROOT`/dev unless an alternative path is specified",
Value: "/",
Destination: &opts.driverRoot,
EnvVars: []string{"DRIVER_ROOT"},
Destination: &opts.root,
// TODO: Remove the NVIDIA_DRIVER_ROOT and DRIVER_ROOT envvars.
EnvVars: []string{"ROOT", "NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
},
&cli.StringFlag{
Name: "dev-root",
Usage: "specify the root where `/dev` is located. If this is not specified, the root is assumed.",
Destination: &opts.devRoot,
EnvVars: []string{"NVIDIA_DEV_ROOT", "DEV_ROOT"},
},
&cli.BoolFlag{
Name: "control-devices",
@@ -83,7 +94,7 @@ func (m command) build() *cli.Command {
},
&cli.BoolFlag{
Name: "dry-run",
Usage: "if set, the command will not create any symlinks.",
Usage: "if set, the command will not perform any operations",
Value: false,
Destination: &opts.dryRun,
EnvVars: []string{"DRY_RUN"},
@@ -94,6 +105,10 @@ func (m command) build() *cli.Command {
}
// validateFlags fills in defaults for the parsed options.
// When no dev-root was given but a root was, the root is reused as the dev-root
// and the choice is logged. The cli context is not consulted and the function
// always returns nil.
func (m command) validateFlags(r *cli.Context, opts *options) error {
// Fall back to the root when no separate dev-root was specified.
if opts.devRoot == "" && opts.root != "" {
m.logger.Infof("Using dev-root %q", opts.root)
opts.devRoot = opts.root
}
return nil
}
@@ -102,7 +117,7 @@ func (m command) run(c *cli.Context, opts *options) error {
modules := nvmodules.New(
nvmodules.WithLogger(m.logger),
nvmodules.WithDryRun(opts.dryRun),
nvmodules.WithRoot(opts.driverRoot),
nvmodules.WithRoot(opts.root),
)
if err := modules.LoadAll(); err != nil {
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
@@ -113,12 +128,12 @@ func (m command) run(c *cli.Context, opts *options) error {
devices, err := nvdevices.New(
nvdevices.WithLogger(m.logger),
nvdevices.WithDryRun(opts.dryRun),
nvdevices.WithDevRoot(opts.driverRoot),
nvdevices.WithDevRoot(opts.devRoot),
)
if err != nil {
return err
}
m.logger.Infof("Creating control device nodes at %s", opts.driverRoot)
m.logger.Infof("Creating control device nodes at %s", opts.devRoot)
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create NVIDIA control device nodes: %v", err)
}

View File

@@ -62,7 +62,7 @@ func (m command) build() *cli.Command {
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
Value: "/",
Destination: &opts.driverRoot,
EnvVars: []string{"DRIVER_ROOT"},
EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
},
}

View File

@@ -12,11 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
FROM nvidia/cuda:12.5.0-base-ubuntu20.04
ARG ARTIFACTS_ROOT
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
@@ -24,7 +22,6 @@ COPY ${ARTIFACTS_ROOT} /artifacts/packages/
WORKDIR /artifacts/packages
# build-args are added to the manifest.txt file below.
ARG BASE_DIST
ARG PACKAGE_DIST
ARG PACKAGE_VERSION
ARG GIT_BRANCH

View File

@@ -12,12 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
FROM nvidia/cuda:12.5.0-base-ubi8 as build
RUN yum install -y \
wget make git gcc \
@@ -31,7 +29,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64) ARCH='arm64' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
@@ -50,17 +48,7 @@ COPY . .
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG BASE_DIST
# See https://www.centos.org/centos-linux-eol/
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
RUN [[ "${BASE_DIST}" != "centos8" ]] || \
( \
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
)
FROM nvidia/cuda:12.5.0-base-ubi8
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=void

View File

@@ -12,12 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
FROM nvidia/cuda:12.5.0-base-ubuntu20.04 as build
RUN apt-get update && \
apt-get install -y wget make git gcc \
@@ -31,7 +29,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64) ARCH='arm64' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
@@ -49,7 +47,7 @@ COPY . .
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
FROM nvcr.io/nvidia/cuda:12.5.0-base-ubuntu20.04
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
RUN rm -f /etc/apt/sources.list.d/cuda.list
@@ -75,14 +73,6 @@ ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH ${TARGETARCH}
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container/stable"
ARG LIBNVIDIA_CONTAINER0_VERSION
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
--output ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb && \
dpkg -i ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb; \
fi
RUN dpkg -i \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \

View File

@@ -45,7 +45,7 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules #####
DEFAULT_PUSH_TARGET := ubuntu20.04
DISTRIBUTIONS := ubuntu20.04 ubi8 centos7
DISTRIBUTIONS := ubuntu20.04 ubi8
META_TARGETS := packaging
@@ -56,9 +56,9 @@ TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
ifneq ($(BUILD_MULTI_ARCH_IMAGES),true)
include $(CURDIR)/build/container/native-only.mk
include $(CURDIR)/deployments/container/native-only.mk
else
include $(CURDIR)/build/container/multi-arch.mk
include $(CURDIR)/deployments/container/multi-arch.mk
endif
# For the default push target we also push a short tag equal to the version.
@@ -84,7 +84,7 @@ push-short:
build-%: DIST = $(*)
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
build-%: DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile.$(DOCKERFILE_SUFFIX)
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
@@ -92,14 +92,12 @@ ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
$(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
DOCKER_BUILDKIT=1 \
$(DOCKER) $(BUILDX) build --pull \
--provenance=false --sbom=false \
$(DOCKER_BUILD_OPTIONS) \
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
--tag $(IMAGE) \
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
--build-arg BASE_DIST="$(BASE_DIST)" \
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg LIBNVIDIA_CONTAINER0_VERSION="$(LIBNVIDIA_CONTAINER0_DEPENDENCY)" \
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
--build-arg VERSION="$(VERSION)" \
@@ -111,20 +109,12 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
$(CURDIR)
build-ubuntu%: BASE_DIST = $(*)
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
build-ubi8: BASE_DIST := ubi8
build-ubi8: DOCKERFILE_SUFFIX := centos
build-ubi8: DOCKERFILE_SUFFIX := ubi8
build-ubi8: PACKAGE_DIST = centos7
build-centos7: BASE_DIST = $(*)
build-centos7: DOCKERFILE_SUFFIX := centos
build-centos7: PACKAGE_DIST = $(BASE_DIST)
build-packaging: BASE_DIST := ubuntu20.04
build-packaging: DOCKERFILE_SUFFIX := packaging
build-packaging: PACKAGE_ARCH := amd64
build-packaging: PACKAGE_DIST = all
@@ -145,9 +135,7 @@ test-packaging:
@echo "Testing package image contents"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/aarch64" || echo "Missing centos7/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/x86_64" || echo "Missing centos7/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/aarch64" || echo "Missing centos8/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/ppc64le" || echo "Missing centos8/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/x86_64" || echo "Missing centos8/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/amd64" || echo "Missing ubuntu18.04/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/arm64" || echo "Missing ubuntu18.04/arm64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/ppc64le" || echo "Missing ubuntu18.04/ppc64le"

View File

@@ -16,9 +16,6 @@ PUSH_ON_BUILD ?= false
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
# We only have x86_64 packages for centos7
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
# We only generate amd64 image for ubuntu18.04
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64

View File

@@ -22,7 +22,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64) ARCH='arm64' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \

View File

@@ -15,7 +15,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64) ARCH='arm64' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture"; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \

View File

@@ -17,6 +17,15 @@
ARG BASEIMAGE
FROM ${BASEIMAGE}
# centos:stream8 is EOL.
# We switch to the vault repositories for this base image.
ARG BASEIMAGE
RUN if [ "${BASEIMAGE}" = "quay.io/centos/centos:stream8" ]; then \
sed -i -e "s|mirrorlist=|#mirrorlist=|g" \
-e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" \
/etc/yum.repos.d/CentOS-Stream-*; \
fi
RUN yum install -y \
ca-certificates \
gcc \
@@ -33,7 +42,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64) ARCH='arm64' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture"; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \

View File

@@ -20,7 +20,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64) ARCH='arm64' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \

29
go.mod
View File

@@ -3,24 +3,24 @@ module github.com/NVIDIA/nvidia-container-toolkit
go 1.20
require (
github.com/NVIDIA/go-nvlib v0.0.0-20231212194527-f3264c8a6a7a
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f
github.com/fsnotify/fsnotify v1.5.4
github.com/opencontainers/runtime-spec v1.1.0
github.com/pelletier/go-toml v1.9.4
github.com/sirupsen/logrus v1.9.0
github.com/stretchr/testify v1.8.4
github.com/urfave/cli/v2 v2.3.0
golang.org/x/mod v0.5.0
golang.org/x/sys v0.7.0
tags.cncf.io/container-device-interface v0.6.2
tags.cncf.io/container-device-interface/specs-go v0.6.0
github.com/NVIDIA/go-nvlib v0.5.0
github.com/NVIDIA/go-nvml v0.12.4-0
github.com/fsnotify/fsnotify v1.7.0
github.com/opencontainers/runtime-spec v1.2.0
github.com/pelletier/go-toml v1.9.5
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.9.0
github.com/urfave/cli/v2 v2.27.2
golang.org/x/mod v0.18.0
golang.org/x/sys v0.21.0
tags.cncf.io/container-device-interface v0.7.2
tags.cncf.io/container-device-interface/specs-go v0.7.0
)
require (
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/uuid v1.4.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
@@ -29,6 +29,7 @@ require (
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect

68
go.sum
View File

@@ -1,22 +1,20 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/NVIDIA/go-nvlib v0.0.0-20231212194527-f3264c8a6a7a h1:aHaNKihxpWzWnV3yoVkit3bhOF7cg2ScCbzW+gepQ/E=
github.com/NVIDIA/go-nvlib v0.0.0-20231212194527-f3264c8a6a7a/go.mod h1:U82N6/xKp6OnoqpALBH0C5SO59Buu4sX1Z3rQtBsBKQ=
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f h1:FTblgO87K1vPB8tcwM5EOFpFf6UpsrlDpErPm25mFWE=
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0=
github.com/NVIDIA/go-nvlib v0.5.0 h1:951KGrfr+p3cs89alO9z/ZxPPWKxwht9tx9rxiADoLI=
github.com/NVIDIA/go-nvlib v0.5.0/go.mod h1:87z49ULPr4GWPSGfSIp3taU4XENRYN/enIg88MzcL4k=
github.com/NVIDIA/go-nvml v0.12.4-0 h1:4tkbB3pT1O77JGr0gQ6uD8FrsUPqP1A/EOEm2wI1TUg=
github.com/NVIDIA/go-nvml v0.12.4-0/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
@@ -32,42 +30,36 @@ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg=
github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM=
github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M=
github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
github.com/urfave/cli/v2 v2.27.2 h1:6e0H+AkS+zDckwPCUrZkKX38mRaau4nL2uipkJpbkcI=
github.com/urfave/cli/v2 v2.27.2/go.mod h1:g0+79LmHHATl7DAcHO99smiR/T7uGLw84w8Y42x+4eM=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -75,18 +67,18 @@ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHo
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0=
golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
@@ -94,7 +86,7 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
tags.cncf.io/container-device-interface v0.6.2 h1:dThE6dtp/93ZDGhqaED2Pu374SOeUkBfuvkLuiTdwzg=
tags.cncf.io/container-device-interface v0.6.2/go.mod h1:Shusyhjs1A5Na/kqPVLL0KqnHQHuunol9LFeUNkuGVE=
tags.cncf.io/container-device-interface/specs-go v0.6.0 h1:V+tJJN6dqu8Vym6p+Ru+K5mJ49WL6Aoc5SJFSY0RLsQ=
tags.cncf.io/container-device-interface/specs-go v0.6.0/go.mod h1:hMAwAbMZyBLdmYqWgYcKH0F/yctNpV3P35f+/088A80=
tags.cncf.io/container-device-interface v0.7.2 h1:MLqGnWfOr1wB7m08ieI4YJ3IoLKKozEnnNYBtacDPQU=
tags.cncf.io/container-device-interface v0.7.2/go.mod h1:Xb1PvXv2BhfNb3tla4r9JL129ck1Lxv9KuU6eVOfKto=
tags.cncf.io/container-device-interface/specs-go v0.7.0 h1:w/maMGVeLP6TIQJVYT5pbqTi8SCw/iHZ+n4ignuGHqg=
tags.cncf.io/container-device-interface/specs-go v0.7.0/go.mod h1:hMAwAbMZyBLdmYqWgYcKH0F/yctNpV3P35f+/088A80=

80
hack/generate-changelog.sh Executable file
View File

@@ -0,0 +1,80 @@
#!/usr/bin/env bash
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Prints a markdown changelog built from the commit subjects between a
# reference tag and the current HEAD. Subjects starting with "Merge " or
# "Bump" are left out.

set -o pipefail

this=$(basename "$0")

# usage prints the command line help to stdout.
usage () {
cat << EOF
Generate a changelog for the specified tag
Usage: $this [--since <tag>] [--remote <remote_name>] [--version <version>]

Options:
  --since     specify the tag to start the changelog from (default: latest tag)
  --remote    specify the remote to fetch tags from (default: upstream)
  --version   specify the version to be released
  --help/-h   show this help and exit
EOF
}

REMOTE="upstream"
VERSION=""
REFERENCE=

# Parse command line options
while [[ $# -gt 0 ]]; do
    key="$1"
    case $key in
        --since)
            REFERENCE="$2"
            shift # past argument
            shift # past value
            ;;
        --remote)
            REMOTE="$2"
            shift # past argument
            shift # past value
            ;;
        --version)
            VERSION="$2"
            shift # past argument
            shift # past value
            ;;
        # Alternatives in a case pattern are separated by '|'. A literal
        # "--help/-h" would match neither --help nor -h.
        --help|-h)
            usage
            exit 0
            ;;
        *)
            usage
            exit 1
            ;;
    esac
done

# Fetch the latest tags from the remote
git fetch "$REMOTE" --tags

# if REFERENCE is not set, get the latest tag
if [ -z "$REFERENCE" ]; then
    REFERENCE=$(git describe --tags "$(git rev-list --tags --max-count=1)")
fi

# Print the changelog
echo "## Changelog"
echo ""
echo "### Version $VERSION"

# Iterate over the commit messages and ignore the ones that start with "Merge" or "Bump"
git log --pretty=format:"%s" "$REFERENCE"..@ | grep -Ev "(^Merge )|(^Bump)" | sed 's/^/- /'

View File

@@ -33,8 +33,9 @@ const (
configOverride = "XDG_CONFIG_HOME"
configFilePath = "nvidia-container-runtime/config.toml"
nvidiaCTKExecutable = "nvidia-ctk"
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
nvidiaCTKExecutable = "nvidia-ctk"
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
nvidiaCDIHookDefaultFilePath = "/usr/bin/nvidia-cdi-hook"
nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook"
@@ -63,6 +64,9 @@ type Config struct {
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
NVIDIAContainerRuntimeHookConfig RuntimeHookConfig `toml:"nvidia-container-runtime-hook"`
// Features allows for finer control over optional features.
Features features `toml:"features,omitempty"`
}
// GetConfigFilePath returns the path to the config file for the configured system
@@ -174,6 +178,8 @@ var getDistIDLike = func() []string {
// This executable is used in hooks and needs to be an absolute path.
// If the path is specified as an absolute path, it is used directly
// without checking for existence of an executable at that path.
//
// Deprecated: Use ResolveNVIDIACDIHookPath directly instead.
func ResolveNVIDIACTKPath(logger logger.Interface, nvidiaCTKPath string) string {
return resolveWithDefault(
logger,
@@ -183,6 +189,27 @@ func ResolveNVIDIACTKPath(logger logger.Interface, nvidiaCTKPath string) string
)
}
// ResolveNVIDIACDIHookPath resolves the path to the nvidia-cdi-hook binary.
// This executable is used in hooks and needs to be an absolute path.
// If the path is specified as an absolute path, it is used directly
// without checking for existence of an executable at that path.
//
// A path whose base name is "nvidia-ctk" is resolved as the NVIDIA Container
// Toolkit CLI with the nvidia-ctk default location as fallback; any other
// path is resolved as the CDI hook CLI with the nvidia-cdi-hook default.
func ResolveNVIDIACDIHookPath(logger logger.Interface, nvidiaCDIHookPath string) string {
	description, fallback := "NVIDIA CDI Hook CLI", nvidiaCDIHookDefaultFilePath
	if filepath.Base(nvidiaCDIHookPath) == "nvidia-ctk" {
		description, fallback = "NVIDIA Container Toolkit CLI", nvidiaCTKDefaultFilePath
	}

	return resolveWithDefault(logger, description, nvidiaCDIHookPath, fallback)
}
// ResolveNVIDIAContainerRuntimeHookPath resolves the path the nvidia-container-runtime-hook binary.
func ResolveNVIDIAContainerRuntimeHookPath(logger logger.Interface, nvidiaContainerRuntimeHookPath string) string {
return resolveWithDefault(

View File

@@ -0,0 +1,85 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
// featureName is the identifier of an optional feature.
type featureName string

// The names of the optional features that can be queried through
// features.IsEnabled.
const (
	FeatureGDS      featureName = "gds"
	FeatureMOFED    featureName = "mofed"
	FeatureNVSWITCH featureName = "nvswitch"
	FeatureGDRCopy  featureName = "gdrcopy"
)

// features specifies a set of named features.
// A nil entry means that no explicit value has been set for that feature.
type features struct {
	GDS      *feature `toml:"gds,omitempty"`
	MOFED    *feature `toml:"mofed,omitempty"`
	NVSWITCH *feature `toml:"nvswitch,omitempty"`
	GDRCopy  *feature `toml:"gdrcopy,omitempty"`
}

// feature is the explicit on / off value of a single feature.
type feature bool

// getenver is anything that can be queried for the value of an environment
// variable by name.
type getenver interface {
	Getenv(string) string
}

// IsEnabled checks whether a specified named feature is enabled.
// An optional list of environments to check for feature-specific environment
// variables can also be supplied. Unknown feature names are never enabled.
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
	// Select the explicit setting together with the environment variable
	// that acts as the fallback for the requested feature.
	var (
		explicit *feature
		envvar   string
	)
	switch n {
	case FeatureGDS:
		explicit, envvar = fs.GDS, "NVIDIA_GDS"
	case FeatureMOFED:
		explicit, envvar = fs.MOFED, "NVIDIA_MOFED"
	case FeatureNVSWITCH:
		explicit, envvar = fs.NVSWITCH, "NVIDIA_NVSWITCH"
	case FeatureGDRCopy:
		explicit, envvar = fs.GDRCopy, "NVIDIA_GDRCOPY"
	default:
		return false
	}

	return explicit.isEnabled(envvar, in...)
}

// isEnabled checks whether a feature is enabled.
// An explicitly set value always wins. Otherwise the feature is enabled if
// any of the supplied getenvers reports the string "enabled" for the
// associated envvar. A CUDA container / image can be passed here.
// The method is safe to call on a nil receiver.
func (f *feature) isEnabled(envvar string, ins ...getenver) bool {
	switch {
	case f != nil:
		return bool(*f)
	case envvar == "":
		// There is no environment variable to consult.
		return false
	}

	for _, env := range ins {
		if env.Getenv(envvar) == "enabled" {
			return true
		}
	}
	return false
}

View File

@@ -23,7 +23,8 @@ import (
)
/*
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup
#ifdef _WIN32
#define CUDAAPI __stdcall

View File

@@ -36,20 +36,20 @@ import (
// TODO: The logic for creating DRM devices should be consolidated between this
// and the logic for generating CDI specs for a single device. This is only used
// when applying OCI spec modifications to an incoming spec in "legacy" mode.
func NewDRMNodesDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string, nvidiaCTKPath string) (Discover, error) {
func NewDRMNodesDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string, nvidiaCDIHookPath string) (Discover, error) {
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
}
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCTKPath)
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCDIHookPath)
discover := Merge(drmDeviceNodes, drmByPathSymlinks)
return discover, nil
}
// NewGraphicsMountsDiscoverer creates a discoverer for the mounts required by graphics tools such as vulkan.
func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string) (Discover, error) {
libraries := NewMounts(
logger,
driver.Libraries(),
@@ -61,49 +61,61 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, n
jsonMounts := NewMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driver.Root),
lookup.WithSearchPaths("/etc", "/usr/share"),
),
driver.Configs(),
driver.Root,
[]string{
"glvnd/egl_vendor.d/10_nvidia.json",
"vulkan/icd.d/nvidia_icd.json",
"vulkan/icd.d/nvidia_layers.json",
"vulkan/implicit_layer.d/nvidia_layers.json",
"egl/egl_external_platform.d/15_nvidia_gbm.json",
"egl/egl_external_platform.d/10_nvidia_wayland.json",
"nvidia/nvoptix.bin",
},
)
xorg := optionalXorgDiscoverer(logger, driver, nvidiaCTKPath)
xorg := optionalXorgDiscoverer(logger, driver, nvidiaCDIHookPath)
discover := Merge(
libraries,
jsonMounts,
newVulkanMountsDiscoverer(logger, driver),
xorg,
)
return discover, nil
}
// newVulkanMountsDiscoverer creates a discoverer for vulkan ICD files.
// The files are looked up in the standard driver config paths first and then
// in the driver root itself. This allows us to support GKE installations where
// the vulkan ICD files are at {{ .driverRoot }}/vulkan instead of in
// /etc/vulkan. Located files are always mapped below /etc in the container.
func newVulkanMountsDiscoverer(logger logger.Interface, driver *root.Driver) Discover {
	vulkanFiles := []string{
		"vulkan/icd.d/nvidia_icd.json",
		"vulkan/icd.d/nvidia_layers.json",
		"vulkan/implicit_layer.d/nvidia_layers.json",
	}

	d := mountsToContainerPath{
		logger:        logger,
		locator:       lookup.First(driver.Configs(), driver.Files()),
		required:      vulkanFiles,
		containerRoot: "/etc",
	}
	return &d
}
type drmDevicesByPath struct {
None
logger logger.Interface
nvidiaCTKPath string
devRoot string
devicesFrom Discover
logger logger.Interface
nvidiaCDIHookPath string
devRoot string
devicesFrom Discover
}
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCTKPath string) Discover {
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCDIHookPath string) Discover {
d := drmDevicesByPath{
logger: logger,
nvidiaCTKPath: nvidiaCTKPath,
devRoot: devRoot,
devicesFrom: devices,
logger: logger,
nvidiaCDIHookPath: nvidiaCDIHookPath,
devRoot: devRoot,
devicesFrom: devices,
}
return &d
@@ -131,8 +143,8 @@ func (d drmDevicesByPath) Hooks() ([]Hook, error) {
args = append(args, "--link", l)
}
hook := CreateNvidiaCTKHook(
d.nvidiaCTKPath,
hook := CreateNvidiaCDIHook(
d.nvidiaCDIHookPath,
"create-symlinks",
args...,
)
@@ -237,17 +249,17 @@ func newDRMDeviceFilter(devices image.VisibleDevices, devRoot string) (Filter, e
}
type xorgHooks struct {
libraries Discover
driverVersion string
nvidiaCTKPath string
libraries Discover
driverVersion string
nvidiaCDIHookPath string
}
var _ Discover = (*xorgHooks)(nil)
// optionalXorgDiscoverer creates a discoverer for Xorg libraries.
// If the creation of the discoverer fails, a None discoverer is returned.
func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) Discover {
xorg, err := newXorgDiscoverer(logger, driver, nvidiaCTKPath)
func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string) Discover {
xorg, err := newXorgDiscoverer(logger, driver, nvidiaCDIHookPath)
if err != nil {
logger.Warningf("Failed to create Xorg discoverer: %v; skipping xorg libraries", err)
return None{}
@@ -255,7 +267,7 @@ func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidia
return xorg
}
func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string) (Discover, error) {
libCudaPaths, err := cuda.New(
driver.Libraries(),
).Locate(".*.*")
@@ -285,18 +297,14 @@ func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPa
},
)
xorgHooks := xorgHooks{
libraries: xorgLibs,
driverVersion: version,
nvidiaCTKPath: nvidiaCTKPath,
libraries: xorgLibs,
driverVersion: version,
nvidiaCDIHookPath: nvidiaCDIHookPath,
}
xorgConfig := NewMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driver.Root),
lookup.WithSearchPaths("/usr/share"),
),
driver.Configs(),
driver.Root,
[]string{"X11/xorg.conf.d/10-nvidia.conf"},
)
@@ -340,7 +348,7 @@ func (m xorgHooks) Hooks() ([]Hook, error) {
link := strings.TrimSuffix(target, "."+m.driverVersion)
links := []string{fmt.Sprintf("%s::%s", filepath.Base(target), link)}
symlinkHook := CreateCreateSymlinkHook(
m.nvidiaCTKPath,
m.nvidiaCDIHookPath,
links,
)

View File

@@ -41,7 +41,7 @@ func (h Hook) Hooks() ([]Hook, error) {
}
// CreateCreateSymlinkHook creates a hook which creates a symlink from link -> target.
func CreateCreateSymlinkHook(nvidiaCTKPath string, links []string) Discover {
func CreateCreateSymlinkHook(nvidiaCDIHookPath string, links []string) Discover {
if len(links) == 0 {
return None{}
}
@@ -50,18 +50,31 @@ func CreateCreateSymlinkHook(nvidiaCTKPath string, links []string) Discover {
for _, link := range links {
args = append(args, "--link", link)
}
return CreateNvidiaCTKHook(
nvidiaCTKPath,
return CreateNvidiaCDIHook(
nvidiaCDIHookPath,
"create-symlinks",
args...,
)
}
// CreateNvidiaCTKHook creates a hook which invokes the NVIDIA Container CLI hook subcommand.
func CreateNvidiaCTKHook(nvidiaCTKPath string, hookName string, additionalArgs ...string) Hook {
// CreateNvidiaCDIHook creates a hook which invokes the NVIDIA Container CLI hook subcommand.
func CreateNvidiaCDIHook(nvidiaCDIHookPath string, hookName string, additionalArgs ...string) Hook {
return cdiHook(nvidiaCDIHookPath).Create(hookName, additionalArgs...)
}
type cdiHook string
func (c cdiHook) Create(name string, args ...string) Hook {
return Hook{
Lifecycle: cdi.CreateContainerHook,
Path: nvidiaCTKPath,
Args: append([]string{filepath.Base(nvidiaCTKPath), "hook", hookName}, additionalArgs...),
Path: string(c),
Args: append(c.requiredArgs(name), args...),
}
}
// requiredArgs returns the leading arguments needed to invoke the hook with
// the specified name. The first argument is always the base name of the
// executable. When that executable is nvidia-ctk, the hook is a subcommand of
// "hook" and this is inserted before the hook name; for any other executable
// the hook name follows the executable name directly.
func (c cdiHook) requiredArgs(name string) []string {
	executable := filepath.Base(string(c))

	args := []string{executable}
	if executable == "nvidia-ctk" {
		args = append(args, "hook")
	}
	return append(args, name)
}

View File

@@ -25,12 +25,12 @@ import (
)
// NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. A logger can also be specified
func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath, ldconfigPath string) (Discover, error) {
func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCDIHookPath, ldconfigPath string) (Discover, error) {
d := ldconfig{
logger: logger,
nvidiaCTKPath: nvidiaCTKPath,
ldconfigPath: ldconfigPath,
mountsFrom: mounts,
logger: logger,
nvidiaCDIHookPath: nvidiaCDIHookPath,
ldconfigPath: ldconfigPath,
mountsFrom: mounts,
}
return &d, nil
@@ -38,10 +38,10 @@ func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPat
type ldconfig struct {
None
logger logger.Interface
nvidiaCTKPath string
ldconfigPath string
mountsFrom Discover
logger logger.Interface
nvidiaCDIHookPath string
ldconfigPath string
mountsFrom Discover
}
// Hooks checks the required mounts for libraries and returns a hook to update the LDcache for the discovered paths.
@@ -51,7 +51,7 @@ func (d ldconfig) Hooks() ([]Hook, error) {
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
}
h := CreateLDCacheUpdateHook(
d.nvidiaCTKPath,
d.nvidiaCDIHookPath,
d.ldconfigPath,
getLibraryPaths(mounts),
)
@@ -70,7 +70,7 @@ func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []str
args = append(args, "--folder", f)
}
hook := CreateNvidiaCTKHook(
hook := CreateNvidiaCDIHook(
executable,
"update-ldcache",
args...,

View File

@@ -25,8 +25,8 @@ import (
)
const (
testNvidiaCTKPath = "/foo/bar/nvidia-ctk"
testLdconfigPath = "/bar/baz/ldconfig"
testNvidiaCDIHookPath = "/foo/bar/nvidia-cdi-hook"
testLdconfigPath = "/bar/baz/ldconfig"
)
func TestLDCacheUpdateHook(t *testing.T) {
@@ -42,7 +42,7 @@ func TestLDCacheUpdateHook(t *testing.T) {
}{
{
description: "empty mounts",
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache"},
expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache"},
},
{
description: "mount error",
@@ -65,7 +65,7 @@ func TestLDCacheUpdateHook(t *testing.T) {
Path: "/usr/local/lib/libbar.so",
},
},
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
},
{
description: "host paths are ignored",
@@ -75,12 +75,12 @@ func TestLDCacheUpdateHook(t *testing.T) {
Path: "/usr/local/lib/libfoo.so",
},
},
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--folder", "/usr/local/lib"},
expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"},
},
{
description: "explicit ldconfig path is passed",
ldconfigPath: testLdconfigPath,
expectedArgs: []string{"nvidia-ctk", "hook", "update-ldcache", "--ldconfig-path", testLdconfigPath},
expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath},
},
}
@@ -92,12 +92,12 @@ func TestLDCacheUpdateHook(t *testing.T) {
},
}
expectedHook := Hook{
Path: testNvidiaCTKPath,
Path: testNvidiaCDIHookPath,
Args: tc.expectedArgs,
Lifecycle: "createContainer",
}
d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath, tc.ldconfigPath)
d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCDIHookPath, tc.ldconfigPath)
require.NoError(t, err)
hooks, err := d.Hooks()

View File

@@ -0,0 +1,81 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
// mountsToContainerPath defines a Discoverer for a required set of mounts.
// When these are discovered by a locator the specified container root is used
// to construct the container path for the mount returned.
type mountsToContainerPath struct {
// None is embedded; presumably it supplies the remaining (empty) methods
// of the Discover interface -- confirm against the definition of None.
None
// logger receives the warning, debug and info messages emitted by Mounts.
logger logger.Interface
// locator is queried once per required entry to find candidate host paths.
locator lookup.Locator
// required lists the relative paths to locate. Entries containing a "*"
// are rejected by Mounts.
required []string
// containerRoot is joined with each required entry to form the path of
// the mount in the container.
containerRoot string
}
// Mounts returns a read-only bind mount for each required entry that the
// locator finds on the host. The container path of a mount is the required
// entry joined onto the configured container root.
//
// Entries that cannot be located (or for which the locator returns an error)
// are logged and skipped, as are entries that resolve to a host path that has
// already been selected. An entry containing a wildcard ("*") is not
// supported and causes an error to be returned.
func (d *mountsToContainerPath) Mounts() ([]Mount, error) {
	var result []Mount
	visited := make(map[string]bool)

	for _, required := range d.required {
		// TODO: We could relax this condition.
		if strings.Contains(required, "*") {
			return nil, fmt.Errorf("wildcard patterns are not supported: %s", required)
		}

		located, err := d.locator.Locate(required)
		switch {
		case err != nil:
			d.logger.Warningf("Could not locate %v: %v", required, err)
			continue
		case len(located) == 0:
			d.logger.Warningf("Missing %v", required)
			continue
		}

		// Only the first candidate returned by the locator is considered.
		source := located[0]
		if visited[source] {
			d.logger.Debugf("Skipping duplicate mount %v", source)
			continue
		}
		visited[source] = true
		d.logger.Debugf("Located %v as %v", required, source)

		destination := filepath.Join(d.containerRoot, required)
		d.logger.Infof("Selecting %v as %v", source, destination)

		result = append(result, Mount{
			HostPath: source,
			Path:     destination,
			Options:  []string{"ro", "nosuid", "nodev", "bind"},
		})
	}

	return result, nil
}

View File

@@ -0,0 +1,148 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"errors"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
// TestMountsToContainerPath is a table-driven test for the Mounts method of
// mountsToContainerPath. Each case supplies a mocked locator and checks the
// mounts that are returned; Devices and Hooks are expected to be empty for
// every case.
func TestMountsToContainerPath(t *testing.T) {
logger, _ := testlog.NewNullLogger()
// mountOptions are the options expected on every returned mount.
mountOptions := []string{
"ro",
"nosuid",
"nodev",
"bind",
}
testCases := []struct {
description string
required []string
locator lookup.Locator
containerRoot string
expectedMounts []Mount
expectedError error
}{
{
// Each located file keeps its relative path below the container root.
description: "containerRoot is prepended",
required: []string{"a/path/exists.txt", "another/path/exists.txt"},
locator: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
return []string{"/located/root/" + s}, nil
},
},
containerRoot: "/container",
expectedMounts: []Mount{
{
HostPath: "/located/root/a/path/exists.txt",
Path: "/container/a/path/exists.txt",
Options: mountOptions,
},
{
HostPath: "/located/root/another/path/exists.txt",
Path: "/container/another/path/exists.txt",
Options: mountOptions,
},
},
},
{
// Both required entries resolve to the same host path, so only the
// first one produces a mount.
description: "duplicate mounts are skipped",
required: []string{"a/path/exists.txt", "another/path/exists.txt"},
locator: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
return []string{"/located/root/single.txt"}, nil
},
},
containerRoot: "/container",
expectedMounts: []Mount{
{
HostPath: "/located/root/single.txt",
Path: "/container/a/path/exists.txt",
Options: mountOptions,
},
},
},
{
// A locator error skips the entry; it is not returned from Mounts.
description: "locator errors are ignored",
required: []string{"a/path/exists.txt"},
locator: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
return nil, errors.New("not found")
},
},
containerRoot: "/container",
expectedMounts: []Mount{},
},
{
// A locator that returns no candidates and no error yields no mount.
description: "not located are ignored",
required: []string{"a/path/exists.txt"},
locator: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
return nil, nil
},
},
containerRoot: "/container",
expectedMounts: []Mount{},
},
{
// Only the first candidate returned by the locator is used.
description: "second candidate is ignored",
required: []string{"a/path/exists.txt"},
locator: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
return []string{"/located/root/" + s, "/located2/root/" + s}, nil
},
},
containerRoot: "/container",
expectedMounts: []Mount{
{
HostPath: "/located/root/a/path/exists.txt",
Path: "/container/a/path/exists.txt",
Options: mountOptions,
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
d := mountsToContainerPath{
logger: logger,
locator: tc.locator,
required: tc.required,
containerRoot: tc.containerRoot,
}
// This discoverer is only expected to contribute mounts.
devices, err := d.Devices()
require.NoError(t, err)
require.Empty(t, devices)
hooks, err := d.Hooks()
require.NoError(t, err)
require.Empty(t, hooks)
mounts, err := d.Mounts()
// No case sets expectedError, so this compares against a nil target.
require.ErrorIs(t, err, tc.expectedError)
require.ElementsMatch(t, tc.expectedMounts, mounts)
})
}
}

View File

@@ -41,14 +41,17 @@ static const char * const dxcore_nvidia_driver_store_components[] = {
*/
struct dxcore_enumAdapters2;
struct dxcore_enumAdapters3;
struct dxcore_queryAdapterInfo;
typedef int(*pfnDxcoreEnumAdapters2)(struct dxcore_enumAdapters2* pParams);
typedef int(*pfnDxcoreEnumAdapters3)(struct dxcore_enumAdapters3* pParams);
typedef int(*pfnDxcoreQueryAdapterInfo)(struct dxcore_queryAdapterInfo* pParams);
struct dxcore_lib {
void* hDxcoreLib;
pfnDxcoreEnumAdapters2 pDxcoreEnumAdapters2;
pfnDxcoreEnumAdapters3 pDxcoreEnumAdapters3;
pfnDxcoreQueryAdapterInfo pDxcoreQueryAdapterInfo;
};
@@ -66,6 +69,15 @@ struct dxcore_enumAdapters2
struct dxcore_adapterInfo *pAdapters;
};
#define ENUMADAPTER3_FILTER_COMPUTE_ONLY (0x0000000000000001)
struct dxcore_enumAdapters3
{
unsigned long long Filter;
unsigned int NumAdapters;
struct dxcore_adapterInfo *pAdapters;
};
enum dxcore_kmtqueryAdapterInfoType
{
DXCORE_QUERYDRIVERVERSION = 13,
@@ -239,7 +251,37 @@ static void dxcore_add_adapter(struct dxcore_context* pCtx, struct dxcore_lib* p
log_infof("Adding new adapter via dxcore hAdapter:%x luid:%llx wddm version:%d", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid), wddmVersion);
}
static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
/*
 * Enumerates adapters through the EnumAdapters3 entry point of the dxcore
 * library and registers each of them with dxcore_add_adapter.
 *
 * The entry point is called twice: first without a buffer, after which the
 * value left in NumAdapters is used to size the adapter array, and then again
 * with the allocated array.
 *
 * Returns 0 on success and 1 if either call or the allocation fails. The
 * caller must ensure that pLib->pDxcoreEnumAdapters3 is not NULL.
 */
static int dxcore_enum_adapters3(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
{
        struct dxcore_enumAdapters3 params = {0};
        unsigned int adapterIndex = 0;

        // Include compute-only in addition to display+compute adapters
        params.Filter = ENUMADAPTER3_FILTER_COMPUTE_ONLY;
        params.NumAdapters = 0;
        params.pAdapters = NULL;

        if (pLib->pDxcoreEnumAdapters3(&params)) {
                log_err("Failed to enumerate adapters via enumAdapters3");
                return 1;
        }

        params.pAdapters = malloc(sizeof(struct dxcore_adapterInfo) * params.NumAdapters);
        // malloc may legitimately return NULL for a zero-sized request, so a
        // NULL result is only treated as a failure when adapters are expected.
        if (params.pAdapters == NULL && params.NumAdapters != 0) {
                log_err("Failed to allocate memory for adapters via enumAdapters3");
                return 1;
        }

        if (pLib->pDxcoreEnumAdapters3(&params)) {
                free(params.pAdapters);
                log_err("Failed to enumerate adapters via enumAdapters3");
                return 1;
        }

        for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) {
                dxcore_add_adapter(pCtx, pLib, &params.pAdapters[adapterIndex]);
        }

        free(params.pAdapters);
        return 0;
}
static int dxcore_enum_adapters2(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
{
struct dxcore_enumAdapters2 params = {0};
unsigned int adapterIndex = 0;
@@ -248,15 +290,15 @@ static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib*
params.pAdapters = NULL;
if (pLib->pDxcoreEnumAdapters2(&params)) {
log_err("Failed to enumerate adapters via dxcore");
return;
log_err("Failed to enumerate adapters via enumAdapters2");
return 1;
}
params.pAdapters = malloc(sizeof(struct dxcore_adapterInfo) * params.NumAdapters);
if (pLib->pDxcoreEnumAdapters2(&params)) {
free(params.pAdapters);
log_err("Failed to enumerate adapters via dxcore");
return;
log_err("Failed to enumerate adapters via enumAdapters2");
return 1;
}
for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) {
@@ -264,6 +306,27 @@ static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib*
}
free(params.pAdapters);
return 0;
}
/*
 * Enumerates the adapters using the best entry point that is available.
 * EnumAdapters3 is tried first when its symbol was resolved. EnumAdapters2 is
 * used when EnumAdapters3 is unavailable or fails. An error is logged when
 * neither of them succeeds.
 */
static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
{
        if (pLib->pDxcoreEnumAdapters3 && dxcore_enum_adapters3(pCtx, pLib) == 0) {
                return;
        }

        // Fall back to EnumAdapters2 if the OS doesn't support EnumAdapters3
        if (pLib->pDxcoreEnumAdapters2 && dxcore_enum_adapters2(pCtx, pLib) == 0) {
                return;
        }

        log_err("Failed to enumerate adapters via dxcore");
}
int dxcore_init_context(struct dxcore_context* pCtx)
@@ -280,8 +343,9 @@ int dxcore_init_context(struct dxcore_context* pCtx)
}
lib.pDxcoreEnumAdapters2 = (pfnDxcoreEnumAdapters2)dlsym(lib.hDxcoreLib, "D3DKMTEnumAdapters2");
if (!lib.pDxcoreEnumAdapters2) {
log_err("dxcore library is present but the symbol D3DKMTEnumAdapters2 is missing");
lib.pDxcoreEnumAdapters3 = (pfnDxcoreEnumAdapters3)dlsym(lib.hDxcoreLib, "D3DKMTEnumAdapters3");
if (!lib.pDxcoreEnumAdapters2 && !lib.pDxcoreEnumAdapters3) {
log_err("dxcore library is present but the symbols D3DKMTEnumAdapters2 and D3DKMTEnumAdapters3 are missing");
goto error;
}

View File

@@ -17,7 +17,9 @@
package dxcore
/*
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup
#include <dxcore.h>
*/
import "C"

View File

@@ -22,7 +22,7 @@ import (
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
)
// additionalInfo allows for the info.Interface to be extended to implement the infoInterface.

View File

@@ -20,7 +20,8 @@ import (
"testing"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml/mock"
"github.com/stretchr/testify/require"
)
@@ -32,7 +33,7 @@ func TestUsesNVGPUModule(t *testing.T) {
}{
{
description: "init failure returns false",
nvmllib: &nvml.InterfaceMock{
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.ERROR_LIBRARY_NOT_FOUND
},
@@ -41,7 +42,7 @@ func TestUsesNVGPUModule(t *testing.T) {
},
{
description: "no devices returns false",
nvmllib: &nvml.InterfaceMock{
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.SUCCESS
},
@@ -56,7 +57,7 @@ func TestUsesNVGPUModule(t *testing.T) {
},
{
description: "DeviceGetCount error returns false",
nvmllib: &nvml.InterfaceMock{
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.SUCCESS
},
@@ -71,7 +72,7 @@ func TestUsesNVGPUModule(t *testing.T) {
},
{
description: "Failure to get device name returns false",
nvmllib: &nvml.InterfaceMock{
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.SUCCESS
},
@@ -82,7 +83,7 @@ func TestUsesNVGPUModule(t *testing.T) {
return 1, nvml.SUCCESS
},
DeviceGetHandleByIndexFunc: func(index int) (nvml.Device, nvml.Return) {
device := &nvml.DeviceMock{
device := &mock.Device{
GetNameFunc: func() (string, nvml.Return) {
return "", nvml.ERROR_UNKNOWN
},
@@ -94,7 +95,7 @@ func TestUsesNVGPUModule(t *testing.T) {
},
{
description: "nested panic returns false",
nvmllib: &nvml.InterfaceMock{
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.SUCCESS
},
@@ -105,7 +106,7 @@ func TestUsesNVGPUModule(t *testing.T) {
return 1, nvml.SUCCESS
},
DeviceGetHandleByIndexFunc: func(index int) (nvml.Device, nvml.Return) {
device := &nvml.DeviceMock{
device := &mock.Device{
GetNameFunc: func() (string, nvml.Return) {
panic("deep panic")
},
@@ -117,7 +118,7 @@ func TestUsesNVGPUModule(t *testing.T) {
},
{
description: "Single device name with no nvgpu",
nvmllib: &nvml.InterfaceMock{
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.SUCCESS
},
@@ -128,7 +129,7 @@ func TestUsesNVGPUModule(t *testing.T) {
return 1, nvml.SUCCESS
},
DeviceGetHandleByIndexFunc: func(index int) (nvml.Device, nvml.Return) {
device := &nvml.DeviceMock{
device := &mock.Device{
GetNameFunc: func() (string, nvml.Return) {
return "NVIDIA A100-SXM4-40GB", nvml.SUCCESS
},
@@ -140,7 +141,7 @@ func TestUsesNVGPUModule(t *testing.T) {
},
{
description: "Single device name with nvgpu",
nvmllib: &nvml.InterfaceMock{
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.SUCCESS
},
@@ -151,7 +152,7 @@ func TestUsesNVGPUModule(t *testing.T) {
return 1, nvml.SUCCESS
},
DeviceGetHandleByIndexFunc: func(index int) (nvml.Device, nvml.Return) {
device := &nvml.DeviceMock{
device := &mock.Device{
GetNameFunc: func() (string, nvml.Return) {
return "Orin (nvgpu)", nvml.SUCCESS
},
@@ -163,7 +164,7 @@ func TestUsesNVGPUModule(t *testing.T) {
},
{
description: "Multiple device names with no nvgpu",
nvmllib: &nvml.InterfaceMock{
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.SUCCESS
},
@@ -174,7 +175,7 @@ func TestUsesNVGPUModule(t *testing.T) {
return 2, nvml.SUCCESS
},
DeviceGetHandleByIndexFunc: func(index int) (nvml.Device, nvml.Return) {
device := &nvml.DeviceMock{
device := &mock.Device{
GetNameFunc: func() (string, nvml.Return) {
return "NVIDIA A100-SXM4-40GB", nvml.SUCCESS
},
@@ -186,7 +187,7 @@ func TestUsesNVGPUModule(t *testing.T) {
},
{
description: "Multiple device names with nvgpu",
nvmllib: &nvml.InterfaceMock{
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.SUCCESS
},
@@ -197,7 +198,7 @@ func TestUsesNVGPUModule(t *testing.T) {
return 2, nvml.SUCCESS
},
DeviceGetHandleByIndexFunc: func(index int) (nvml.Device, nvml.Return) {
device := &nvml.DeviceMock{
device := &mock.Device{
GetNameFunc: func() (string, nvml.Return) {
return "Orin (nvgpu)", nvml.SUCCESS
},
@@ -209,7 +210,7 @@ func TestUsesNVGPUModule(t *testing.T) {
},
{
description: "Mixed device names",
nvmllib: &nvml.InterfaceMock{
nvmllib: &mock.Interface{
InitFunc: func() nvml.Return {
return nvml.SUCCESS
},
@@ -226,7 +227,7 @@ func TestUsesNVGPUModule(t *testing.T) {
} else {
deviceName = "Orin (nvgpu)"
}
device := &nvml.DeviceMock{
device := &mock.Device{
GetNameFunc: func() (string, nvml.Return) {
return deviceName, nvml.SUCCESS
},
@@ -242,7 +243,7 @@ func TestUsesNVGPUModule(t *testing.T) {
t.Run(tc.description, func(t *testing.T) {
sut := additionalInfo{
nvmllib: tc.nvmllib,
devicelib: device.New(device.WithNvml(tc.nvmllib)),
devicelib: device.New(tc.nvmllib),
}
flag, _ := sut.UsesNVGPUModule()

View File

@@ -17,75 +17,40 @@
package info
import (
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// infoInterface provides an alias for mocking.
//
//go:generate moq -stub -out info-interface_mock.go . infoInterface
type infoInterface interface {
info.Interface
// UsesNVGPUModule indicates whether the system is using the nvgpu kernel module
UsesNVGPUModule() (bool, string)
}
type resolver struct {
logger logger.Interface
info infoInterface
}
// ResolveAutoMode determines the correct mode for the platform if set to "auto"
func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rmode string) {
nvinfo := info.New()
nvmllib := nvml.New()
devicelib := device.New(
device.WithNvml(nvmllib),
)
info := additionalInfo{
Interface: nvinfo,
nvmllib: nvmllib,
devicelib: devicelib,
}
r := resolver{
logger: logger,
info: info,
}
return r.resolveMode(mode, image)
return resolveMode(logger, mode, image, nil)
}
// resolveMode determines the correct mode for the platform if set to "auto"
func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
func resolveMode(logger logger.Interface, mode string, image image.CUDA, propertyExtractor info.PropertyExtractor) (rmode string) {
if mode != "auto" {
r.logger.Infof("Using requested mode '%s'", mode)
logger.Infof("Using requested mode '%s'", mode)
return mode
}
defer func() {
r.logger.Infof("Auto-detected mode as '%v'", rmode)
logger.Infof("Auto-detected mode as '%v'", rmode)
}()
if image.OnlyFullyQualifiedCDIDevices() {
return "cdi"
}
isTegra, reason := r.info.IsTegraSystem()
r.logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
nvinfo := info.New(
info.WithLogger(logger),
info.WithPropertyExtractor(propertyExtractor),
)
hasNVML, reason := r.info.HasNvml()
r.logger.Debugf("Has NVML? %v: %v", hasNVML, reason)
usesNVGPUModule, reason := r.info.UsesNVGPUModule()
r.logger.Debugf("Uses nvgpu kernel module? %v: %v", usesNVGPUModule, reason)
if (isTegra && !hasNVML) || usesNVGPUModule {
switch nvinfo.ResolvePlatform() {
case info.PlatformNVML, info.PlatformWSL:
return "legacy"
case info.PlatformTegra:
return "csv"
}
return "legacy"
}

View File

@@ -19,6 +19,7 @@ package info
import (
"testing"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
@@ -202,23 +203,24 @@ func TestResolveAutoMode(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
info := &infoInterfaceMock{
properties := &info.PropertyExtractorMock{
HasNvmlFunc: func() (bool, string) {
return tc.info["nvml"], "nvml"
},
HasDXCoreFunc: func() (bool, string) {
return tc.info["dxcore"], "dxcore"
},
IsTegraSystemFunc: func() (bool, string) {
return tc.info["tegra"], "tegra"
},
UsesNVGPUModuleFunc: func() (bool, string) {
HasTegraFilesFunc: func() (bool, string) {
return tc.info["tegra"], "tegra"
},
UsesOnlyNVGPUModuleFunc: func() (bool, string) {
return tc.info["nvgpu"], "nvgpu"
},
}
r := resolver{
logger: logger,
info: info,
}
var mounts []specs.Mount
for _, d := range tc.mounts {
mount := specs.Mount{
@@ -231,7 +233,7 @@ func TestResolveAutoMode(t *testing.T) {
image.WithEnvMap(tc.envmap),
image.WithMounts(mounts),
)
mode := r.resolveMode(tc.mode, image)
mode := resolveMode(logger, tc.mode, image, properties)
require.EqualValues(t, tc.expectedMode, mode)
})
}

View File

@@ -1,194 +0,0 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package info
import (
"sync"
)
// Ensure, that infoInterfaceMock does implement infoInterface.
// If this is not the case, regenerate this file with moq.
var _ infoInterface = &infoInterfaceMock{}
// infoInterfaceMock is a mock implementation of infoInterface.
//
// func TestSomethingThatUsesinfoInterface(t *testing.T) {
//
// // make and configure a mocked infoInterface
// mockedinfoInterface := &infoInterfaceMock{
// HasDXCoreFunc: func() (bool, string) {
// panic("mock out the HasDXCore method")
// },
// HasNvmlFunc: func() (bool, string) {
// panic("mock out the HasNvml method")
// },
// IsTegraSystemFunc: func() (bool, string) {
// panic("mock out the IsTegraSystem method")
// },
// UsesNVGPUModuleFunc: func() (bool, string) {
// panic("mock out the UsesNVGPUModule method")
// },
// }
//
// // use mockedinfoInterface in code that requires infoInterface
// // and then make assertions.
//
// }
type infoInterfaceMock struct {
// HasDXCoreFunc mocks the HasDXCore method.
HasDXCoreFunc func() (bool, string)
// HasNvmlFunc mocks the HasNvml method.
HasNvmlFunc func() (bool, string)
// IsTegraSystemFunc mocks the IsTegraSystem method.
IsTegraSystemFunc func() (bool, string)
// UsesNVGPUModuleFunc mocks the UsesNVGPUModule method.
UsesNVGPUModuleFunc func() (bool, string)
// calls tracks calls to the methods.
calls struct {
// HasDXCore holds details about calls to the HasDXCore method.
HasDXCore []struct {
}
// HasNvml holds details about calls to the HasNvml method.
HasNvml []struct {
}
// IsTegraSystem holds details about calls to the IsTegraSystem method.
IsTegraSystem []struct {
}
// UsesNVGPUModule holds details about calls to the UsesNVGPUModule method.
UsesNVGPUModule []struct {
}
}
lockHasDXCore sync.RWMutex
lockHasNvml sync.RWMutex
lockIsTegraSystem sync.RWMutex
lockUsesNVGPUModule sync.RWMutex
}
// HasDXCore calls HasDXCoreFunc.
func (mock *infoInterfaceMock) HasDXCore() (bool, string) {
callInfo := struct {
}{}
mock.lockHasDXCore.Lock()
mock.calls.HasDXCore = append(mock.calls.HasDXCore, callInfo)
mock.lockHasDXCore.Unlock()
if mock.HasDXCoreFunc == nil {
var (
bOut bool
sOut string
)
return bOut, sOut
}
return mock.HasDXCoreFunc()
}
// HasDXCoreCalls gets all the calls that were made to HasDXCore.
// Check the length with:
//
// len(mockedinfoInterface.HasDXCoreCalls())
func (mock *infoInterfaceMock) HasDXCoreCalls() []struct {
} {
var calls []struct {
}
mock.lockHasDXCore.RLock()
calls = mock.calls.HasDXCore
mock.lockHasDXCore.RUnlock()
return calls
}
// HasNvml calls HasNvmlFunc.
func (mock *infoInterfaceMock) HasNvml() (bool, string) {
callInfo := struct {
}{}
mock.lockHasNvml.Lock()
mock.calls.HasNvml = append(mock.calls.HasNvml, callInfo)
mock.lockHasNvml.Unlock()
if mock.HasNvmlFunc == nil {
var (
bOut bool
sOut string
)
return bOut, sOut
}
return mock.HasNvmlFunc()
}
// HasNvmlCalls gets all the calls that were made to HasNvml.
// Check the length with:
//
// len(mockedinfoInterface.HasNvmlCalls())
func (mock *infoInterfaceMock) HasNvmlCalls() []struct {
} {
var calls []struct {
}
mock.lockHasNvml.RLock()
calls = mock.calls.HasNvml
mock.lockHasNvml.RUnlock()
return calls
}
// IsTegraSystem calls IsTegraSystemFunc.
func (mock *infoInterfaceMock) IsTegraSystem() (bool, string) {
callInfo := struct {
}{}
mock.lockIsTegraSystem.Lock()
mock.calls.IsTegraSystem = append(mock.calls.IsTegraSystem, callInfo)
mock.lockIsTegraSystem.Unlock()
if mock.IsTegraSystemFunc == nil {
var (
bOut bool
sOut string
)
return bOut, sOut
}
return mock.IsTegraSystemFunc()
}
// IsTegraSystemCalls gets all the calls that were made to IsTegraSystem.
// Check the length with:
//
// len(mockedinfoInterface.IsTegraSystemCalls())
func (mock *infoInterfaceMock) IsTegraSystemCalls() []struct {
} {
var calls []struct {
}
mock.lockIsTegraSystem.RLock()
calls = mock.calls.IsTegraSystem
mock.lockIsTegraSystem.RUnlock()
return calls
}
// UsesNVGPUModule calls UsesNVGPUModuleFunc.
func (mock *infoInterfaceMock) UsesNVGPUModule() (bool, string) {
callInfo := struct {
}{}
mock.lockUsesNVGPUModule.Lock()
mock.calls.UsesNVGPUModule = append(mock.calls.UsesNVGPUModule, callInfo)
mock.lockUsesNVGPUModule.Unlock()
if mock.UsesNVGPUModuleFunc == nil {
var (
bOut bool
sOut string
)
return bOut, sOut
}
return mock.UsesNVGPUModuleFunc()
}
// UsesNVGPUModuleCalls gets all the calls that were made to UsesNVGPUModule.
// Check the length with:
//
// len(mockedinfoInterface.UsesNVGPUModuleCalls())
func (mock *infoInterfaceMock) UsesNVGPUModuleCalls() []struct {
} {
var calls []struct {
}
mock.lockUsesNVGPUModule.RLock()
calls = mock.calls.UsesNVGPUModule
mock.lockUsesNVGPUModule.RUnlock()
return calls
}

View File

@@ -0,0 +1,62 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package devices
// builder accumulates the settings supplied through Option values before New
// assembles the resulting device set.
type builder struct {
// asMap holds the candidate devices; it is populated by WithDeviceToMajor.
asMap devices
// filter reports whether a device name should be excluded; it is set by
// WithFilter, and New falls back to a filter that excludes nothing.
filter func(string) bool
}
// New builds a Devices collection from the supplied options.
// Every entry configured on the builder is copied into the result unless the
// configured filter reports true for its name. When no filter is supplied,
// all entries are kept.
func New(opts ...Option) Devices {
	cfg := &builder{}
	for _, apply := range opts {
		apply(cfg)
	}

	// Default to a filter that excludes nothing.
	if cfg.filter == nil {
		cfg.filter = func(string) bool { return false }
	}

	selected := make(devices)
	for name, major := range cfg.asMap {
		if !cfg.filter(string(name)) {
			selected[name] = major
		}
	}
	return selected
}
// Option is a functional option that configures the builder used by New.
type Option func(*builder)
// WithDeviceToMajor configures the device set from an explicit mapping of
// device name to major number. Any mapping configured earlier on the builder
// is replaced.
func WithDeviceToMajor(deviceToMajor map[string]int) Option {
	return func(cfg *builder) {
		converted := make(devices)
		for rawName, rawMajor := range deviceToMajor {
			converted[Name(rawName)] = Major(rawMajor)
		}
		cfg.asMap = converted
	}
}
// WithFilter installs the predicate used to exclude devices by name: New
// omits every device for which the predicate reports true.
func WithFilter(filter func(string) bool) Option {
	setFilter := func(cfg *builder) {
		cfg.filter = filter
	}
	return setFilter
}

View File

@@ -33,7 +33,7 @@ const (
NVIDIAModesetMinor = 254
NVIDIAFrontend = Name("nvidia-frontend")
NVIDIAGPU = NVIDIAFrontend
NVIDIAGPU = Name("nvidia")
NVIDIACaps = Name("nvidia-caps")
NVIDIAUVM = Name("nvidia-uvm")
@@ -53,22 +53,43 @@ type Major int
type Devices interface {
Exists(Name) bool
Get(Name) (Major, bool)
Count() int
}
type devices map[Name]Major
var _ Devices = devices(nil)
// Count returns the number of devices defined.
func (d devices) Count() int {
return len(d)
}
// Exists checks if a Device with a given name exists or not
func (d devices) Exists(name Name) bool {
_, exists := d[name]
_, exists := d.Get(name)
return exists
}
// Get a Device from Devices
// Get a Device from Devices. It also has fallback logic to ensure that device name changes in /proc/devices are handled.
// For example, for GPU drivers 550.40.x or greater, the GPU device has been renamed from "nvidia-frontend" to "nvidia".
func (d devices) Get(name Name) (Major, bool) {
device, exists := d[name]
return device, exists
for _, n := range name.getWithFallback() {
device, exists := d[n]
if exists {
return device, true
}
}
return 0, false
}
// getWithFallback lists, in priority order, the device names that should be
// tried when looking up n. The GPU device is known under both NVIDIAGPU and
// NVIDIAFrontend, so either of those names expands to both; any other name
// maps only to itself.
func (n Name) getWithFallback() []Name {
	if n != NVIDIAGPU && n != NVIDIAFrontend {
		return []Name{n}
	}
	return []Name{NVIDIAGPU, NVIDIAFrontend}
}
// GetNVIDIADevices returns the set of NVIDIA Devices on the machine
@@ -94,27 +115,23 @@ func nvidiaDevices(devicesPath string) (Devices, error) {
var errNoNvidiaDevices = errors.New("no NVIDIA devices found")
func nvidiaDeviceFrom(reader io.Reader) (devices, error) {
func nvidiaDeviceFrom(reader io.Reader) (Devices, error) {
allDevices := devicesFrom(reader)
nvidiaDevices := make(devices)
var hasNvidiaDevices bool
for n, d := range allDevices {
if !strings.HasPrefix(string(n), nvidiaDevicePrefix) {
continue
}
nvidiaDevices[n] = d
hasNvidiaDevices = true
}
if !hasNvidiaDevices {
nvidiaDevices := New(
WithDeviceToMajor(allDevices),
WithFilter(func(n string) bool {
return !strings.HasPrefix(n, nvidiaDevicePrefix)
}),
)
if nvidiaDevices.Count() == 0 {
return nil, errNoNvidiaDevices
}
return nvidiaDevices, nil
}
func devicesFrom(reader io.Reader) devices {
allDevices := make(devices)
func devicesFrom(reader io.Reader) map[string]int {
allDevices := make(map[string]int)
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
device, major, err := processProcDeviceLine(scanner.Text())
@@ -126,11 +143,11 @@ func devicesFrom(reader io.Reader) devices {
return allDevices
}
func processProcDeviceLine(line string) (Name, Major, error) {
func processProcDeviceLine(line string) (string, int, error) {
trimmed := strings.TrimSpace(line)
var name Name
var major Major
var name string
var major int
n, _ := fmt.Sscanf(trimmed, "%d %s", &major, &name)
if n == 2 {

View File

@@ -17,6 +17,9 @@ var _ Devices = &DevicesMock{}
//
// // make and configure a mocked Devices
// mockedDevices := &DevicesMock{
// CountFunc: func() int {
// panic("mock out the Count method")
// },
// ExistsFunc: func(name Name) bool {
// panic("mock out the Exists method")
// },
@@ -30,6 +33,9 @@ var _ Devices = &DevicesMock{}
//
// }
type DevicesMock struct {
// CountFunc mocks the Count method.
CountFunc func() int
// ExistsFunc mocks the Exists method.
ExistsFunc func(name Name) bool
@@ -38,6 +44,9 @@ type DevicesMock struct {
// calls tracks calls to the methods.
calls struct {
// Count holds details about calls to the Count method.
Count []struct {
}
// Exists holds details about calls to the Exists method.
Exists []struct {
// Name is the name argument value.
@@ -49,10 +58,41 @@ type DevicesMock struct {
Name Name
}
}
lockCount sync.RWMutex
lockExists sync.RWMutex
lockGet sync.RWMutex
}
// Count calls CountFunc.
func (mock *DevicesMock) Count() int {
callInfo := struct {
}{}
mock.lockCount.Lock()
mock.calls.Count = append(mock.calls.Count, callInfo)
mock.lockCount.Unlock()
if mock.CountFunc == nil {
var (
nOut int
)
return nOut
}
return mock.CountFunc()
}
// CountCalls gets all the calls that were made to Count.
// Check the length with:
//
// len(mockedDevices.CountCalls())
func (mock *DevicesMock) CountCalls() []struct {
} {
var calls []struct {
}
mock.lockCount.RLock()
calls = mock.calls.Count
mock.lockCount.RUnlock()
return calls
}
// Exists calls ExistsFunc.
func (mock *DevicesMock) Exists(name Name) bool {
callInfo := struct {

View File

@@ -25,22 +25,46 @@ import (
)
func TestNvidiaDevices(t *testing.T) {
devices := map[Name]Major{
"nvidia-frontend": 195,
"nvidia-nvlink": 234,
"nvidia-caps": 235,
"nvidia-uvm": 510,
"nvidia-nvswitch": 511,
perDriverDeviceMaps := map[string]map[string]int{
"pre550": {
"nvidia-frontend": 195,
"nvidia-nvlink": 234,
"nvidia-caps": 235,
"nvidia-uvm": 510,
"nvidia-nvswitch": 511,
},
"post550": {
"nvidia": 195,
"nvidia-nvlink": 234,
"nvidia-caps": 235,
"nvidia-uvm": 510,
"nvidia-nvswitch": 511,
},
}
nvidiaDevices := testDevices(devices)
for name, major := range devices {
device, exists := nvidiaDevices.Get(name)
require.True(t, exists, "Unexpected missing device")
require.Equal(t, device, major, "Unexpected device major")
for k, devices := range perDriverDeviceMaps {
nvidiaDevices := New(WithDeviceToMajor(devices))
t.Run(k, func(t *testing.T) {
// Each of the expected devices needs to exist.
for name, major := range devices {
device, exists := nvidiaDevices.Get(Name(name))
require.True(t, exists)
require.Equal(t, device, Major(major))
}
// An unexpected device cannot exist
_, exists := nvidiaDevices.Get("bogus")
require.False(t, exists)
// Regardless of the driver version, the nvidia and nvidia-frontend
// names are supported and have the same value.
nvidia, exists := nvidiaDevices.Get(NVIDIAGPU)
require.True(t, exists)
nvidiaFrontend, exists := nvidiaDevices.Get(NVIDIAFrontend)
require.True(t, exists)
require.Equal(t, nvidia, nvidiaFrontend)
})
}
_, exists := nvidiaDevices.Get("bogus")
require.False(t, exists, "Unexpected 'bogus' device found")
}
func TestProcessDeviceFile(t *testing.T) {
@@ -52,6 +76,7 @@ func TestProcessDeviceFile(t *testing.T) {
{lines: []string{}, expectedError: errNoNvidiaDevices},
{lines: []string{"Not a valid line:"}, expectedError: errNoNvidiaDevices},
{lines: []string{"195 nvidia-frontend"}, expected: devices{"nvidia-frontend": 195}},
{lines: []string{"195 nvidia"}, expected: devices{"nvidia": 195}},
{lines: []string{"195 nvidia-frontend", "235 nvidia-caps"}, expected: devices{"nvidia-frontend": 195, "nvidia-caps": 235}},
{lines: []string{" 195 nvidia-frontend"}, expected: devices{"nvidia-frontend": 195}},
{lines: []string{"Not a valid line:", "", "195 nvidia-frontend"}, expected: devices{"nvidia-frontend": 195}},
@@ -63,7 +88,10 @@ func TestProcessDeviceFile(t *testing.T) {
d, err := nvidiaDeviceFrom(contents)
require.ErrorIs(t, err, tc.expectedError)
require.EqualValues(t, tc.expected, d)
if tc.expectedError == nil {
require.EqualValues(t, tc.expected, d.(devices))
}
})
}
}
@@ -71,8 +99,8 @@ func TestProcessDeviceFile(t *testing.T) {
func TestProcessDeviceFileLine(t *testing.T) {
testCases := []struct {
line string
name Name
major Major
name string
major int
err bool
}{
{"", "", 0, true},
@@ -97,8 +125,3 @@ func TestProcessDeviceFileLine(t *testing.T) {
})
}
}
// testDevices creates a set of test NVIDIA devices
func testDevices(d map[Name]Major) Devices {
return devices(d)
}

View File

@@ -0,0 +1,45 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package root
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
// Option is a functional option that configures a Driver.
type Option func(*Driver)
// WithLogger sets the logger stored on the Driver.
func WithLogger(logger logger.Interface) Option {
	setLogger := func(driver *Driver) {
		driver.logger = logger
	}
	return setLogger
}
// WithDriverRoot sets the Root of the Driver to the specified path.
func WithDriverRoot(root string) Option {
	setRoot := func(driver *Driver) {
		driver.Root = root
	}
	return setRoot
}
// WithLibrarySearchPaths sets the explicit search paths that the Driver uses
// for discovering libraries. The paths are stored as given.
func WithLibrarySearchPaths(paths ...string) Option {
	setPaths := func(driver *Driver) {
		driver.librarySearchPaths = paths
	}
	return setPaths
}
// WithConfigSearchPaths sets the explicit search paths that the Driver uses
// for discovering driver config files. The paths are stored as given.
func WithConfigSearchPaths(paths ...string) Option {
	setPaths := func(driver *Driver) {
		driver.configSearchPaths = paths
	}
	return setPaths
}

View File

@@ -17,6 +17,7 @@
package root
import (
"os"
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
@@ -30,27 +31,65 @@ type Driver struct {
Root string
// librarySearchPaths specifies explicit search paths for discovering libraries.
librarySearchPaths []string
// configSearchPaths specifies explicit search paths for discovering driver config files.
configSearchPaths []string
}
// New creates a new Driver root at the specified path.
// TODO: Use functional options here.
func New(logger logger.Interface, path string, librarySearchPaths []string) *Driver {
return &Driver{
logger: logger,
Root: path,
librarySearchPaths: normalizeSearchPaths(librarySearchPaths...),
// New creates a new Driver root using the specified options.
func New(opts ...Option) *Driver {
d := &Driver{}
for _, opt := range opts {
opt(d)
}
if d.logger == nil {
d.logger = logger.New()
}
return d
}
// Drivers returns a Locator for driver libraries.
// Files returns a Locator for arbitrary driver files.
func (r *Driver) Files(opts ...lookup.Option) lookup.Locator {
return lookup.NewFileLocator(
append(opts,
lookup.WithLogger(r.logger),
lookup.WithRoot(r.Root),
)...,
)
}
// Libraries returns a Locator for driver libraries.
func (r *Driver) Libraries() lookup.Locator {
return lookup.NewLibraryLocator(
lookup.WithLogger(r.logger),
lookup.WithRoot(r.Root),
lookup.WithSearchPaths(r.librarySearchPaths...),
lookup.WithSearchPaths(normalizeSearchPaths(r.librarySearchPaths...)...),
)
}
// Configs returns a locator for driver configs.
// If configSearchPaths is specified, these paths are used as absolute paths,
// otherwise, /etc and /usr/share are searched.
func (r *Driver) Configs() lookup.Locator {
return lookup.NewFileLocator(r.configSearchOptions()...)
}
func (r *Driver) configSearchOptions() []lookup.Option {
if len(r.configSearchPaths) > 0 {
return []lookup.Option{
lookup.WithLogger(r.logger),
lookup.WithRoot("/"),
lookup.WithSearchPaths(normalizeSearchPaths(r.configSearchPaths...)...),
}
}
searchPaths := []string{"/etc"}
searchPaths = append(searchPaths, xdgDataDirs()...)
return []lookup.Option{
lookup.WithLogger(r.logger),
lookup.WithRoot(r.Root),
lookup.WithSearchPaths(searchPaths...),
}
}
// normalizeSearchPaths takes a list of paths and normalizes these.
// Each of the elements in the list is expanded if it is a path list and the
// resultant list is returned.
@@ -63,3 +102,13 @@ func normalizeSearchPaths(paths ...string) []string {
}
return normalized
}
// xdgDataDirs returns the data directories named by the XDG_DATA_DIRS
// environment variable, passed through normalizeSearchPaths. When the variable
// is unset or empty, /usr/local/share and /usr/share are returned instead.
// See https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html.
func xdgDataDirs() []string {
	value, isSet := os.LookupEnv("XDG_DATA_DIRS")
	if !isSet || value == "" {
		return []string{"/usr/local/share", "/usr/share"}
	}
	return normalizeSearchPaths(value)
}

View File

@@ -185,7 +185,7 @@ func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, de
func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devices []string) (spec.Interface, error) {
cdilib, err := nvcdi.New(
nvcdi.WithLogger(logger),
nvcdi.WithNVIDIACTKPath(cfg.NVIDIACTKConfig.Path),
nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path),
nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
nvcdi.WithVendor("runtime.nvidia.com"),
nvcdi.WithClass("gpu"),

View File

@@ -62,7 +62,7 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, image image.CUD
cdilib, err := nvcdi.New(
nvcdi.WithLogger(logger),
nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
nvcdi.WithNVIDIACTKPath(cfg.NVIDIACTKConfig.Path),
nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path),
nvcdi.WithMode(nvcdi.ModeCSV),
nvcdi.WithCSVFiles(csvFiles),
)

View File

@@ -26,13 +26,6 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
const (
nvidiaGDSEnvvar = "NVIDIA_GDS"
nvidiaMOFEDEnvvar = "NVIDIA_MOFED"
nvidiaNVSWITCHEnvvar = "NVIDIA_NVSWITCH"
nvidiaGDRCOPYEnvvar = "NVIDIA_GDRCOPY"
)
// NewFeatureGatedModifier creates the modifiers for optional features.
// These include:
//
@@ -53,7 +46,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
driverRoot := cfg.NVIDIAContainerCLIConfig.Root
devRoot := cfg.NVIDIAContainerCLIConfig.Root
if image.Getenv(nvidiaGDSEnvvar) == "enabled" {
if cfg.Features.IsEnabled(config.FeatureGDS, image) {
d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err)
@@ -61,7 +54,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
discoverers = append(discoverers, d)
}
if image.Getenv(nvidiaMOFEDEnvvar) == "enabled" {
if cfg.Features.IsEnabled(config.FeatureMOFED, image) {
d, err := discover.NewMOFEDDiscoverer(logger, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for MOFED devices: %w", err)
@@ -69,7 +62,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
discoverers = append(discoverers, d)
}
if image.Getenv(nvidiaNVSWITCHEnvvar) == "enabled" {
if cfg.Features.IsEnabled(config.FeatureNVSWITCH, image) {
d, err := discover.NewNvSwitchDiscoverer(logger, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for NVSWITCH devices: %w", err)
@@ -77,7 +70,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
discoverers = append(discoverers, d)
}
if image.Getenv(nvidiaGDRCOPYEnvvar) == "enabled" {
if cfg.Features.IsEnabled(config.FeatureGDRCopy, image) {
d, err := discover.NewGDRCopyDiscoverer(logger, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for GDRCopy devices: %w", err)

View File

@@ -29,20 +29,18 @@ import (
// NewGraphicsModifier constructs a modifier that injects graphics-related modifications into an OCI runtime specification.
// The value of the NVIDIA_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made.
func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver) (oci.SpecModifier, error) {
if required, reason := requiresGraphicsModifier(image); !required {
logger.Infof("No graphics modifier required: %v", reason)
return nil, nil
}
// TODO: We should not just pass `nil` as the search path here.
driver := root.New(logger, cfg.NVIDIAContainerCLIConfig.Root, nil)
nvidiaCTKPath := cfg.NVIDIACTKConfig.Path
nvidiaCDIHookPath := cfg.NVIDIACTKConfig.Path
mounts, err := discover.NewGraphicsMountsDiscoverer(
logger,
driver,
nvidiaCTKPath,
nvidiaCDIHookPath,
)
if err != nil {
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
@@ -54,7 +52,7 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag
logger,
image.DevicesFromEnvvars(visibleDevicesEnvvar),
devRoot,
nvidiaCTKPath,
nvidiaCDIHookPath,
)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer: %v", err)

View File

@@ -90,10 +90,9 @@ func TestDiscovererFromCSVFiles(t *testing.T) {
expectedHooks: []discover.Hook{
{
Lifecycle: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Path: "/usr/bin/nvidia-cdi-hook",
Args: []string{
"nvidia-ctk",
"hook",
"nvidia-cdi-hook",
"create-symlinks",
"--link",
"/usr/lib/aarch64-linux-gnu/tegra/libv4l2_nvargus.so::/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvargus.so",
@@ -147,10 +146,9 @@ func TestDiscovererFromCSVFiles(t *testing.T) {
expectedHooks: []discover.Hook{
{
Lifecycle: "createContainer",
Path: "/usr/bin/nvidia-ctk",
Path: "/usr/bin/nvidia-cdi-hook",
Args: []string{
"nvidia-ctk",
"hook",
"nvidia-cdi-hook",
"create-symlinks",
"--link",
"/usr/lib/aarch64-linux-gnu/tegra/libv4l2_nvargus.so::/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvargus.so",
@@ -189,7 +187,7 @@ func TestDiscovererFromCSVFiles(t *testing.T) {
o := tegraOptions{
logger: logger,
nvidiaCTKPath: "/usr/bin/nvidia-ctk",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
csvFiles: []string{"dummy"},
ignorePatterns: tc.ignorePatterns,
symlinkLocator: tc.symlinkLocator,

View File

@@ -28,10 +28,10 @@ import (
type symlinkHook struct {
discover.None
logger logger.Interface
nvidiaCTKPath string
targets []string
mountsFrom discover.Discover
logger logger.Interface
nvidiaCDIHookPath string
targets []string
mountsFrom discover.Discover
// The following can be overridden for testing
symlinkChainLocator lookup.Locator
@@ -42,7 +42,7 @@ type symlinkHook struct {
func (o tegraOptions) createCSVSymlinkHooks(targets []string, mounts discover.Discover) discover.Discover {
return symlinkHook{
logger: o.logger,
nvidiaCTKPath: o.nvidiaCTKPath,
nvidiaCDIHookPath: o.nvidiaCDIHookPath,
targets: targets,
mountsFrom: mounts,
symlinkChainLocator: o.symlinkChainLocator,
@@ -60,7 +60,7 @@ func (d symlinkHook) Hooks() ([]discover.Hook, error) {
csvSymlinks := d.getCSVFileSymlinks()
return discover.CreateCreateSymlinkHook(
d.nvidiaCTKPath,
d.nvidiaCDIHookPath,
append(csvSymlinks, specificLinks...),
).Hooks()
}

View File

@@ -30,7 +30,7 @@ type tegraOptions struct {
csvFiles []string
driverRoot string
devRoot string
nvidiaCTKPath string
nvidiaCDIHookPath string
ldconfigPath string
librarySearchPaths []string
ignorePatterns ignoreMountSpecPatterns
@@ -80,7 +80,7 @@ func New(opts ...Option) (discover.Discover, error) {
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
}
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(o.logger, csvDiscoverer, o.nvidiaCTKPath, o.ldconfigPath)
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(o.logger, csvDiscoverer, o.nvidiaCDIHookPath, o.ldconfigPath)
if err != nil {
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
}
@@ -133,10 +133,10 @@ func WithCSVFiles(csvFiles []string) Option {
}
}
// WithNVIDIACTKPath sets the path to the nvidia-container-toolkit binary.
func WithNVIDIACTKPath(nvidiaCTKPath string) Option {
// WithNVIDIACDIHookPath sets the path to the nvidia-cdi-hook binary.
func WithNVIDIACDIHookPath(nvidiaCDIHookPath string) Option {
return func(o *tegraOptions) {
o.nvidiaCTKPath = nvidiaCTKPath
o.nvidiaCDIHookPath = nvidiaCDIHookPath
}
}

View File

@@ -26,6 +26,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
)
// Run is an entry point that allows for idiomatic handling of errors
@@ -65,6 +66,7 @@ func (r rt) Run(argv []string) (rerr error) {
if r.modeOverride != "" {
cfg.NVIDIAContainerRuntimeConfig.Mode = r.modeOverride
}
//nolint:staticcheck // TODO(elezar): We should switch the nvidia-container-runtime from using nvidia-ctk to using nvidia-cdi-hook.
cfg.NVIDIACTKConfig.Path = config.ResolveNVIDIACTKPath(r.logger, cfg.NVIDIACTKConfig.Path)
cfg.NVIDIAContainerRuntimeHookConfig.Path = config.ResolveNVIDIAContainerRuntimeHookPath(r.logger, cfg.NVIDIAContainerRuntimeHookConfig.Path)
@@ -76,8 +78,13 @@ func (r rt) Run(argv []string) (rerr error) {
r.logger.Infof("Running with config:\n%+v", cfg)
}
driver := root.New(
root.WithLogger(r.logger),
root.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
)
r.logger.Debugf("Command line arguments: %v", argv)
runtime, err := newNVIDIAContainerRuntime(r.logger, cfg, argv)
runtime, err := newNVIDIAContainerRuntime(r.logger, cfg, argv, driver)
if err != nil {
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
}

View File

@@ -23,12 +23,13 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
// newNVIDIAContainerRuntime is a factory method that constructs a runtime based on the selected configuration and specified logger
func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv []string) (oci.Runtime, error) {
func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv []string, driver *root.Driver) (oci.Runtime, error) {
lowLevelRuntime, err := oci.NewLowLevelRuntime(logger, cfg.NVIDIAContainerRuntimeConfig.Runtimes)
if err != nil {
return nil, fmt.Errorf("error constructing low-level runtime: %v", err)
@@ -44,7 +45,7 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
}
specModifier, err := newSpecModifier(logger, cfg, ociSpec)
specModifier, err := newSpecModifier(logger, cfg, ociSpec, driver)
if err != nil {
return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err)
}
@@ -61,7 +62,7 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
}
// newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
rawSpec, err := ociSpec.Load()
if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
@@ -82,7 +83,7 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
return modeModifier, nil
}
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image)
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver)
if err != nil {
return nil, err
}

View File

@@ -29,6 +29,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
)
@@ -63,6 +64,9 @@ func TestMain(m *testing.M) {
func TestFactoryMethod(t *testing.T) {
logger, _ := testlog.NewNullLogger()
driver := root.New(
root.WithDriverRoot("/nvidia/driver/root"),
)
testCases := []struct {
description string
@@ -143,6 +147,7 @@ func TestFactoryMethod(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
bundleDir := t.TempDir()
specFile, err := os.Create(filepath.Join(bundleDir, "config.json"))
@@ -151,7 +156,7 @@ func TestFactoryMethod(t *testing.T) {
argv := []string{"--bundle", bundleDir, "create"}
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv, driver)
if tc.expectedError {
require.Error(t, err)
} else {

View File

@@ -29,15 +29,18 @@ import (
func TestCreateControlDevices(t *testing.T) {
logger, _ := testlog.NewNullLogger()
nvidiaDevices := &devices.DevicesMock{
GetFunc: func(name devices.Name) (devices.Major, bool) {
devices := map[devices.Name]devices.Major{
"nvidia-frontend": 195,
"nvidia-uvm": 243,
}
return devices[name], true
},
}
nvidiaDevices := devices.New(
devices.WithDeviceToMajor(map[string]int{
"nvidia-frontend": 195,
"nvidia-uvm": 243,
}),
)
nvidia550Devices := devices.New(
devices.WithDeviceToMajor(map[string]int{
"nvidia": 195,
"nvidia-uvm": 243,
}),
)
mknodeError := errors.New("mknode error")
@@ -54,7 +57,7 @@ func TestCreateControlDevices(t *testing.T) {
}
}{
{
description: "no root specified",
description: "no root specified; pre 550 driver",
root: "",
devices: nvidiaDevices,
mknodeError: nil,
@@ -69,6 +72,22 @@ func TestCreateControlDevices(t *testing.T) {
{"/dev/nvidia-uvm-tools", 243, 1},
},
},
{
description: "no root specified; 550 driver",
root: "",
devices: nvidia550Devices,
mknodeError: nil,
expectedCalls: []struct {
S string
N1 int
N2 int
}{
{"/dev/nvidiactl", 195, 255},
{"/dev/nvidia-modeset", 195, 254},
{"/dev/nvidia-uvm", 243, 0},
{"/dev/nvidia-uvm-tools", 243, 1},
},
},
{
description: "some root specified",
root: "/some/root",
@@ -130,5 +149,4 @@ func TestCreateControlDevices(t *testing.T) {
require.EqualValues(t, tc.expectedCalls, mknode.MknodeCalls())
})
}
}

View File

@@ -1,2 +1,3 @@
nvidia-container-runtime /usr/bin
nvidia-ctk /usr/bin
nvidia-cdi-hook /usr/bin

View File

@@ -16,6 +16,7 @@ Source2: LICENSE
Source3: nvidia-container-runtime
Source4: nvidia-container-runtime.cdi
Source5: nvidia-container-runtime.legacy
Source6: nvidia-cdi-hook
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
Provides: nvidia-container-runtime
@@ -27,7 +28,7 @@ Requires: nvidia-container-toolkit-base == %{version}-%{release}
Provides tools and utilities to enable GPU support in containers.
%prep
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} .
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} .
%install
mkdir -p %{buildroot}%{_bindir}
@@ -36,6 +37,7 @@ install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook
%post
if [ $1 -gt 1 ]; then # only on package upgrade
@@ -86,6 +88,7 @@ Provides tools such as the NVIDIA Container Runtime and NVIDIA Container Toolkit
%license LICENSE
%{_bindir}/nvidia-container-runtime
%{_bindir}/nvidia-ctk
%{_bindir}/nvidia-cdi-hook
# The OPERATOR EXTENSIONS package consists of components that are required to enable GPU support in Kubernetes.
# This package is not distributed as part of the NVIDIA Container Toolkit RPMs.

View File

@@ -19,7 +19,7 @@ package engine
// Interface defines the API for a runtime config updater.
type Interface interface {
DefaultRuntime() string
AddRuntime(string, string, bool) error
AddRuntime(string, string, bool, ...map[string]interface{}) error
Set(string, interface{})
RemoveRuntime(string) error
Save(string) (int64, error)

View File

@@ -30,7 +30,7 @@ type ConfigV1 Config
var _ engine.Interface = (*ConfigV1)(nil)
// AddRuntime adds a runtime to the containerd config
func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool) error {
func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool, configOverrides ...map[string]interface{}) error {
if c == nil || c.Tree == nil {
return fmt.Errorf("config is nil")
}
@@ -75,6 +75,16 @@ func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool) error
}
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "BinaryName"}, path)
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "Runtime"}, path)
defaultRuntimeSubtree := subtreeAtPath(config, "plugins", "cri", "containerd", "default_runtime")
if err := defaultRuntimeSubtree.applyOverrides(configOverrides...); err != nil {
return fmt.Errorf("failed to apply config overrides to default_runtime: %w", err)
}
}
runtimeSubtree := subtreeAtPath(config, "plugins", "cri", "containerd", "runtimes", name)
if err := runtimeSubtree.applyOverrides(configOverrides...); err != nil {
return fmt.Errorf("failed to apply config overrides: %w", err)
}
*c.Tree = config

View File

@@ -25,7 +25,7 @@ import (
)
// AddRuntime adds a runtime to the containerd config
func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
func (c *Config) AddRuntime(name string, path string, setAsDefault bool, configOverrides ...map[string]interface{}) error {
if c == nil || c.Tree == nil {
return fmt.Errorf("config is nil")
}
@@ -60,6 +60,11 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}, name)
}
runtimeSubtree := subtreeAtPath(config, "plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name)
if err := runtimeSubtree.applyOverrides(configOverrides...); err != nil {
return fmt.Errorf("failed to apply config overrides: %w", err)
}
*c.Tree = config
return nil
}

View File

@@ -0,0 +1,97 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package containerd
import (
"testing"
"github.com/pelletier/go-toml"
"github.com/stretchr/testify/require"
)
// TestAddRuntime checks that Config.AddRuntime adds a runtime named "test"
// with the binary path "/usr/bin/test" to a containerd config and that any
// supplied config overrides are applied to the runtime's subtree.
//
// A test case that sets expectedError asserts that AddRuntime fails with that
// error; otherwise the call must succeed and the resulting config must match
// expectedConfig.
func TestAddRuntime(t *testing.T) {
	testCases := []struct {
		description     string
		config          string
		setAsDefault    bool
		configOverrides []map[string]interface{}
		expectedConfig  string
		expectedError   error
	}{
		{
			description: "empty config not default runtime",
			expectedConfig: `
version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
[plugins."io.containerd.grpc.v1.cri".containerd]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.test]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = ""
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.test.options]
BinaryName = "/usr/bin/test"
`,
			expectedError: nil,
		},
		{
			description: "empty config not default runtime with overrides",
			configOverrides: []map[string]interface{}{
				{
					"options": map[string]interface{}{
						"SystemdCgroup": true,
					},
				},
			},
			expectedConfig: `
version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
[plugins."io.containerd.grpc.v1.cri".containerd]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.test]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = ""
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.test.options]
BinaryName = "/usr/bin/test"
SystemdCgroup = true
`,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			config, err := toml.Load(tc.config)
			require.NoError(t, err)
			expectedConfig, err := toml.Load(tc.expectedConfig)
			require.NoError(t, err)

			c := &Config{
				Tree: config,
			}

			err = c.AddRuntime("test", "/usr/bin/test", tc.setAsDefault, tc.configOverrides...)
			// Honour the expectedError field instead of unconditionally
			// requiring success, so that failure cases can be expressed.
			if tc.expectedError != nil {
				require.ErrorIs(t, err, tc.expectedError)
				return
			}
			require.NoError(t, err)

			// Compare the serialized forms so that the check is independent
			// of the in-memory tree representation.
			require.EqualValues(t, expectedConfig.String(), config.String())
		})
	}
}

View File

@@ -0,0 +1,56 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package containerd
import (
"fmt"
"github.com/pelletier/go-toml"
)
// tomlTree is a defined type whose underlying type is toml.Tree. It is not a
// type alias, so conversions between *toml.Tree and *tomlTree are explicit;
// being a distinct type is what allows extension methods to be declared on it.
type tomlTree toml.Tree
// subtreeAtPath looks up the value stored at the given key path in c and
// returns it converted to a *tomlTree.
//
// The lookup result is type-asserted without a comma-ok check, so this
// function panics if the value at path is not a *toml.Tree.
func subtreeAtPath(c toml.Tree, path ...string) *tomlTree {
	return (*tomlTree)(c.GetPath(path).(*toml.Tree))
}
// insert merges every entry of other into the tree t.
//
// A value that is itself a map[string]interface{} is merged recursively into
// the table stored under the same key; if no value exists for that key a new
// table is created from the nested map. Any other value is set on t directly,
// replacing an existing value for that key.
//
// An error is returned if a nested map targets a key whose existing value is
// not a table, or if a new table cannot be constructed from a nested map.
func (t *tomlTree) insert(other map[string]interface{}) error {
	tree := (*toml.Tree)(t)
	for key, value := range other {
		nested, isMap := value.(map[string]interface{})
		if !isMap {
			tree.Set(key, value)
			continue
		}

		existing := tree.Get(key)
		if existing == nil {
			// There is no table to merge into yet: create one rather than
			// panicking on a failed type assertion.
			created, err := toml.TreeFromMap(nested)
			if err != nil {
				return fmt.Errorf("invalid toml config for key %q: %w", key, err)
			}
			tree.Set(key, created)
			continue
		}

		subtree, isTree := existing.(*toml.Tree)
		if !isTree {
			return fmt.Errorf("cannot insert table at key %q: existing value is not a table", key)
		}
		// Do not return on success here: the remaining keys of other must
		// still be processed, and map iteration order is not deterministic.
		if err := (*tomlTree)(subtree).insert(nested); err != nil {
			return err
		}
	}
	return nil
}
// applyOverrides applies each of the supplied override maps to the tree t, in
// the order given.
//
// Every override is first converted to a TOML tree; a conversion failure is
// reported as an "invalid toml config" error. The tree is then converted back
// to a map and inserted into t. Processing stops at the first error.
func (t *tomlTree) applyOverrides(overrides ...map[string]interface{}) error {
	for i := range overrides {
		validated, err := toml.TreeFromMap(overrides[i])
		if err != nil {
			return fmt.Errorf("invalid toml config: %w", err)
		}

		err = t.insert(validated.ToMap())
		if err != nil {
			return err
		}
	}
	return nil
}

View File

@@ -40,7 +40,7 @@ func New(opts ...Option) (engine.Interface, error) {
}
// AddRuntime adds a new runtime to the crio config
func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
func (c *Config) AddRuntime(name string, path string, setAsDefault bool, _ ...map[string]interface{}) error {
if c == nil {
return fmt.Errorf("config is nil")
}

View File

@@ -49,7 +49,7 @@ func New(opts ...Option) (engine.Interface, error) {
}
// AddRuntime adds a new runtime to the docker config
func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
func (c *Config) AddRuntime(name string, path string, setAsDefault bool, _ ...map[string]interface{}) error {
if c == nil {
return fmt.Errorf("config is nil")
}

View File

@@ -48,8 +48,8 @@ type Interface interface {
GetCommonEdits() (*cdi.ContainerEdits, error)
GetAllDeviceSpecs() ([]specs.Device, error)
GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error)
GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error)
GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error)
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error)
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error)
GetDeviceSpecsByID(...string) ([]specs.Device, error)
}

View File

@@ -36,12 +36,12 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
},
)
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driver, l.nvidiaCTKPath)
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath)
if err != nil {
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
}
driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCTKPath, l.ldconfigPath, l.nvmllib)
driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
}

View File

@@ -22,7 +22,7 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"golang.org/x/sys/unix"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
@@ -34,7 +34,7 @@ import (
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
// The supplied NVML Library is used to query the expected driver version.
func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) {
func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) {
if r := nvmllib.Init(); r != nvml.SUCCESS {
return nil, fmt.Errorf("failed to initialize NVML: %v", r)
}
@@ -49,11 +49,11 @@ func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTK
return nil, fmt.Errorf("failed to determine driver version: %v", r)
}
return newDriverVersionDiscoverer(logger, driver, nvidiaCTKPath, ldconfigPath, version)
return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
}
func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath, ldconfigPath, version string) (discover.Discover, error) {
libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCTKPath, ldconfigPath, version)
func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) {
libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
}
@@ -81,7 +81,7 @@ func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nv
}
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath, ldconfigPath, version string) (discover.Discover, error) {
func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) {
libraryPaths, err := getVersionLibs(logger, driver, version)
if err != nil {
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
@@ -97,7 +97,7 @@ func NewDriverLibraryDiscoverer(logger logger.Interface, driver *root.Driver, nv
libraryPaths,
)
hooks, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCTKPath, ldconfigPath)
hooks, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath)
d := discover.Merge(
libraries,

View File

@@ -39,7 +39,7 @@ var requiredDriverStoreFiles = []string{
}
// newWSLDriverDiscoverer returns a Discoverer for WSL2 drivers.
func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath, ldconfigPath string) (discover.Discover, error) {
func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, nvidiaCDIHookPath, ldconfigPath string) (discover.Discover, error) {
err := dxcore.Init()
if err != nil {
return nil, fmt.Errorf("failed to initialize dxcore: %v", err)
@@ -56,11 +56,11 @@ func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, nvidiaCT
}
logger.Infof("Using WSL driver store paths: %v", driverStorePaths)
return newWSLDriverStoreDiscoverer(logger, driverRoot, nvidiaCTKPath, ldconfigPath, driverStorePaths)
return newWSLDriverStoreDiscoverer(logger, driverRoot, nvidiaCDIHookPath, ldconfigPath, driverStorePaths)
}
// newWSLDriverStoreDiscoverer returns a Discoverer for WSL2 drivers in the driver store associated with a dxcore adapter.
func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, ldconfigPath string, driverStorePaths []string) (discover.Discover, error) {
func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvidiaCDIHookPath string, ldconfigPath string, driverStorePaths []string) (discover.Discover, error) {
var searchPaths []string
seen := make(map[string]bool)
for _, path := range driverStorePaths {
@@ -88,12 +88,12 @@ func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvi
)
symlinkHook := nvidiaSMISimlinkHook{
logger: logger,
mountsFrom: libraries,
nvidiaCTKPath: nvidiaCTKPath,
logger: logger,
mountsFrom: libraries,
nvidiaCDIHookPath: nvidiaCDIHookPath,
}
ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCTKPath, ldconfigPath)
ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCDIHookPath, ldconfigPath)
d := discover.Merge(
libraries,
@@ -106,9 +106,9 @@ func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, nvi
type nvidiaSMISimlinkHook struct {
discover.None
logger logger.Interface
mountsFrom discover.Discover
nvidiaCTKPath string
logger logger.Interface
mountsFrom discover.Discover
nvidiaCDIHookPath string
}
// Hooks returns a hook that creates a symlink to nvidia-smi in the driver store.
@@ -135,7 +135,7 @@ func (m nvidiaSMISimlinkHook) Hooks() ([]discover.Hook, error) {
}
link := "/usr/bin/nvidia-smi"
links := []string{fmt.Sprintf("%s::%s", target, link)}
symlinkHook := discover.CreateCreateSymlinkHook(m.nvidiaCTKPath, links)
symlinkHook := discover.CreateCreateSymlinkHook(m.nvidiaCDIHookPath, links)
return symlinkHook.Hooks()
}

View File

@@ -92,8 +92,8 @@ func TestNvidiaSMISymlinkHook(t *testing.T) {
expectedHooks: []discover.Hook{
{
Lifecycle: "createContainer",
Path: "nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "create-symlinks",
Path: "nvidia-cdi-hook",
Args: []string{"nvidia-cdi-hook", "create-symlinks",
"--link", "nvidia-smi::/usr/bin/nvidia-smi"},
},
},
@@ -112,8 +112,8 @@ func TestNvidiaSMISymlinkHook(t *testing.T) {
expectedHooks: []discover.Hook{
{
Lifecycle: "createContainer",
Path: "nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "create-symlinks",
Path: "nvidia-cdi-hook",
Args: []string{"nvidia-cdi-hook", "create-symlinks",
"--link", "/some/path/nvidia-smi::/usr/bin/nvidia-smi"},
},
},
@@ -132,8 +132,8 @@ func TestNvidiaSMISymlinkHook(t *testing.T) {
expectedHooks: []discover.Hook{
{
Lifecycle: "createContainer",
Path: "nvidia-ctk",
Args: []string{"nvidia-ctk", "hook", "create-symlinks",
Path: "nvidia-cdi-hook",
Args: []string{"nvidia-cdi-hook", "create-symlinks",
"--link", "/some/path/nvidia-smi::/usr/bin/nvidia-smi"},
},
},
@@ -143,9 +143,9 @@ func TestNvidiaSMISymlinkHook(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
m := nvidiaSMISimlinkHook{
logger: logger,
mountsFrom: tc.mounts,
nvidiaCTKPath: "nvidia-ctk",
logger: logger,
mountsFrom: tc.mounts,
nvidiaCDIHookPath: "nvidia-cdi-hook",
}
devices, err := m.Devices()

View File

@@ -23,7 +23,7 @@ import (
"strings"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
@@ -34,28 +34,31 @@ import (
)
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
edits, err := l.GetGPUDeviceEdits(d)
if err != nil {
return nil, fmt.Errorf("failed to get edits for device: %v", err)
}
name, err := l.deviceNamer.GetDeviceName(i, convert{d})
var deviceSpecs []specs.Device
names, err := l.deviceNamers.GetDeviceNames(i, convert{d})
if err != nil {
return nil, fmt.Errorf("failed to get device name: %v", err)
}
spec := specs.Device{
Name: name,
ContainerEdits: *edits.ContainerEdits,
for _, name := range names {
spec := specs.Device{
Name: name,
ContainerEdits: *edits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, spec)
}
return &spec, nil
return deviceSpecs, nil
}
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.
func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error) {
device, err := newFullGPUDiscoverer(l.logger, l.devRoot, l.nvidiaCTKPath, d)
device, err := newFullGPUDiscoverer(l.logger, l.devRoot, l.nvidiaCDIHookPath, d)
if err != nil {
return nil, fmt.Errorf("failed to create device discoverer: %v", err)
}
@@ -70,17 +73,17 @@ func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error
// byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links
type byPathHookDiscoverer struct {
logger logger.Interface
devRoot string
nvidiaCTKPath string
pciBusID string
deviceNodes discover.Discover
logger logger.Interface
devRoot string
nvidiaCDIHookPath string
pciBusID string
deviceNodes discover.Discover
}
var _ discover.Discover = (*byPathHookDiscoverer)(nil)
// newFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCDIHookPath string, d device.Device) (discover.Discover, error) {
// TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface.
// This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin.
minor, ret := d.GetMinorNumber()
@@ -109,17 +112,17 @@ func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath
)
byPathHooks := &byPathHookDiscoverer{
logger: logger,
devRoot: devRoot,
nvidiaCTKPath: nvidiaCTKPath,
pciBusID: pciBusID,
deviceNodes: deviceNodes,
logger: logger,
devRoot: devRoot,
nvidiaCDIHookPath: nvidiaCDIHookPath,
pciBusID: pciBusID,
deviceNodes: deviceNodes,
}
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
logger,
devRoot,
nvidiaCTKPath,
nvidiaCDIHookPath,
deviceNodes,
)
@@ -154,8 +157,8 @@ func (d *byPathHookDiscoverer) Hooks() ([]discover.Hook, error) {
args = append(args, "--link", l)
}
hook := discover.CreateNvidiaCTKHook(
d.nvidiaCTKPath,
hook := discover.CreateNvidiaCDIHook(
d.nvidiaCDIHookPath,
"create-symlinks",
args...,
)

View File

@@ -68,7 +68,7 @@ func (l *gdslib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
}
// GetGPUDeviceSpecs is unsupported for the gdslib specs
func (l *gdslib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
func (l *gdslib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
}
@@ -78,7 +78,7 @@ func (l *gdslib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
}
// GetMIGDeviceSpecs is unsupported for the gdslib specs
func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
}

View File

@@ -44,7 +44,7 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
tegra.WithLogger(l.logger),
tegra.WithDriverRoot(l.driverRoot),
tegra.WithDevRoot(l.devRoot),
tegra.WithNVIDIACTKPath(l.nvidiaCTKPath),
tegra.WithNVIDIACDIHookPath(l.nvidiaCDIHookPath),
tegra.WithLdconfigPath(l.ldconfigPath),
tegra.WithCSVFiles(l.csvFiles),
tegra.WithLibrarySearchPaths(l.librarySearchPaths...),
@@ -58,16 +58,20 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
return nil, fmt.Errorf("failed to create container edits for CSV files: %v", err)
}
name, err := l.deviceNamer.GetDeviceName(0, uuidUnsupported{})
names, err := l.deviceNamers.GetDeviceNames(0, uuidIgnored{})
if err != nil {
return nil, fmt.Errorf("failed to get device name: %v", err)
}
deviceSpec := specs.Device{
Name: name,
ContainerEdits: *e.ContainerEdits,
var deviceSpecs []specs.Device
for _, name := range names {
deviceSpec := specs.Device{
Name: name,
ContainerEdits: *e.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, deviceSpec)
}
return []specs.Device{deviceSpec}, nil
return deviceSpecs, nil
}
// GetCommonEdits generates a CDI specification that can be used for ANY devices
@@ -82,7 +86,7 @@ func (l *csvlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
}
// GetGPUDeviceSpecs is not supported for CSV files and always returns an error.
func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported for CSV files")
}
@@ -92,7 +96,7 @@ func (l *csvlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
}
// GetMIGDeviceSpecs is not supported for CSV files and always returns an error.
func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported for CSV files")
}

View File

@@ -22,7 +22,7 @@ import (
"strings"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
@@ -80,7 +80,17 @@ func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) {
// GetDeviceSpecsByID returns the CDI device specs for the GPU(s) represented by
// the provided identifiers, where an identifier is an index or UUID of a valid
// GPU device.
func (l *nvmllib) GetDeviceSpecsByID(identifiers ...string) ([]specs.Device, error) {
// Deprecated: Use GetDeviceSpecsBy instead.
func (l *nvmllib) GetDeviceSpecsByID(ids ...string) ([]specs.Device, error) {
	// Convert each plain string ID to the typed identifier that
	// GetDeviceSpecsBy expects, preserving the order of the arguments.
	var typed []device.Identifier
	for i := range ids {
		typed = append(typed, device.Identifier(ids[i]))
	}

	// All of the actual lookup logic lives in GetDeviceSpecsBy.
	return l.GetDeviceSpecsBy(typed...)
}
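// GetDeviceSpecsBy returns the CDI device specs for the GPU(s) represented by
// the provided identifiers, where an identifier is an index or UUID of a valid
// GPU device.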
func (l *nvmllib) GetDeviceSpecsBy(identifiers ...device.Identifier) ([]specs.Device, error) {
for _, id := range identifiers {
if id == "all" {
return l.GetAllDeviceSpecs()
@@ -109,7 +119,7 @@ func (l *nvmllib) GetDeviceSpecsByID(identifiers ...string) ([]specs.Device, err
return nil, fmt.Errorf("failed to get CDI device edits for identifier %q: %w", identifiers[i], err)
}
deviceSpec := specs.Device{
Name: identifiers[i],
Name: string(identifiers[i]),
ContainerEdits: *deviceEdits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, deviceSpec)
@@ -119,7 +129,7 @@ func (l *nvmllib) GetDeviceSpecsByID(identifiers ...string) ([]specs.Device, err
}
// TODO: move this to go-nvlib?
func (l *nvmllib) getNVMLDevicesByID(identifiers ...string) ([]nvml.Device, error) {
func (l *nvmllib) getNVMLDevicesByID(identifiers ...device.Identifier) ([]nvml.Device, error) {
var devices []nvml.Device
for _, id := range identifiers {
dev, err := l.getNVMLDeviceByID(id)
@@ -131,25 +141,24 @@ func (l *nvmllib) getNVMLDevicesByID(identifiers ...string) ([]nvml.Device, erro
return devices, nil
}
func (l *nvmllib) getNVMLDeviceByID(id string) (nvml.Device, error) {
func (l *nvmllib) getNVMLDeviceByID(id device.Identifier) (nvml.Device, error) {
var err error
devID := device.Identifier(id)
if devID.IsUUID() {
return l.nvmllib.DeviceGetHandleByUUID(id)
if id.IsUUID() {
return l.nvmllib.DeviceGetHandleByUUID(string(id))
}
if devID.IsGpuIndex() {
if idx, err := strconv.Atoi(id); err == nil {
if id.IsGpuIndex() {
if idx, err := strconv.Atoi(string(id)); err == nil {
return l.nvmllib.DeviceGetHandleByIndex(idx)
}
return nil, fmt.Errorf("failed to convert device index to an int: %w", err)
}
if devID.IsMigIndex() {
if id.IsMigIndex() {
var gpuIdx, migIdx int
var parent nvml.Device
split := strings.SplitN(id, ":", 2)
split := strings.SplitN(string(id), ":", 2)
if gpuIdx, err = strconv.Atoi(split[0]); err != nil {
return nil, fmt.Errorf("failed to convert device index to an int: %w", err)
}
@@ -208,11 +217,11 @@ func (l *nvmllib) getEditsForMIGDevice(nvmlDevice nvml.Device) (*cdi.ContainerEd
func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
var deviceSpecs []specs.Device
err := l.devicelib.VisitDevices(func(i int, d device.Device) error {
deviceSpec, err := l.GetGPUDeviceSpecs(i, d)
specsForDevice, err := l.GetGPUDeviceSpecs(i, d)
if err != nil {
return err
}
deviceSpecs = append(deviceSpecs, *deviceSpec)
deviceSpecs = append(deviceSpecs, specsForDevice...)
return nil
})
@@ -225,11 +234,11 @@ func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
func (l *nvmllib) getMigDeviceSpecs() ([]specs.Device, error) {
var deviceSpecs []specs.Device
err := l.devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
deviceSpec, err := l.GetMIGDeviceSpecs(i, d, j, mig)
specsForDevice, err := l.GetMIGDeviceSpecs(i, d, j, mig)
if err != nil {
return err
}
deviceSpecs = append(deviceSpecs, *deviceSpec)
deviceSpecs = append(deviceSpecs, specsForDevice...)
return nil
})

View File

@@ -54,7 +54,7 @@ func (l *wsllib) GetAllDeviceSpecs() ([]specs.Device, error) {
// GetCommonEdits generates a CDI specification that can be used for ANY devices
func (l *wsllib) GetCommonEdits() (*cdi.ContainerEdits, error) {
driver, err := newWSLDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.ldconfigPath)
driver, err := newWSLDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCDIHookPath, l.ldconfigPath)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for WSL driver: %v", err)
}
@@ -68,7 +68,7 @@ func (l *wsllib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
}
// GetGPUDeviceSpecs is not supported on WSL and always returns an error.
func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported on WSL")
}
@@ -78,7 +78,7 @@ func (l *wsllib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
}
// GetMIGDeviceSpecs is not supported on WSL and always returns an error.
func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported on WSL")
}

View File

@@ -21,7 +21,8 @@ import (
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
@@ -44,11 +45,12 @@ type nvcdilib struct {
nvmllib nvml.Interface
mode string
devicelib device.Interface
deviceNamer DeviceNamer
deviceNamers DeviceNamers
driverRoot string
devRoot string
nvidiaCTKPath string
nvidiaCDIHookPath string
ldconfigPath string
configSearchPaths []string
librarySearchPaths []string
csvFiles []string
@@ -75,8 +77,12 @@ func New(opts ...Option) (Interface, error) {
if l.logger == nil {
l.logger = logger.New()
}
if l.deviceNamer == nil {
l.deviceNamer, _ = NewDeviceNamer(DeviceNameStrategyIndex)
if len(l.deviceNamers) == 0 {
indexNamer, _ := NewDeviceNamer(DeviceNameStrategyIndex)
l.deviceNamers = []DeviceNamer{indexNamer}
}
if l.nvidiaCDIHookPath == "" {
l.nvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook"
}
if l.driverRoot == "" {
l.driverRoot = "/"
@@ -84,16 +90,35 @@ func New(opts ...Option) (Interface, error) {
if l.devRoot == "" {
l.devRoot = l.driverRoot
}
if l.nvidiaCTKPath == "" {
l.nvidiaCTKPath = "/usr/bin/nvidia-ctk"
l.driver = root.New(
root.WithLogger(l.logger),
root.WithDriverRoot(l.driverRoot),
root.WithLibrarySearchPaths(l.librarySearchPaths...),
)
if l.nvmllib == nil {
var nvmlOpts []nvml.LibraryOption
candidates, err := l.driver.Libraries().Locate("libnvidia-ml.so.1")
if err != nil {
l.logger.Warningf("Ignoring error in locating libnvidia-ml.so.1: %v", err)
} else {
libNvidiaMlPath := candidates[0]
l.logger.Infof("Using %v", libNvidiaMlPath)
nvmlOpts = append(nvmlOpts, nvml.WithLibraryPath(libNvidiaMlPath))
}
l.nvmllib = nvml.New(nvmlOpts...)
}
if l.devicelib == nil {
l.devicelib = device.New(l.nvmllib)
}
if l.infolib == nil {
l.infolib = info.New()
l.infolib = info.New(
info.WithRoot(l.driverRoot),
info.WithLogger(l.logger),
info.WithNvmlLib(l.nvmllib),
info.WithDeviceLib(l.devicelib),
)
}
// TODO: We need to improve the construction of this driver root.
l.driver = root.New(l.logger, l.driverRoot, l.librarySearchPaths)
var lib Interface
switch l.resolveMode() {
case ModeCSV:
@@ -107,13 +132,6 @@ func New(opts ...Option) (Interface, error) {
}
lib = (*managementlib)(l)
case ModeNvml:
if l.nvmllib == nil {
l.nvmllib = nvml.New()
}
if l.devicelib == nil {
l.devicelib = device.New(device.WithNvml(l.nvmllib))
}
lib = (*nvmllib)(l)
case ModeWsl:
lib = (*wsllib)(l)
@@ -161,32 +179,36 @@ func (l *wrapper) GetSpec() (spec.Interface, error) {
)
}
// GetCommonEdits delegates to the wrapped Interface and then appends the
// NVIDIA_VISIBLE_DEVICES=void environment variable to the returned edits.
// An error from the wrapped call is returned as-is together with nil edits.
func (l *wrapper) GetCommonEdits() (*cdi.ContainerEdits, error) {
	// Call through the embedded Interface explicitly; calling
	// l.GetCommonEdits() here would recurse into this method.
	commonEdits, err := l.Interface.GetCommonEdits()
	if err != nil {
		return nil, err
	}

	commonEdits.Env = append(commonEdits.Env, "NVIDIA_VISIBLE_DEVICES=void")
	return commonEdits, nil
}
// resolveMode resolves the mode for CDI spec generation based on the current system.
func (l *nvcdilib) resolveMode() (rmode string) {
if l.mode != ModeAuto {
return l.mode
}
defer func() {
l.logger.Infof("Auto-detected mode as %q", rmode)
l.logger.Infof("Auto-detected mode as '%v'", rmode)
}()
isWSL, reason := l.infolib.HasDXCore()
l.logger.Debugf("Is WSL-based system? %v: %v", isWSL, reason)
if isWSL {
platform := l.infolib.ResolvePlatform()
switch platform {
case info.PlatformNVML:
return ModeNvml
case info.PlatformTegra:
return ModeCSV
case info.PlatformWSL:
return ModeWsl
}
isNvml, reason := l.infolib.HasNvml()
l.logger.Debugf("Is NVML-based system? %v: %v", isNvml, reason)
isTegra, reason := l.infolib.IsTegraSystem()
l.logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
if isTegra && !isNvml {
return ModeCSV
}
l.logger.Warningf("Unsupported platform detected: %v; assuming %v", platform, ModeNvml)
return ModeNvml
}

View File

@@ -1,116 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvcdi
import (
"fmt"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
// TestResolveMode verifies that resolveMode returns an explicitly configured
// mode unchanged and that, for mode "auto", the result follows from the
// capabilities reported by the mocked info library.
func TestResolveMode(t *testing.T) {
	nullLogger, _ := testlog.NewNullLogger()

	// scenario pairs a configured mode and mocked system capabilities with
	// the mode that resolveMode is expected to return.
	type scenario struct {
		mode      string
		isTegra   bool
		hasDXCore bool
		hasNVML   bool
		expected  string
	}

	// The order of the entries is significant: subtests are named by index.
	scenarios := []scenario{
		// auto: DXCore is present.
		{mode: "auto", hasDXCore: true, expected: "wsl"},
		// auto: Tegra system without NVML.
		{mode: "auto", hasDXCore: false, isTegra: true, hasNVML: false, expected: "csv"},
		// auto: nothing detected.
		{mode: "auto", hasDXCore: false, isTegra: false, hasNVML: false, expected: "nvml"},
		// auto: Tegra system that also has NVML.
		{mode: "auto", hasDXCore: false, isTegra: true, hasNVML: true, expected: "nvml"},
		// auto: neither DXCore nor Tegra; NVML left at its zero value.
		{mode: "auto", hasDXCore: false, isTegra: false, expected: "nvml"},
		// Explicit modes are returned as-is regardless of the capabilities.
		{mode: "nvml", hasDXCore: true, isTegra: true, expected: "nvml"},
		{mode: "wsl", hasDXCore: false, expected: "wsl"},
		{mode: "not-auto", hasDXCore: true, expected: "not-auto"},
	}

	for idx, sc := range scenarios {
		name := fmt.Sprintf("test case %d", idx)
		t.Run(name, func(t *testing.T) {
			mock := infoMock{
				hasDXCore: sc.hasDXCore,
				isTegra:   sc.isTegra,
				hasNVML:   sc.hasNVML,
			}
			lib := nvcdilib{
				logger:  nullLogger,
				mode:    sc.mode,
				infolib: mock,
			}

			require.Equal(t, sc.expected, lib.resolveMode())
		})
	}
}
// infoMock is a test double whose methods report fixed, preconfigured
// answers instead of probing the system.
type infoMock struct {
	// hasDXCore is the value reported by HasDXCore.
	hasDXCore bool
	// isTegra is the value reported by IsTegraSystem.
	isTegra bool
	// hasNVML is the value reported by HasNvml.
	hasNVML bool
}

// HasDXCore returns the configured hasDXCore flag and an empty reason.
func (m infoMock) HasDXCore() (bool, string) {
	return m.hasDXCore, ""
}

// HasNvml returns the configured hasNVML flag and an empty reason.
func (m infoMock) HasNvml() (bool, string) {
	return m.hasNVML, ""
}

// IsTegraSystem returns the configured isTegra flag and an empty reason.
func (m infoMock) IsTegraSystem() (bool, string) {
	return m.isTegra, ""
}

View File

@@ -66,7 +66,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
return nil, fmt.Errorf("failed to get CUDA version: %v", err)
}
driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCTKPath, m.ldconfigPath, version)
driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCDIHookPath, m.ldconfigPath, version)
if err != nil {
return nil, fmt.Errorf("failed to create driver library discoverer: %v", err)
}
@@ -123,7 +123,7 @@ func (m *managementlib) newManagementDeviceDiscoverer() (discover.Discover, erro
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
m.logger,
m.devRoot,
m.nvidiaCTKPath,
m.nvidiaCDIHookPath,
deviceNodes,
)
@@ -175,7 +175,7 @@ func (m *managementlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, e
}
// GetGPUDeviceSpecs is unsupported for the managementlib specs
func (m *managementlib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
func (m *managementlib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
}
@@ -185,7 +185,7 @@ func (m *managementlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi
}
// GetMIGDeviceSpecs is unsupported for the managementlib specs
func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
}

Some files were not shown because too many files have changed in this diff Show More