Compare commits

...

286 Commits

Author SHA1 Message Date
Evan Lezar
c5a93b8d70 Merge branch 'cherry-pick-wsl2' into 'release-1.13'
Backport CDI fixes for 1.13.3 release

See merge request nvidia/container-toolkit/container-toolkit!429
2023-06-27 18:40:19 +00:00
Evan Lezar
cc06766f25 Merge branch 'fix-load-kernel-modules' into 'main'
Split internal system package

See merge request nvidia/container-toolkit/container-toolkit!420
2023-06-27 17:36:57 +02:00
Evan Lezar
7c807c2c22 Merge branch 'CNT-4302/cdi-only' into 'main'
Skip additional modifications in CDI mode

See merge request nvidia/container-toolkit/container-toolkit!413
2023-06-27 17:08:14 +02:00
Evan Lezar
89781ad6a3 Merge branch 'use-major-minor-for-cuda-version' into 'main'
Use *.* pattern when locating libcuda.so

See merge request nvidia/container-toolkit/container-toolkit!397
2023-06-27 16:59:35 +02:00
Evan Lezar
f677245d60 Merge branch 'fix-multiple-driver-roots-wsl' into 'main'
Fix bug with multiple driver store paths

See merge request nvidia/container-toolkit/container-toolkit!425
2023-06-27 16:59:33 +02:00
Evan Lezar
9d31bd4cc3 Merge branch 'fix-cdi-permissions' into 'main'
Properly set spec permissions

See merge request nvidia/container-toolkit/container-toolkit!383
2023-06-27 16:27:28 +02:00
Carlos Eduardo Arango Gutierrez
b063fa40b1 Merge branch 'fix-cdi-spec-permissions' into 'main'
Generate CDI specifications with 644 permissions to allow non-root clients to consume them

See merge request nvidia/container-toolkit/container-toolkit!381
2023-06-27 16:27:27 +02:00
Evan Lezar
9b7904e0bb Merge branch 'CNT-4075' into 'release-1.13'
Allow same envars for all runtime configs

See merge request nvidia/container-toolkit/container-toolkit!419
2023-06-27 14:26:29 +00:00
Evan Lezar
a9ccef6090 Ensure common envvars have higher precedence
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 16:25:26 +02:00
Carlos Eduardo Arango Gutierrez
a4e13c5197 Add entry to changelog
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
2023-06-27 16:25:24 +02:00
Evan Lezar
c9a8b7f335 Ensure runtime dir is set for crio cleanup
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 16:25:00 +02:00
Evan Lezar
591e610905 Remove unused constants and variables
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 16:25:00 +02:00
Evan Lezar
524802df2b Rework restart logic
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 16:25:00 +02:00
Evan Lezar
19ca4338f2 Add version info to config CLIs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 16:25:00 +02:00
Evan Lezar
56c533d7d4 Refactor toolking to setup and cleanup configs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 16:25:00 +02:00
Evan Lezar
b9b19494d0 Add runtimeDir as argument
Thsi change adds the --nvidia-runtime-dir as a command line
argument when configuring container runtimes in the toolkit container.
This removes the need to set it via the command line.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 16:25:00 +02:00
Evan Lezar
47b6b01f48 Allow same envars for all runtime configs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 16:25:00 +02:00
Evan Lezar
7a70850679 Merge branch 'bump-version-v1.13.3' into 'release-1.13'
Bump version to v1.13.3

See merge request nvidia/container-toolkit/container-toolkit!428
2023-06-27 14:14:02 +00:00
Evan Lezar
6052b3eba3 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 15:30:46 +02:00
Evan Lezar
4ae775d683 Bump version to 1.13.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 15:29:19 +02:00
Evan Lezar
d9cf2682f9 Merge branch 'remove-libnvidia-container-workaround' into 'release-1.13'
Merge branch 'revert-kitmaker-workaround' into 'main'

See merge request nvidia/container-toolkit/container-toolkit!417
2023-06-06 20:19:00 +00:00
Evan Lezar
1e5a6e1fa3 Merge branch 'revert-kitmaker-workaround' into 'main'
Remove workaround to add libnvidia-container0 to kitmaker archive

See merge request nvidia/container-toolkit/container-toolkit!378
2023-06-06 22:17:30 +02:00
Evan Lezar
5b81e30704 Merge branch 'cherry-pick-for-v1.13.2' into 'release-1.13'
Cherry pick changes for 1.13.2

See merge request nvidia/container-toolkit/container-toolkit!407
2023-06-06 19:03:42 +00:00
Evan Lezar
a34b08908e Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-06 20:14:27 +02:00
Evan Lezar
234f7ebd5f Merge branch 'bump-cuda-base-image' into 'main'
Bump CUDA baseimage version to 12.1.1

See merge request nvidia/container-toolkit/container-toolkit!412
2023-06-01 14:49:08 +02:00
Evan Lezar
36d1b7d2a5 Merge branch 'treat-log-errors-as-non-fatal' into 'main'
Ignore errors when creating debug log file

See merge request nvidia/container-toolkit/container-toolkit!404
2023-06-01 14:48:33 +02:00
Evan Lezar
b776bf712e Merge branch 'add-mod-probe' into 'main'
Add option to load NVIDIA kernel modules

See merge request nvidia/container-toolkit/container-toolkit!409
2023-06-01 14:48:33 +02:00
Evan Lezar
90cbe938c3 Update CHANGELOG for cherry-pick
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-01 14:48:33 +02:00
Evan Lezar
8697267e6b Merge branch 'fix-device-symlinks' into 'main'
Fix creation of device symlinks in /dev/char

See merge request nvidia/container-toolkit/container-toolkit!399
2023-06-01 14:48:33 +02:00
Evan Lezar
2d7bb636b9 Merge branch 'CNT-4285/add-runtime-hook-path' into 'main'
Add nvidia-contianer-runtime-hook.path config option

See merge request nvidia/container-toolkit/container-toolkit!401
2023-06-01 14:48:33 +02:00
Evan Lezar
7386f86904 Merge branch 'bump-runc' into 'main'
Bump golang version and update dependencies

See merge request nvidia/container-toolkit/container-toolkit!377
2023-06-01 14:47:53 +02:00
Evan Lezar
2bcb2d633d Bump version to 1.13.2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-01 08:26:59 +02:00
Evan Lezar
fac0697c93 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-01 08:26:55 +02:00
Evan Lezar
595692b9bd Set libnvidia-container branch to release-1.13
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-01 08:26:06 +02:00
Evan Lezar
f13d4402bd Merge branch 'update-release-1.13' into 'release-1.13'
Skip updating of components on release-1.13 branch

See merge request nvidia/container-toolkit/container-toolkit!405
2023-05-26 09:09:46 +00:00
Evan Lezar
968dce5a70 Skip updating of components
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-24 10:45:26 +02:00
Evan Lezar
28b70663f1 Merge branch 'skip-for-point-release' into 'main'
Skip components for patch releases

See merge request nvidia/container-toolkit/container-toolkit!374
2023-04-24 12:12:36 +00:00
Evan Lezar
c0fe8f27eb Skip components for patch releases
This change ensures that the nvidia-docker2 and nvidia-container-runtime
components are not build and distributed for patch releases.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-24 14:00:10 +02:00
Evan Lezar
926ac77bc0 Merge branch 'fix-cdi-spec-generation-on-debian' into 'main'
Resolve all symlinks when finding libraries in LDCache

See merge request nvidia/container-toolkit/container-toolkit!370
2023-04-24 10:09:37 +00:00
Evan Lezar
fc7c8f7520 Resolve all symlinks in ldcache
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-21 17:28:49 +02:00
Evan Lezar
46c1c45d85 Add /usr/lib/current to search path
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-21 11:47:42 +02:00
Evan Lezar
f99e863649 Merge branch 'CNT-4142/xorg-missing-not-fatal' into 'main'
Make discovery of Xorg libraries optional

See merge request nvidia/container-toolkit/container-toolkit!368
2023-04-21 09:47:10 +00:00
Evan Lezar
dcc21ece97 Merge branch 'add-debug-output' into 'main'
Fix target folder for kitmaker

See merge request nvidia/container-toolkit/container-toolkit!373
2023-04-20 18:38:05 +00:00
Evan Lezar
a53e3604a6 Fix target folder for kitmaker
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-20 20:37:46 +02:00
Evan Lezar
cfea6c1179 Merge branch 'add-debug-output' into 'main'
Properly create target folder for kitmaker

See merge request nvidia/container-toolkit/container-toolkit!372
2023-04-20 14:53:57 +00:00
Evan Lezar
4d1daa0b6c Properly create target folder for kitmaker
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-20 16:53:32 +02:00
Evan Lezar
df925bc7fd Merge branch 'include-libnvidia-container-in-kitmaker' into 'main'
Add a workaround to publish libnvidia-container0

See merge request nvidia/container-toolkit/container-toolkit!371
2023-04-20 08:29:02 +00:00
Evan Lezar
df22e37dfd Add a workaround to publish libnvidia-container0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-18 23:23:36 +02:00
Evan Lezar
2136266d1d Make discovery of Xorg libraries optional
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-13 18:41:38 +02:00
Evan Lezar
a95232dd33 Merge branch 'CNT-4144/non-ldcache' into 'main'
Only update ldcache if it exists

See merge request nvidia/container-toolkit/container-toolkit!369
2023-04-13 16:12:22 +00:00
Evan Lezar
29c6288128 Only update ldcache if it exists
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-13 17:18:09 +02:00
Evan Lezar
cd6fcb5297 Merge branch 'bump-version-1.13.1' into 'main'
Bump version to 1.13.1

See merge request nvidia/container-toolkit/container-toolkit!367
2023-04-13 11:12:37 +00:00
Evan Lezar
36989deff7 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-13 13:11:39 +02:00
Evan Lezar
7f6c9851fe Bump version to 1.13.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-13 13:11:09 +02:00
Evan Lezar
b7079454b5 Merge branch 'bump-versions' into 'main'
Bump nvidia-docker and nvidia-container-runtime versions

See merge request nvidia/container-toolkit/container-toolkit!366
2023-03-31 13:00:58 +00:00
Evan Lezar
448bd45ab4 Bump nvidia-docker and nvidia-container-runtime versions
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-31 15:00:15 +02:00
Evan Lezar
dde6170df1 Merge branch 'bump-version-v1.13.0' into 'main'
Bump version to v1.13.0

See merge request nvidia/container-toolkit/container-toolkit!365
2023-03-31 10:58:18 +00:00
Evan Lezar
e4b9350e65 Update libnvidia-container to v1.13.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-31 11:38:01 +02:00
Evan Lezar
622a0649ce Bump version to v1.13.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-31 11:35:53 +02:00
Evan Lezar
f6983969ad Merge branch 'nvidia-ctk-cdi-transform' into 'main'
Add 'target-driver-root' option to 'nvidia-ctk cdi generate' to transform root...

See merge request nvidia/container-toolkit/container-toolkit!363
2023-03-28 20:05:12 +00:00
Evan Lezar
7f7fc35843 Move input and output to transform root subcommand
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 21:12:48 +02:00
Evan Lezar
8eef7e5406 Merge branch 'add-runtimes' into 'main'
Add nvidia-container-runtime.runtimes config option

See merge request nvidia/container-toolkit/container-toolkit!364
2023-03-28 18:58:46 +00:00
Evan Lezar
f27c33b45f Remove target-driver-root from generate
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 11:49:45 -07:00
Evan Lezar
6a83e2ebe5 Add nvidia-ctk cdi transform root command
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 11:45:58 -07:00
Christopher Desiniotis
ee5be5e3f2 Merge branch 'CNT-4056/add-cdi-annotations' into 'main'
Add nvidia-container-runtime.modes.cdi.annotation-prefixes config option.

See merge request nvidia/container-toolkit/container-toolkit!356
2023-03-28 16:47:51 +00:00
Evan Lezar
be0cc9dc6e Add nvidia-container-runtime.runtimes config option
This change adds an nvidia-container-runtime.runtimes config option.

If this is unset no changes are made to the config and the default values are used. This
allows this setting to be overridden in cases where this is required. One such example is
crio where crun is set as the default runtime.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 17:39:17 +02:00
Evan Lezar
7c5283bb97 Merge branch 'create-device-nodes' into 'main'
Add nvidia-ctk system create-device-nodes command

See merge request nvidia/container-toolkit/container-toolkit!362
2023-03-28 15:07:04 +00:00
Evan Lezar
4d5ba09d88 Add --ignore-errors option for testing
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 16:24:17 +02:00
Evan Lezar
149236b002 Configure containerd config based on specified annotation prefixes
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 16:22:48 +02:00
Evan Lezar
ee141f97dc Reorganise setting toolkit config options
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 16:22:48 +02:00
Evan Lezar
646503ff31 Set nvidia-container-runtime.modes.cdi.annotation-prefixes in toolkit-contianer
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 16:22:48 +02:00
Evan Lezar
cdaaf5e46f Generate device nodes when creating management spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 11:29:45 +02:00
Evan Lezar
e774c51c97 Add nvidia-ctk system create-device-nodes command
This change adds an nvidia-ctk system create-device-nodes command for
creating NVIDIA device nodes. Currently this is limited to control devices
(nvidia-uvm, nvidia-uvm-tools, nvidia-modeset, nvidiactl).

A --dry-run mode is included for outputing commands that would be executed and
the driver root can be specified.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 11:29:45 +02:00
Christopher Desiniotis
7f5c9abc1e Add ability to configure CDI kind with 'nvidia-ctk cdi generate'
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-03-27 23:12:00 -07:00
Christopher Desiniotis
92d82ceaee Add 'target-driver-root' option to 'nvidia-ctk cdi generate' to transform root paths in generated spec
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-03-27 22:22:36 -07:00
Evan Lezar
c46b118f37 Add nvidia-container-runtime.modes.cdi.annotation-prefixes config option.
This change adds an nvidia-container-runtime.modes.cdi.annotation-prefixes config
option that defaults to cdi.k8s.io/. This allows the annotation prefixes parsed
for CDI devices to be overridden in cases where CDI support in container engines such
as containerd or crio need to be overridden.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-27 16:36:54 +02:00
Evan Lezar
1722b07615 Merge branch 'CNT-2264/xorg-libs' into 'main'
Inject xorg libs and config in container

See merge request nvidia/container-toolkit/container-toolkit!328
2023-03-27 14:19:52 +00:00
Evan Lezar
c13c6ebadb Inject xorg libs and config in container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-26 17:04:06 +02:00
Evan Lezar
2abe679dd1 Move libcuda locator to internal/lookup package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-26 17:04:06 +02:00
Evan Lezar
9571513601 Merge branch 'update-changelog' into 'main'
Update changelog

See merge request nvidia/container-toolkit/container-toolkit!361
2023-03-26 15:03:28 +00:00
Evan Lezar
ff2767ee7b Reorder changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-26 17:03:05 +02:00
Evan Lezar
56319475a6 Merge branch 'fix-changelog' into 'main'
Reorder changelog

See merge request nvidia/container-toolkit/container-toolkit!360
2023-03-26 14:52:27 +00:00
Evan Lezar
a3ee58a294 Reorder changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-26 16:51:59 +02:00
Evan Lezar
7a533aeff3 Merge branch 'update-nvcdi-new-with-error' into 'main'
Allow nvcdi.Option to return an error

See merge request nvidia/container-toolkit/container-toolkit!352
2023-03-26 14:13:41 +00:00
Evan Lezar
226c54613e Also return an error from nvcdi.New
This change allows nvcdi.New to return an error in addition to the
constructed library instead of panicing.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-26 16:13:12 +02:00
Evan Lezar
1ebbebf5de Merge branch 'CNT-3932/deduplicate-entries-in-cdi-spec' into 'main'
Add transform to deduplicate entities in CDI spec

See merge request nvidia/container-toolkit/container-toolkit!345
2023-03-24 19:04:43 +00:00
Evan Lezar
33f6fe0217 Generate a simplified CDI spec by default
As simplified CDI spec has no duplicate entities in any single set of container edits.
Furthermore, contianer edits defined at a spec-level are not included in the container
edits for a device.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-24 11:01:46 +02:00
Evan Lezar
5ff206e1a9 Add transform to deduplicate entities in CDI spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-24 11:01:23 +02:00
Evan Lezar
df618d3cba Merge branch 'CNT-4052/fix-arm-management-containers' into 'main'
Fix generation of management CDI spec in containers

See merge request nvidia/container-toolkit/container-toolkit!354
2023-03-23 16:39:10 +00:00
Evan Lezar
9506bd9da0 Fix generation of management CDI spec in containers
Since we relied on finding libcuda.so in the LDCache to determine both the CUDA
version and the expected directory for the driver libraries, the generation of the
management CDI specifications fails in containers where the LDCache has not been updated.

This change falls back to searching a set of predefined paths instead when the lookup of
libcuda.so in the cache fails.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-23 15:59:01 +02:00
Evan Lezar
5e0684e99d Merge branch 'update-libnvidia-container' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!353
2023-03-23 08:50:18 +00:00
Evan Lezar
09a0cb24cc Remove fedora make targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-23 10:35:57 +02:00
Evan Lezar
ff92f1d799 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-23 10:33:26 +02:00
Christopher Desiniotis
b87703c503 Merge branch 'fix-nil-logger-in-library-locator' into 'main'
Instantiate a logger when constructing a library Locator

See merge request nvidia/container-toolkit/container-toolkit!351
2023-03-21 21:54:14 +00:00
Christopher Desiniotis
b2aaa21b0a Instantiate a logger when constructing a library Locator
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-03-21 13:38:36 -07:00
Evan Lezar
310c15b046 Merge branch 'CNT-4026/only-init-nvml-when-required' into 'main'
Only init nvml as required when generating CDI specs

See merge request nvidia/container-toolkit/container-toolkit!344
2023-03-20 13:26:07 +00:00
Evan Lezar
685802b1ce Only init nvml as required when generating CDI specs
CDI generation modes such as management and wsl don't require
NVML. This change removes the top-level instantiation of nvmllib
and replaces it with an instanitation in the nvml CDI spec generation
code.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-20 14:24:08 +02:00
Evan Lezar
380eb8340a Merge branch 'blossom-ci' into 'main'
Add blossom-ci github action

See merge request nvidia/container-toolkit/container-toolkit!349
2023-03-20 09:56:23 +00:00
Evan Lezar
f98e1160f5 Update components with blossim-ci
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-20 11:06:44 +02:00
Evan Lezar
1962fd68df Merge branch 'locate-ipc-sockets-at-run' into 'main'
Locate persistenced and fabricmanager sockets at /run instead of /var/run

See merge request nvidia/container-toolkit/container-toolkit!347
2023-03-20 08:08:59 +00:00
Carlos Eduardo Arango Gutierrez
29813c1e14 Add blossom-ci github action
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
2023-03-17 16:16:27 +01:00
Evan Lezar
df40fbe03e Locate persistenced and fabricmanager sockets at /run instead of /var/run
This chagne prefers (non-symlink) sockets at /run over /var/run for
nvidia-persistenced and nvidia-fabricmanager sockets.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-17 09:23:48 +02:00
Carlos Eduardo Arango Gutierrez
7000c6074e Merge branch 'ci_rules' into 'main'
Rework pipeline triggers for MRs

See merge request nvidia/container-toolkit/container-toolkit!346
2023-03-15 13:15:23 +00:00
Evan Lezar
ef1fe3ab41 Rework pipeline triggers for MRs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-15 14:15:20 +02:00
Evan Lezar
fdd198b0e8 Merge branch 'bump-v1.13.0-rc.3' into 'main'
Bump version to v1.13.0-rc.3

See merge request nvidia/container-toolkit/container-toolkit!343
2023-03-15 07:50:50 +00:00
Evan Lezar
e37f77e02d Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-15 09:49:49 +02:00
Evan Lezar
3fcfee88be Bump version to v1.13.0-rc.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-15 09:26:19 +02:00
Evan Lezar
a082413d09 Merge branch 'trigger-ci-on-mrs-only' into 'main'
Add workflow rule to only trigger on MRs

See merge request nvidia/container-toolkit/container-toolkit!342
2023-03-15 07:10:30 +00:00
Evan Lezar
280f40508e Make pipeline manual on MRs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-15 08:51:18 +02:00
Evan Lezar
e2be0e2ff0 Add workflow rule to only trigger on MRs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-15 08:45:26 +02:00
Evan Lezar
dcff3118d9 Merge branch 'update-libnvidia-container' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!340
2023-03-14 13:54:11 +00:00
Evan Lezar
731168ec8d Update changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-14 15:05:36 +02:00
Evan Lezar
7b4435a0f8 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-14 15:05:29 +02:00
Evan Lezar
738af29724 Merge branch 'explicit-cdi-enabled-flag' into 'main'
Add --cdi-enabled option to control generating CDI spec

See merge request nvidia/container-toolkit/container-toolkit!339
2023-03-14 07:00:30 +00:00
Evan Lezar
08ef242afb Add --cdi-enabled option to control generating CDI spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-13 18:19:00 +02:00
Evan Lezar
92ea8be309 Merge branch 'fix-privileged-check-cdi-mode' into 'main'
Return empty list of devices for unprivileged containers when...

See merge request nvidia/container-toolkit/container-toolkit!337
2023-03-13 07:36:25 +00:00
Christopher Desiniotis
48414e97bb Return empty list of devices for unprivileged containers when 'accept-nvidia-visible-devices-envvar-unprivileged=false'
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-03-10 13:11:29 -08:00
Evan Lezar
77a2975524 Merge branch 'fix-kitmaker' into 'main'
Use component name as folder name

See merge request nvidia/container-toolkit/container-toolkit!336
2023-03-10 13:57:24 +00:00
Evan Lezar
ce9477966d Use component name as folder name
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-10 15:51:36 +02:00
Evan Lezar
fe02351c3a Merge branch 'bump-cuda-version' into 'main'
Bump CUDA base image version to 12.1.0

See merge request nvidia/container-toolkit/container-toolkit!335
2023-03-10 10:23:30 +00:00
Evan Lezar
9c2018a0dc Bump CUDA base image version to 12.1.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-10 11:31:23 +02:00
Evan Lezar
33e5b34fa1 Merge branch 'CNT-3999/legacy-cli-doesnt-work-in-cdi-mode' into 'main'
Add nvidia-container-runtime-hook.skip-mode-detection option to config

See merge request nvidia/container-toolkit/container-toolkit!330
2023-03-09 19:18:16 +00:00
Evan Lezar
ccf73f2505 Set skip-mode-detection in the toolkit-container by default
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 20:16:10 +02:00
Evan Lezar
3a11f6ee0a Add nvidia-container-runtime-hook.skip-mode-detection option to config
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 20:15:40 +02:00
Evan Lezar
8f694bbfb7 Merge branch 'set-nvidia-ctk-path' into 'main'
Set nvidia-ctk.path config option based on installed path

See merge request nvidia/container-toolkit/container-toolkit!334
2023-03-09 16:44:13 +00:00
Evan Lezar
4c2eff4865 Merge branch 'CNT-3998/cdi-accept-visible-devices-when-privileged' into 'main'
Honor accept-nvidia-visible-devices-envvar-when-unprivileged setting in CDI mode

See merge request nvidia/container-toolkit/container-toolkit!331
2023-03-09 15:59:08 +00:00
Evan Lezar
1fbdc17c40 Set nvidia-ctk.path config option based on installed path
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 17:53:08 +02:00
Evan Lezar
965d62f326 Merge branch 'fix-containerd-integration-tests' into 'main'
Fix integration tests failing due to CDI spec generation

See merge request nvidia/container-toolkit/container-toolkit!333
2023-03-09 14:41:52 +00:00
Evan Lezar
25ea7fa98e Remove whitespace in Makefile
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 15:32:07 +02:00
Evan Lezar
5ee040ba95 Disable CDI spec generation for integration tests 2023-03-09 15:32:07 +02:00
Evan Lezar
eb2aec9da8 Allow CDI options to be set by envvars
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 12:25:05 +02:00
Evan Lezar
973e7bda5e Check accept-nvidia-visible-devices-envvar-when-unprivileged option for CDI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 11:15:53 +02:00
Evan Lezar
154cd4ecf3 Add to config struct
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 11:15:53 +02:00
Evan Lezar
936fad1d04 Move check for privileged images to config/image/ package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 11:15:53 +02:00
Evan Lezar
86dd046c7c Merge branch 'CNT-3928/allow-cdi-container-annotations' into 'main'
Add cdi.k8s.io annotations to runtimes configured in containerd

See merge request nvidia/container-toolkit/container-toolkit!315
2023-03-09 07:52:37 +00:00
Evan Lezar
510fb248fe Add cdi.k8s.io annotations to containerd config
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-08 07:23:27 +02:00
Evan Lezar
c7384c6aee Merge branch 'fix-comment' into 'main'
Fix comment

See merge request nvidia/container-toolkit/container-toolkit!329
2023-03-08 05:15:38 +00:00
Evan Lezar
1c3c9143f8 Fix comment
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-08 07:15:05 +02:00
Evan Lezar
1c696b1e39 Merge branch 'CNT-3894/configure-mode-specific-runtimes' into 'main'
Configure .cdi and .legacy executables in Toolkit Container

See merge request nvidia/container-toolkit/container-toolkit!308
2023-03-08 05:12:50 +00:00
Evan Lezar
a2adbc1133 Merge branch 'CNT-3898/improve-cdi-annotations' into 'main'
Improve handling of environment variable devices in CDI mode

See merge request nvidia/container-toolkit/container-toolkit!321
2023-03-08 04:37:41 +00:00
Evan Lezar
36576708f0 Merge branch 'CNT-3896/gds-mofed-devices' into 'main'
Add GDS and MOFED support to the NVCDI API

See merge request nvidia/container-toolkit/container-toolkit!323
2023-03-08 04:36:55 +00:00
Evan Lezar
cc7a6f166b Handle case were runtime name is set to predefined name
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:56 +02:00
Evan Lezar
62d88e7c95 Add cdi and legacy mode runtimes
This change adds .cdi and .legacy mode-specific runtimes the list of
runtimes supported by the operator. These are also installed as
part of the NVIDIA Container Toolkit.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:55 +02:00
Evan Lezar
dca8e3123f Migrate containerd config to engine.Interface
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:55 +02:00
Evan Lezar
3bac4fad09 Migrate cri-o config update to engine.Interface
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:54 +02:00
Evan Lezar
9fff19da23 Migrate docker config to engine.Interface
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:54 +02:00
Evan Lezar
e5bb4d2718 Move runtime config code from config to config/engine
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:54 +02:00
Evan Lezar
5bfb51f801 Add API for interacting with runtime engine configs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:53 +02:00
Evan Lezar
ece5b29d97 Add tools/container/operator package to handle runtime naming
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:53 +02:00
Evan Lezar
ec8a92c17f Use nvidia-container-runtime.experimental as wrapper
This change switches to using nvidia-container-runtime.experimental as the
wrapper name over nvidia-container-runtime-experimental. This is consistent
with upcoming mode-specific binaries.

The wrapper is created at nvidia-container-runtime.experimental.real.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:53 +02:00
Evan Lezar
868393b7ed Add mofed mode to nvcdi API
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 18:47:52 +02:00
Evan Lezar
ebe18fbb7f Add gds mode to nvcdi API
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 18:47:52 +02:00
Evan Lezar
9435343541 Merge branch 'fix-kitmaker' into 'main'
Include = when extracting manifest information

See merge request nvidia/container-toolkit/container-toolkit!327
2023-03-07 14:44:27 +00:00
Evan Lezar
1cd20afe4f Include = when extracting manifest information
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 16:43:49 +02:00
Evan Lezar
1e6fe40c76 Allow nvidia-container-runtime.modes.cdi.default-kind to be set
This change allows the nvidia-container-runtime.modes.cdi.default-kind
to be set in the toolkit-container.

The NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND envvar is used.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 16:19:38 +02:00
Evan Lezar
6d220ed9a2 Rework selection of devices in CDI mode
The following changes are made:
* The default-cdi-kind config option is used to convert an envvar entry to a fully-qualified device name
* If annotation devices exist, these are used instead of the envvar devices.
* The `all` device is no longer treated as a special case and MUST exist in the CDI spec.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 16:18:53 +02:00
Evan Lezar
f00439c93e Add nvidia-container-runtime.modes.csv.default-kind config option
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 16:18:53 +02:00
Evan Lezar
c59696e30e Merge branch 'fix-kitmaker' into 'main'
Log source file

See merge request nvidia/container-toolkit/container-toolkit!326
2023-03-06 16:26:43 +00:00
Evan Lezar
89c18c73cd Add source and log curl command
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 18:26:05 +02:00
Evan Lezar
cb5006c73f Merge branch 'CNT-3897/generate-management-container-spec' into 'main'
Generate CDI specs for management containers

See merge request nvidia/container-toolkit/container-toolkit!314
2023-03-06 16:23:13 +00:00
Evan Lezar
547b71f222 Merge branch 'change-discovery-mode' into 'main'
Rename --discovery-mode to --mode

See merge request nvidia/container-toolkit/container-toolkit!318
2023-03-06 16:21:22 +00:00
Evan Lezar
ae84bfb055 Log source file
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 18:11:12 +02:00
Evan Lezar
9b303d5b89 Merge branch 'fix-changelist' into 'main'
Strip on tilde for kitmaker version

See merge request nvidia/container-toolkit/container-toolkit!325
2023-03-06 14:10:54 +00:00
Evan Lezar
d944f934d7 Strip on tilde for kitmaker version
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 16:10:25 +02:00
Evan Lezar
c37209cd09 Merge branch 'fix-changelist' into 'main'
Fix blank changelist in kitmaker properties

See merge request nvidia/container-toolkit/container-toolkit!324
2023-03-06 13:51:19 +00:00
Evan Lezar
863b569a61 Fix blank changelist in kitmaker properties
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 15:50:38 +02:00
Evan Lezar
f36c514f1f Merge branch 'update-kitmaker-folders' into 'main'
Update kitmaker target folder

See merge request nvidia/container-toolkit/container-toolkit!313
2023-03-06 11:16:49 +00:00
Evan Lezar
3ab28c7fa4 Merge branch 'fix-rule-for-release' into 'main'
Run full build on release- branches

See merge request nvidia/container-toolkit/container-toolkit!320
2023-03-06 10:56:58 +00:00
Evan Lezar
c03258325b Run full build on release- branches
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 12:54:27 +02:00
Evan Lezar
20d3bb189b Rename --discovery-mode to --mode
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 11:00:22 +02:00
Evan Lezar
90acec60bb Skip CDI spec generation in integration tests
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 10:57:40 +02:00
Evan Lezar
0565888c03 Generate CDI spec in toolkit container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 10:57:40 +02:00
Evan Lezar
f7e817cff6 Support management mode in nvidia-ctk cdi generate
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 10:53:43 +02:00
Evan Lezar
29cbbe83f9 Add management mode to CDI spec generation API
These changes add support for generating a management spec to the nvcdi API.
A management spec consists of a single CDI device (`all`) which includes all expected
NVIDIA device nodes, driver libraries, binaries, and IPC sockets.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 10:53:43 +02:00
Evan Lezar
64b16acb1f Also install nvidia-ctk in toolkit-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 10:53:43 +02:00
Evan Lezar
19c20bb422 Merge branch 'CNT-3931/add-spec-validation' into 'main'
Add nvcdi.spec for writing and validating CDI specifications

See merge request nvidia/container-toolkit/container-toolkit!306
2023-03-06 08:52:56 +00:00
Evan Lezar
28b10d2ee0 Merge branch 'fix-toolkit-ctr-envvars' into 'main'
Fix handling of envvars in toolkit container which modify the NVIDIA Container Runtime config

See merge request nvidia/container-toolkit/container-toolkit!317
2023-03-06 07:36:03 +00:00
Christopher Desiniotis
1f5123f72a Fix handling of envvars in toolkit container which modify the NVIDIA Container Runtime config
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-03-05 20:14:04 -08:00
Evan Lezar
ac5b6d097b Use kitmaker folder for releases
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 17:27:07 +02:00
Evan Lezar
a7bf9ddf28 Update kitmaker folder structure
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 17:27:07 +02:00
Evan Lezar
e27479e170 Add GIT_COMMIT_SHORT to packaging image manifest
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 17:27:07 +02:00
Evan Lezar
fa28e738c6 Merge branch 'fix-internal-scans' into 'main'
Fix internal scans

See merge request nvidia/container-toolkit/container-toolkit!316
2023-03-01 15:26:27 +00:00
Evan Lezar
898c5555f6 Fix internal scans
This fixes the internal scans due to the removed ubuntu18.04 images.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 17:25:28 +02:00
Evan Lezar
314059fcf0 Move path manipulation to spec.Save
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:49:04 +02:00
Evan Lezar
221781bd0b Use full path for output spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:48:28 +02:00
Evan Lezar
9f5e141437 Expose vendor and class as options
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:48:28 +02:00
Evan Lezar
8be6de177f Move formatJSON and formatYAML to nvcdi/spec package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:48:28 +02:00
Evan Lezar
890a519121 Use nvcdi.spec package to write and validate spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:48:28 +02:00
Evan Lezar
89321edae6 Add top-level GetSpec function to nvcdi API
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:48:28 +02:00
Evan Lezar
6d6cd56196 Return nvcdi.spec.Interface from GetSpec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 12:45:30 +02:00
Evan Lezar
2e95e04359 Add nvcdi.spec package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 12:45:30 +02:00
Evan Lezar
accba4ead5 Merge branch 'CNT-3965/clean-up-by-path-symlinks' into 'main'
Improve handling of /dev/dri devices and nested device paths

See merge request nvidia/container-toolkit/container-toolkit!307
2023-03-01 10:25:48 +00:00
Christopher Desiniotis
1e9b7883cf Merge branch 'CNT-3937/add-target-driver-root' into 'main'
Add a driver root transformer to nvcdi

See merge request nvidia/container-toolkit/container-toolkit!300
2023-02-28 18:04:29 +00:00
Christopher Desiniotis
87e406eee6 Update root transformer tests to ensure container path is not modified
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-02-28 09:00:05 -08:00
Christopher Desiniotis
45ed3b0412 Handle hook arguments for creation of symlinks
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-02-28 09:00:02 -08:00
Christopher Desiniotis
0516fc96ca Add Transform interface and initial implemention for a root transform
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-02-28 08:56:13 -08:00
Evan Lezar
e7a435fd5b Merge branch 'update-libnvidia-container' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!312
2023-02-27 13:41:26 +00:00
Evan Lezar
7a249d7771 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 15:41:02 +02:00
Evan Lezar
7986ff9cee Merge branch 'CNT-3963/deduplicate-wsl-driverstore-paths' into 'main'
Deduplicate WSL driverstore paths

See merge request nvidia/container-toolkit/container-toolkit!304
2023-02-27 13:27:31 +00:00
Evan Lezar
b74c13d75f Merge branch 'fix-rpm-postun-scriptlet' into 'main'
nvidia-container-toolkit.spec: fix syntax error in postun scriptlet

See merge request nvidia/container-toolkit/container-toolkit!309
2023-02-27 12:36:49 +00:00
Evan Lezar
de8eeb87f4 Merge branch 'remove-outdated-platforms' into 'main'
Remove outdated platforms from CI

See merge request nvidia/container-toolkit/container-toolkit!310
2023-02-27 11:48:33 +00:00
Evan Lezar
36c4174de3 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 13:45:44 +02:00
Evan Lezar
3497936cdf Remove ubuntu18.04 toolkit-container image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 12:55:17 +02:00
Evan Lezar
81abc92743 Remove fedora35 from 'all' targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 12:31:38 +02:00
Evan Lezar
1ef8dc3137 Remove centos7-ppc64le from CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 12:30:29 +02:00
Evan Lezar
9a5c1bbe48 Remove ubuntu16.04 packages from CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 12:29:35 +02:00
Evan Lezar
30dff61376 Remove debian9 packages from CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 12:28:46 +02:00
Claudius Volz
de1bb68d19 nvidia-container-toolkit.spec: fix syntax error in postun scriptlet
Signed-off-by: Claudius Volz <c.volz@gmx.de>
2023-02-27 00:45:21 +01:00
Evan Lezar
06d8bb5019 Merge branch 'CNT-3965/dont-fail-chmod-hook' into 'main'
Skip paths with errors in chmod hook

See merge request nvidia/container-toolkit/container-toolkit!303
2023-02-22 15:20:26 +00:00
Evan Lezar
b4dc1f338d Generate nested device folder permission hooks per device
This change generates device folder permission hooks per device instead of
at a spec level. This ensures that the hook is not injected for a device that
does not have any nested device nodes.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-22 17:16:23 +02:00
Evan Lezar
181128fe73 Only include by-path-symlinks for injected device nodes
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-22 16:53:04 +02:00
Evan Lezar
252838e696 Merge branch 'bump-version-v1.13.0-rc.2' into 'main'
Bump version to v1.13.0-rc.2

See merge request nvidia/container-toolkit/container-toolkit!305
2023-02-21 13:11:00 +00:00
Evan Lezar
49f171a8b1 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-21 14:27:02 +02:00
Evan Lezar
3d12803ab3 Bump version to v1.13.0-rc.2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-21 14:24:37 +02:00
Evan Lezar
a168091bfb Add v1.13.0-rc.1 Changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-21 14:23:52 +02:00
Evan Lezar
35fc57291f Deduplicate WSL driverstore paths
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-21 11:48:56 +02:00
Evan Lezar
2542224d7b Skip paths with errors in chmod hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-21 11:47:11 +02:00
Evan Lezar
882fbb3209 Merge branch 'add-cdi-auto-mode' into 'main'
Add constants for CDI mode to nvcdi API

See merge request nvidia/container-toolkit/container-toolkit!302
2023-02-20 14:41:07 +00:00
Evan Lezar
2680c45811 Add mode constants to nvcdi
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 16:33:51 +02:00
Evan Lezar
b76808dbd5 Add tests for CDI mode resolution
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 16:33:33 +02:00
Evan Lezar
ba50b50a15 Merge branch 'add-cdi-auto-mode' into 'main'
Add auto mode to CDI spec generation

See merge request nvidia/container-toolkit/container-toolkit!292
2023-02-20 14:30:33 +00:00
Evan Lezar
f6d3f8d471 Merge branch 'CNT-3895/add-runtime-mode-config' into 'main'
Add nvidia-container-runtime.mode config option

See merge request nvidia/container-toolkit/container-toolkit!299
2023-02-20 12:51:18 +00:00
Evan Lezar
d9859d66bf Update go vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 14:49:58 +02:00
Evan Lezar
4ccb0b9a53 Add and resolve auto discovery mode for cdi generation
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 14:49:58 +02:00
Evan Lezar
f36c775d50 Merge branch 'wsl2-wip' into 'main'
Add CDI Spec generation on WSL2

See merge request nvidia/container-toolkit/container-toolkit!289
2023-02-20 09:36:41 +00:00
Evan Lezar
b21dc929ef Add WSL2 discovery and spec generation
These changes add a wsl discovery mode to the nvidia-ctk cdi generate command.

If wsl mode is enabled, the driver store for the available devices is used as
the source for discovered entities.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
d226925fe7 Construct nvml-based CDI lib based on mode
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
20d6e9af04 Add --discovery-mode to nvidia-ctk cdi generate command
This change adds --discovery-mode flag to the nvidia-ctk cdi generate
command and plumbs this through to the CDI API.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
5103adab89 Add mode option to nvcdi API
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
7eb435eb73 Add basic dxcore bindings
This change copies dxcore.h and dxcore.c from libnvidia-container to
allow for the driver store path to be queried. Modifications are made
to dxcore to remove the code associated with checking the components
in the driver store path.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
5d011c1333 Add Discoverer to create a single symlink
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
6adb792d57 Merge branch 'fix-nvidia-ctk-path' into 'main'
Ensure that generate uses a consistent nvidia-ctk path

See merge request nvidia/container-toolkit/container-toolkit!301
2023-02-20 08:29:44 +00:00
Evan Lezar
a844749791 Ensure that generate uses a consistent nvidia-ctk path
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:28:45 +02:00
Evan Lezar
dd0d43e726 Add nvidia-container-runtime.mode config option
This change allows the nvidia-container-runtime.mode option to be set
by the toolkit container.

This is controlled by the --nvidia-container-runtime-mode command line
argument and the NVIDIA_CONTAINER_RUNTIME_MODE envvar.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-17 18:04:49 +02:00
Evan Lezar
25811471fa Merge branch 'update-libnvidia-container' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!298
2023-02-17 08:46:56 +00:00
Evan Lezar
569bc1a889 Update Changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-17 10:46:21 +02:00
Evan Lezar
b1756b410a Merge branch 'fix-logging' into 'main'
Fix nvidia-container-runtime logging

See merge request nvidia/container-toolkit/container-toolkit!296
2023-02-16 15:17:24 +00:00
Evan Lezar
7789ac6331 Fix logger.Update and Reset
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-16 15:22:56 +01:00
Evan Lezar
7a3aabbbda Add logger test
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-16 15:22:56 +01:00
Evan Lezar
e486095603 Merge branch 'fix-nvidia-ctk-path' into 'main'
Fix issue with blank nvidia-ctk path

See merge request nvidia/container-toolkit/container-toolkit!297
2023-02-16 13:58:43 +00:00
Evan Lezar
bf6babe07e Fix issue with blank nvidia-ctk path
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-16 14:18:07 +01:00
Kevin Klues
d5a4d89682 Merge branch 'support-multimple-firmware-files' into 'main'
Add globbing for mounting multiple GSP firmware files

See merge request nvidia/container-toolkit/container-toolkit!295
2023-02-16 13:09:47 +00:00
Kevin Klues
5710b9e7e8 Add globbing for mounting multiple GSP firmware files
Newer drivers have split the GSP firmware into multiple files so a simple match
against gsp.bin in the firmware directory is no longer possible. This patch
adds globbing capabilitis to match any GSP firmware files of the form gsp*.bin
and mount them all into the container.

Signed-off-by: Kevin Klues <kklues@nvidia.com>
2023-02-16 11:53:36 +00:00
Evan Lezar
b4ab95f00c Merge branch 'fix-nvcdi-constructor' into 'main'
fix: apply options when constructing an instance of the nvcdi library

See merge request nvidia/container-toolkit/container-toolkit!294
2023-02-15 08:13:19 +00:00
Christopher Desiniotis
a52c9f0ac6 fix: apply options when constructing an instance of the nvcdi library
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-02-14 16:32:40 -08:00
Evan Lezar
b6bab4d3fd Merge branch 'expose-generate-spec' into 'main'
Implement basic CDI spec generation API

See merge request nvidia/container-toolkit/container-toolkit!257
2023-02-14 19:36:31 +00:00
Evan Lezar
5b110fba2d Add nvcdi package with basic CDI generation API
This change adds an nvcdi package that exposes a basic API for
CDI spec generation. This is used from the nvidia-ctk cdi generate
command and can be consumed by DRA implementations and the device plugin.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-14 19:52:31 +01:00
Evan Lezar
179133c8ad Merge branch 'fix-ubi8' into 'main'
Fix package version in ubi8 container builds

See merge request nvidia/container-toolkit/container-toolkit!293
2023-02-14 10:31:21 +00:00
Evan Lezar
365b6c7bc2 Fix package version in ubi8 container builds
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-14 10:50:38 +01:00
Evan Lezar
dc4887cd44 Merge branch 'cdi-executable' into 'main'
Add nvidia-container-runtime.{{MODE}} executable that overrides runtime mode

See merge request nvidia/container-toolkit/container-toolkit!288
2023-02-14 08:01:41 +00:00
Evan Lezar
c4836a576f Also skip nvidia-container-toolit-operator-extensions in release scripts
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:10:01 +01:00
Evan Lezar
98afe0d27a Generate nvidia-container-toolkit-operator-extensions package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
fdc759f7c2 Add nvidia-container-runtime.legacy executable
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
43448bac11 Add nvidia-container-runtime.cdi executable
This change adds an nvidia-container-runtime.cdi executable that
overrides the runtime mode from the config to "cdi".

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
456d2864a6 Log config in JSON if possible
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
406a5ec76f Implement runtime package for creating runtime CLI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
f71c419cfb Move modifying OCI runtime wrapper to oci package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
babb73295f Update gitignore
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:45 +01:00
Evan Lezar
f3ec5fd329 Merge branch 'packaging-verisons' into 'main'
Align release candidate RPM version with Debian version

See merge request nvidia/container-toolkit/container-toolkit!291
2023-02-13 14:53:01 +00:00
Evan Lezar
5aca0d147d Use - as version-tag separator for libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 15:10:08 +01:00
Evan Lezar
f2b19b6ae9 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 14:39:14 +01:00
Evan Lezar
7cb9ed66be Align release candidate RPM version with Debian version
The version for RPM release candidates has the form `1.13.0-0.1.rc.1-1` whereas debian packages have the form `1.13.0~rc.1-1`.

Note that since the `~` is handled in [the same way](https://docs.fedoraproject.org/en-US/packaging-guidelines/Versioning/#_handling_non_sorting_versions_with_tilde_dot_and_caret) as for Debian packages, there does not seem to be a specific reason for this and dealing with multiple version strings in our entire pipeline adds complexity.

This change aligns the package versioning for rpm packages with Debian packages.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 14:31:23 +01:00
Evan Lezar
d578f4598a Remove fedora35 pipeline targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 14:31:23 +01:00
Evan Lezar
d30e6c23ab Merge branch 'update-ldflags' into 'main'
Update ldflags for cgo

See merge request nvidia/container-toolkit/container-toolkit!290
2023-02-10 14:17:53 +00:00
Evan Lezar
1c05f2fb9a Merge branch 'add-options-to-mounts' into 'main'
Add Options to mounts to refactor IPC CDI spec generation

See merge request nvidia/container-toolkit/container-toolkit!287
2023-02-10 08:04:24 +00:00
Evan Lezar
1407ace94a Update ldflags for cgo
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 21:54:49 +01:00
Evan Lezar
97008f2db6 Move IPC discoverer into DriverDiscoverer
This simplifies the construction of the required common edits
when constructing a CDI specification.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 09:06:07 +01:00
Evan Lezar
076eed7eb4 Update ipcMount to add noexec option
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 09:06:07 +01:00
Evan Lezar
33c7b056ea Add ipcMounts type
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 09:06:07 +01:00
Evan Lezar
3b8c40c3e6 Move IPC discoverer to internal/discover package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 09:06:07 +01:00
Evan Lezar
3f70521a63 Add Options to discover.Mount
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 09:06:07 +01:00
Evan Lezar
21f5895b5a Merge branch 'bump-version-1.13.0-rc.1' into 'main'
Bump version to 1.13.0-rc.1

See merge request nvidia/container-toolkit/container-toolkit!286
2023-02-07 11:38:18 +00:00
Evan Lezar
738a2e7343 Bump version to 1.13.0-rc.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-07 11:55:47 +01:00
Evan Lezar
62bd015475 Merge branch 'bump-version-v1.12.0' into 'main'
Bump version to v1.12.0

See merge request nvidia/container-toolkit/container-toolkit!285
2023-02-03 14:07:30 +00:00
Evan Lezar
ac5c62c116 Bump CUDA base images to 12.0.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-03 14:25:42 +01:00
Evan Lezar
80fe1065ad Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-03 14:23:49 +01:00
Evan Lezar
fea195cc8d Bump version to v1.12.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-03 13:58:26 +01:00
Evan Lezar
9ef314e1e3 Merge branch 'rename-root-flag' into 'main'
Rename root to driverRoot for CDI generation

See merge request nvidia/container-toolkit/container-toolkit!284
2023-02-02 16:33:24 +00:00
Evan Lezar
95f859118b Update changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 15:58:00 +01:00
Evan Lezar
daceac9117 Rename discover.Config.Root to discover.Config.DriverRoot
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 15:57:15 +01:00
Evan Lezar
cfa2647260 Rename root to driverRoot for CDI generation
This makes the intent of the command line argument clearer since this
relates specifically to the root where the NVIDIA driver is installed.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 15:42:04 +01:00
Evan Lezar
03cdf3b5d7 Merge branch 'bump-version-v1.12.0-rc.6' into 'main'
Bump version to v1.12.0-rc.6

See merge request nvidia/container-toolkit/container-toolkit!283
2023-02-02 13:50:25 +00:00
Evan Lezar
f8f415a605 Ensure container-archive name is unique
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 12:03:34 +01:00
Evan Lezar
fe117d3916 Udpate libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 12:02:57 +01:00
Evan Lezar
069536d598 Bump version to v1.12.0-rc.6
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 12:00:08 +01:00
Evan Lezar
5f53ca0af5 Add missing v1.12.0-rc.5 changelog entry
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 11:58:44 +01:00
383 changed files with 30461 additions and 5584 deletions

View File

@@ -23,6 +23,7 @@ variables:
BUILD_MULTI_ARCH_IMAGES: "true"
stages:
- trigger
- image
- lint
- go-checks
@@ -34,13 +35,44 @@ stages:
- scan
- release
.pipeline-trigger-rules:
rules:
# We trigger the pipeline if started manually
- if: $CI_PIPELINE_SOURCE == "web"
# We trigger the pipeline on the main branch
- if: $CI_COMMIT_BRANCH == "main"
# We trigger the pipeline on the release- branches
- if: $CI_COMMIT_BRANCH =~ /^release-.*$/
# We trigger the pipeline on tags
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
workflow:
rules:
# We trigger the pipeline on a merge request
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
# We then add all the regular triggers
- !reference [.pipeline-trigger-rules, rules]
# The main or manual job is used to filter out distributions or architectures that are not required on
# every build.
.main-or-manual:
rules:
- if: $CI_COMMIT_BRANCH == "main"
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
- !reference [.pipeline-trigger-rules, rules]
- if: $CI_PIPELINE_SOURCE == "schedule"
when: manual
# The trigger-pipeline job adds a manualy triggered job to the pipeline on merge requests.
trigger-pipeline:
stage: trigger
script:
- echo "starting pipeline"
rules:
- !reference [.main-or-manual, rules]
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
when: manual
allow_failure: false
- when: always
# Define the distribution targets
.dist-amazonlinux2:
rules:
@@ -70,20 +102,6 @@ stages:
DIST: debian10
PACKAGE_REPO_TYPE: debian
.dist-debian9:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: debian9
PACKAGE_REPO_TYPE: debian
.dist-fedora35:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: fedora35
PACKAGE_REPO_TYPE: rpm
.dist-opensuse-leap15.1:
rules:
- !reference [.main-or-manual, rules]
@@ -99,13 +117,6 @@ stages:
CVE_UPDATES: "cyrus-sasl-lib"
PACKAGE_REPO_TYPE: rpm
.dist-ubuntu16.04:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: ubuntu16.04
PACKAGE_REPO_TYPE: debian
.dist-ubuntu18.04:
variables:
DIST: ubuntu18.04
@@ -113,8 +124,6 @@ stages:
PACKAGE_REPO_TYPE: debian
.dist-ubuntu20.04:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: ubuntu20.04
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
@@ -259,22 +268,15 @@ release:staging-ubi8:
needs:
- image-ubi8
release:staging-ubuntu18.04:
extends:
- .release:staging
- .dist-ubuntu18.04
needs:
- test-toolkit-ubuntu18.04
- test-containerd-ubuntu18.04
- test-crio-ubuntu18.04
- test-docker-ubuntu18.04
release:staging-ubuntu20.04:
extends:
- .release:staging
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
- test-toolkit-ubuntu20.04
- test-containerd-ubuntu20.04
- test-crio-ubuntu20.04
- test-docker-ubuntu20.04
release:staging-packaging:
extends:

113
.github/workflows/blossom-ci.yaml vendored Normal file
View File

@@ -0,0 +1,113 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A workflow to trigger ci on hybrid infra (github + self hosted runner)
name: Blossom-CI
on:
issue_comment:
types: [created]
workflow_dispatch:
inputs:
platform:
description: 'runs-on argument'
required: false
args:
description: 'argument'
required: false
jobs:
Authorization:
name: Authorization
runs-on: blossom
outputs:
args: ${{ env.args }}
# This job only runs for pull request comments
if: |
contains( '\
anstockatnv,\
rohitrajani2018,\
cdesiniotis,\
shivamerla,\
ArangoGutierrez,\
elezar,\
klueska,\
zvonkok,\
', format('{0},', github.actor)) &&
github.event.comment.body == '/blossom-ci'
steps:
- name: Check if comment is issued by authorized person
run: blossom-ci
env:
OPERATION: 'AUTH'
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
Vulnerability-scan:
name: Vulnerability scan
needs: [Authorization]
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
repository: ${{ fromJson(needs.Authorization.outputs.args).repo }}
ref: ${{ fromJson(needs.Authorization.outputs.args).ref }}
lfs: 'true'
# repo specific steps
#- name: Setup java
# uses: actions/setup-java@v1
# with:
# java-version: 1.8
# add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file
#- name: Setup blackduck properties
# run: |
# PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g')
# echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties
# echo detect.maven.included.scopes=compile >> application.properties
- name: Run blossom action
uses: NVIDIA/blossom-action@main
env:
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
with:
args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }}
args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }}
args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }}
Job-trigger:
name: Start ci job
needs: [Vulnerability-scan]
runs-on: blossom
steps:
- name: Start ci job
run: blossom-ci
env:
OPERATION: 'START-CI-JOB'
CI_SERVER: ${{ secrets.CI_SERVER }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Upload-Log:
name: Upload log
runs-on: blossom
if : github.event_name == 'workflow_dispatch'
steps:
- name: Jenkins log for pull request ${{ fromJson(github.event.inputs.args).pr }} (click here)
run: blossom-ci
env:
OPERATION: 'POST-PROCESSING'
CI_SERVER: ${{ secrets.CI_SERVER }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}

2
.gitignore vendored
View File

@@ -1,9 +1,11 @@
dist
artifacts
*.swp
*.swo
/coverage.out*
/test/output/
/nvidia-container-runtime
/nvidia-container-runtime.*
/nvidia-container-runtime-hook
/nvidia-container-toolkit
/nvidia-ctk

View File

@@ -116,12 +116,6 @@ package-amazonlinux2-x86_64:
- .dist-amazonlinux2
- .arch-x86_64
package-centos7-ppc64le:
extends:
- .package-build
- .dist-centos7
- .arch-ppc64le
package-centos7-x86_64:
extends:
- .package-build
@@ -152,42 +146,12 @@ package-debian10-amd64:
- .dist-debian10
- .arch-amd64
package-debian9-amd64:
extends:
- .package-build
- .dist-debian9
- .arch-amd64
package-fedora35-aarch64:
extends:
- .package-build
- .dist-fedora35
- .arch-aarch64
package-fedora35-x86_64:
extends:
- .package-build
- .dist-fedora35
- .arch-x86_64
package-opensuse-leap15.1-x86_64:
extends:
- .package-build
- .dist-opensuse-leap15.1
- .arch-x86_64
package-ubuntu16.04-amd64:
extends:
- .package-build
- .dist-ubuntu16.04
- .arch-amd64
package-ubuntu16.04-ppc64le:
extends:
- .package-build
- .dist-ubuntu16.04
- .arch-ppc64le
package-ubuntu18.04-amd64:
extends:
- .package-build
@@ -240,7 +204,6 @@ image-centos7:
- .package-artifacts
- .dist-centos7
needs:
- package-centos7-ppc64le
- package-centos7-x86_64
image-ubi8:
@@ -254,17 +217,6 @@ image-ubi8:
- package-centos8-x86_64
- package-centos8-ppc64le
image-ubuntu18.04:
extends:
- .image-build
- .package-artifacts
- .dist-ubuntu18.04
needs:
- package-ubuntu18.04-amd64
- package-ubuntu18.04-arm64
- job: package-ubuntu18.04-ppc64le
optional: true
image-ubuntu20.04:
extends:
- .image-build
@@ -273,7 +225,8 @@ image-ubuntu20.04:
needs:
- package-ubuntu18.04-amd64
- package-ubuntu18.04-arm64
- package-ubuntu18.04-ppc64le
- job: package-ubuntu18.04-ppc64le
optional: true
# The DIST=packaging target creates an image containing all built packages
image-packaging:
@@ -290,26 +243,14 @@ image-packaging:
optional: true
- job: package-amazonlinux2-x86_64
optional: true
- job: package-centos7-ppc64le
optional: true
- job: package-centos7-x86_64
optional: true
- job: package-centos8-ppc64le
optional: true
- job: package-debian10-amd64
optional: true
- job: package-debian9-amd64
optional: true
- job: package-fedora35-aarch64
optional: true
- job: package-fedora35-x86_64
optional: true
- job: package-opensuse-leap15.1-x86_64
optional: true
- job: package-ubuntu16.04-amd64
optional: true
- job: package-ubuntu16.04-ppc64le
optional: true
- job: package-ubuntu18.04-ppc64le
optional: true
@@ -343,31 +284,31 @@ image-packaging:
TEST_CASES: "crio"
# Define the test targets
test-toolkit-ubuntu18.04:
test-toolkit-ubuntu20.04:
extends:
- .test:toolkit
- .dist-ubuntu18.04
- .dist-ubuntu20.04
needs:
- image-ubuntu18.04
- image-ubuntu20.04
test-containerd-ubuntu18.04:
test-containerd-ubuntu20.04:
extends:
- .test:containerd
- .dist-ubuntu18.04
- .dist-ubuntu20.04
needs:
- image-ubuntu18.04
- image-ubuntu20.04
test-crio-ubuntu18.04:
test-crio-ubuntu20.04:
extends:
- .test:crio
- .dist-ubuntu18.04
- .dist-ubuntu20.04
needs:
- image-ubuntu18.04
- image-ubuntu20.04
test-docker-ubuntu18.04:
test-docker-ubuntu20.04:
extends:
- .test:docker
- .dist-ubuntu18.04
- .dist-ubuntu20.04
needs:
- image-ubuntu18.04
- image-ubuntu20.04

2
.gitmodules vendored
View File

@@ -1,7 +1,7 @@
[submodule "third_party/libnvidia-container"]
path = third_party/libnvidia-container
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
branch = main
branch = release-1.13
[submodule "third_party/nvidia-container-runtime"]
path = third_party/nvidia-container-runtime
url = https://gitlab.com/nvidia/container-toolkit/container-runtime.git

View File

@@ -36,8 +36,7 @@ variables:
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
ARTIFACTORY_REPO_BASE: "https://urm.nvidia.com/artifactory/sw-gpu-cloudnative"
# TODO: We should set the kitmaker release folder here once we have the end-to-end workflow set up
KITMAKER_RELEASE_FOLDER: "testing"
KITMAKER_RELEASE_FOLDER: "kitmaker"
.image-pull:
stage: image-build
@@ -79,11 +78,6 @@ image-ubi8:
- .dist-ubi8
- .image-pull
image-ubuntu18.04:
extends:
- .dist-ubuntu18.04
- .image-pull
image-ubuntu20.04:
extends:
- .dist-ubuntu20.04
@@ -110,7 +104,7 @@ image-packaging:
image: "${PULSE_IMAGE}"
variables:
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}-${DIST}"
IMAGE_ARCHIVE: "container-toolkit.tar"
IMAGE_ARCHIVE: "container-toolkit-${DIST}-${ARCH}-${CI_JOB_ID}.tar"
rules:
- if: $SKIP_SCANS != "yes"
- when: manual
@@ -153,14 +147,6 @@ scan-centos7-arm64:
- image-centos7
- scan-centos7-amd64
scan-ubuntu18.04-amd64:
extends:
- .dist-ubuntu18.04
- .platform-amd64
- .scan
needs:
- image-ubuntu18.04
scan-ubuntu20.04-amd64:
extends:
- .dist-ubuntu20.04
@@ -230,12 +216,12 @@ release:packages:kitmaker:
extends:
- .release:packages
release:staging-ubuntu18.04:
release:staging-ubuntu20.04:
extends:
- .release:staging
- .dist-ubuntu18.04
- .dist-ubuntu20.04
needs:
- image-ubuntu18.04
- image-ubuntu20.04
# Define the external release targets
# Release to NGC
@@ -244,11 +230,6 @@ release:ngc-centos7:
- .dist-centos7
- .release:ngc
release:ngc-ubuntu18.04:
extends:
- .dist-ubuntu18.04
- .release:ngc
release:ngc-ubuntu20.04:
extends:
- .dist-ubuntu20.04

View File

@@ -1,7 +1,107 @@
# NVIDIA Container Toolkit Changelog
## v1.13.3
* Generate CDI specification files with `644` permissions to allow rootless applications (e.g. podman).
* Fix bug causing incorrect nvidia-smi symlink to be created on WSL2 systems with multiple driver roots.
* Fix bug when using driver versions that do not include a patch component in their version number.
* Skip additional modifications in CDI mode.
* Fix loading of kernel modules and creation of device nodes in containerized use cases.
* [toolkit-container] Allow same envars for all runtime configs
## v1.13.2
* Add `nvidia-container-runtime-hook.path` config option to specify NVIDIA Container Runtime Hook path explicitly.
* Fix bug in creation of `/dev/char` symlinks by failing operation if kernel modules are not loaded.
* Add option to load kernel modules when creating device nodes
* Add option to create device nodes when creating `/dev/char` symlinks
* Treat failures to open debug log files as non-fatal.
* Bump CUDA base image version to 12.1.1.
## v1.13.1
* Update `update-ldcache` hook to only update ldcache if it exists.
* Update `update-ldcache` hook to create `/etc/ld.so.conf.d` folder if it doesn't exist.
* Fix failure when libcuda cannot be located during XOrg library discovery.
* Fix CDI spec generation on systems that use `/etc/alternatives` (e.g. Debian)
## v1.13.0
* Promote 1.13.0-rc.3 to 1.13.0
## v1.13.0-rc.3
* Only initialize NVML for modes that require it when runing `nvidia-ctk cdi generate`.
* Prefer /run over /var/run when locating nvidia-persistenced and nvidia-fabricmanager sockets.
* Fix the generation of CDI specifications for management containers when the driver libraries are not in the LDCache.
* Add transformers to deduplicate and simplify CDI specifications.
* Generate a simplified CDI specification by default. This means that entities in the common edits in a spec are not included in device definitions.
* Also return an error from the nvcdi.New constructor instead of panicing.
* Detect XOrg libraries for injection and CDI spec generation.
* Add `nvidia-ctk system create-device-nodes` command to create control devices.
* Add `nvidia-ctk cdi transform` command to apply transforms to CDI specifications.
* Add `--vendor` and `--class` options to `nvidia-ctk cdi generate`
* [libnvidia-container] Fix segmentation fault when RPC initialization fails.
* [libnvidia-container] Build centos variants of the NVIDIA Container Library with static libtirpc v1.3.2.
* [libnvidia-container] Remove make targets for fedora35 as the centos8 packages are compatible.
* [toolkit-container] Add `nvidia-container-runtime.modes.cdi.annotation-prefixes` config option that allows the CDI annotation prefixes that are read to be overridden.
* [toolkit-container] Create device nodes when generating CDI specification for management containers.
* [toolkit-container] Add `nvidia-container-runtime.runtimes` config option to set the low-level runtime for the NVIDIA Container Runtime
## v1.13.0-rc.2
* Don't fail chmod hook if paths are not injected
* Only create `by-path` symlinks if CDI devices are actually requested.
* Fix possible blank `nvidia-ctk` path in generated CDI specifications
* Fix error in postun scriplet on RPM-based systems
* Only check `NVIDIA_VISIBLE_DEVICES` for environment variables if no annotations are specified.
* Add `cdi.default-kind` config option for constructing fully-qualified CDI device names in CDI mode
* Add support for `accept-nvidia-visible-devices-envvar-unprivileged` config setting in CDI mode
* Add `nvidia-container-runtime-hook.skip-mode-detection` config option to bypass mode detection. This allows `legacy` and `cdi` mode, for example, to be used at the same time.
* Add support for generating CDI specifications for GDS and MOFED devices
* Ensure CDI specification is validated on save when generating a spec
* Rename `--discovery-mode` argument to `--mode` for `nvidia-ctk cdi generate`
* [libnvidia-container] Fix segfault on WSL2 systems
* [toolkit-container] Add `--cdi-enabled` flag to toolkit config
* [toolkit-container] Install `nvidia-ctk` from toolkit container
* [toolkit-container] Use installed `nvidia-ctk` path in NVIDIA Container Toolkit config
* [toolkit-container] Bump CUDA base images to 12.1.0
* [toolkit-container] Set `nvidia-ctk` path in the
* [toolkit-container] Add `cdi.k8s.io/*` to set of allowed annotations in containerd config
* [toolkit-container] Generate CDI specification for use in management containers
* [toolkit-container] Install experimental runtime as `nvidia-container-runtime.experimental` instead of `nvidia-container-runtime-experimental`
* [toolkit-container] Install and configure mode-specific runtimes for `cdi` and `legacy` modes
## v1.13.0-rc.1
* Include MIG-enabled devices as GPUs when generating CDI specification
* Fix missing NVML symbols when running `nvidia-ctk` on some platforms [#49]
* Add CDI spec generation for WSL2-based systems to `nvidia-ctk cdi generate` command
* Add `auto` mode to `nvidia-ctk cdi generate` command to automatically detect a WSL2-based system over a standard NVML-based system.
* Add mode-specific (`.cdi` and `.legacy`) NVIDIA Container Runtime binaries for use in the GPU Operator
* Discover all `gsb*.bin` GSP firmware files when generating CDI specification.
* Align `.deb` and `.rpm` release candidate package versions
* Remove `fedora35` packaging targets
* [libnvidia-container] Include all `gsp*.bin` firmware files if present
* [libnvidia-container] Align `.deb` and `.rpm` release candidate package versions
* [libnvidia-container] Remove `fedora35` packaging targets
* [toolkit-container] Install `nvidia-container-toolkit-operator-extensions` package for mode-specific executables.
* [toolkit-container] Allow `nvidia-container-runtime.mode` to be set when configuring the NVIDIA Container Toolkit
## v1.12.0
* Promote `v1.12.0-rc.5` to `v1.12.0`
* Rename `nvidia cdi generate` `--root` flag to `--driver-root` to better indicate intent
* [libnvidia-container] Add nvcubins.bin to DriverStore components under WSL2
* [toolkit-container] Bump CUDA base images to 12.0.1
## v1.12.0-rc.5
* Fix bug here the `nvidia-ctk` path was not properly resolved. This causes failures to run containers when the runtime is configured in `csv` mode or if the `NVIDIA_DRIVER_CAPABILITIES` includes `graphics` or `display` (e.g. `all`).
## v1.12.0-rc.4
* Generate a minimum CDI spec version for improved compatibility.

View File

@@ -61,7 +61,7 @@ cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
endif
cmds: $(CMD_TARGETS)
$(CMD_TARGETS): cmd-%:
GOOS=$(GOOS) go build -ldflags "-s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
build:
GOOS=$(GOOS) go build ./...

View File

@@ -29,6 +29,7 @@ ARG PACKAGE_DIST
ARG PACKAGE_VERSION
ARG GIT_BRANCH
ARG GIT_COMMIT
ARG GIT_COMMIT_SHORT
ARG SOURCE_DATE_EPOCH
ARG VERSION

View File

@@ -44,13 +44,13 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules #####
DEFAULT_PUSH_TARGET := ubuntu20.04
DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 ubi8 centos7
DISTRIBUTIONS := ubuntu20.04 ubi8 centos7
META_TARGETS := packaging
BUILD_TARGETS := $(patsubst %,build-%,$(DISTRIBUTIONS) $(META_TARGETS))
PUSH_TARGETS := $(patsubst %,push-%,$(DISTRIBUTIONS) $(META_TARGETS))
TEST_TARGETS := $(patsubst %,test-%, $(DISTRIBUTIONS))
TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
@@ -95,6 +95,7 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
--build-arg VERSION="$(VERSION)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
--build-arg GIT_COMMIT_SHORT="$(GIT_COMMIT_SHORT)" \
--build-arg GIT_BRANCH="$(GIT_BRANCH)" \
--build-arg SOURCE_DATE_EPOCH="$(SOURCE_DATE_EPOCH)" \
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
@@ -105,24 +106,20 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
build-ubuntu%: BASE_DIST = $(*)
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
build-ubuntu%: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
build-ubi8: BASE_DIST := ubi8
build-ubi8: DOCKERFILE_SUFFIX := centos
build-ubi8: PACKAGE_DIST = centos8
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-centos7: BASE_DIST = $(*)
build-centos7: DOCKERFILE_SUFFIX := centos
build-centos7: PACKAGE_DIST = $(BASE_DIST)
build-centos7: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-packaging: BASE_DIST := ubuntu20.04
build-packaging: DOCKERFILE_SUFFIX := packaging
build-packaging: PACKAGE_ARCH := amd64
build-packaging: PACKAGE_DIST = all
build-packaging: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
# Test targets
test-%: DIST = $(*)
@@ -146,10 +143,7 @@ test-packaging:
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/ppc64le" || echo "Missing centos8/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/x86_64" || echo "Missing centos8/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/debian10/amd64" || echo "Missing debian10/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/debian9/amd64" || echo "Missing debian9/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/opensuse-leap15.1/x86_64" || echo "Missing opensuse-leap15.1/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu16.04/amd64" || echo "Missing ubuntu16.04/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu16.04/ppc64le" || echo "Missing ubuntu16.04/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/amd64" || echo "Missing ubuntu18.04/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/arm64" || echo "Missing ubuntu18.04/arm64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/ppc64le" || echo "Missing ubuntu18.04/ppc64le"

View File

@@ -10,6 +10,7 @@ import (
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
)
@@ -130,7 +131,7 @@ func isPrivileged(s *Spec) bool {
}
var caps []string
// If v1.1.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
// If v1.0.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0-rc1/specs-go/config.go#L30-L54
rc1cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc1")
rc5cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc5")
@@ -139,28 +140,31 @@ func isPrivileged(s *Spec) bool {
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
// Otherwise, parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
} else {
var lc LinuxCapabilities
err := json.Unmarshal(*s.Process.Capabilities, &lc)
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
for _, c := range caps {
if c == capSysAdmin {
return true
}
}
// We only make sure that the bounding capabibility set has
// CAP_SYS_ADMIN. This allows us to make sure that the container was
// actually started as '--privileged', but also allow non-root users to
// access the privileged NVIDIA capabilities.
caps = lc.Bounding
return false
}
for _, c := range caps {
if c == capSysAdmin {
return true
}
// Otherwise, parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
process := specs.Process{
Env: s.Process.Env,
}
return false
err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
fullSpec := specs.Spec{
Version: *s.Version,
Process: &process,
}
return image.IsPrivileged(&fullSpec)
}
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {

View File

@@ -43,8 +43,9 @@ type HookConfig struct {
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
SupportedDriverCapabilities DriverCapabilities `toml:"supported-driver-capabilities"`
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
NVIDIAContainerRuntime config.RuntimeConfig `toml:"nvidia-container-runtime"`
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
NVIDIAContainerRuntime config.RuntimeConfig `toml:"nvidia-container-runtime"`
NVIDIAContainerRuntimeHook config.RuntimeHookConfig `toml:"nvidia-container-runtime-hook"`
}
func getDefaultHookConfig() HookConfig {
@@ -66,7 +67,8 @@ func getDefaultHookConfig() HookConfig {
User: nil,
Ldconfig: nil,
},
NVIDIAContainerRuntime: *config.GetDefaultRuntimeConfig(),
NVIDIAContainerRuntime: *config.GetDefaultRuntimeConfig(),
NVIDIAContainerRuntimeHook: *config.GetDefaultRuntimeHookConfig(),
}
}

View File

@@ -74,7 +74,7 @@ func doPrestart() {
hook := getHookConfig()
cli := hook.NvidiaContainerCLI
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
if !hook.NVIDIAContainerRuntimeHook.SkipModeDetection && info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
}

View File

@@ -0,0 +1,34 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
)
func main() {
rt := runtime.New(
runtime.WithModeOverride("cdi"),
)
err := rt.Run(os.Args)
if err != nil {
os.Exit(1)
}
}

View File

@@ -0,0 +1,34 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
)
func main() {
rt := runtime.New(
runtime.WithModeOverride("legacy"),
)
err := rt.Run(os.Args)
if err != nil {
os.Exit(1)
}
}

View File

@@ -1,89 +1,15 @@
package main
import (
"fmt"
"os"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
)
// version must be set by go build's -X main.version= option in the Makefile.
var version = "unknown"
// gitCommit will be the hash that the binary was built from
// and will be populated by the Makefile
var gitCommit = ""
var logger = NewLogger()
func main() {
err := run(os.Args)
r := runtime.New()
err := r.Run(os.Args)
if err != nil {
logger.Errorf("%v", err)
os.Exit(1)
}
}
// run is an entry point that allows for idiomatic handling of errors
// when calling from the main function.
func run(argv []string) (rerr error) {
printVersion := hasVersionFlag(argv)
if printVersion {
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version)))
}
cfg, err := config.GetConfig()
if err != nil {
return fmt.Errorf("error loading config: %v", err)
}
logger, err = UpdateLogger(
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath,
cfg.NVIDIAContainerRuntimeConfig.LogLevel,
argv,
)
if err != nil {
return fmt.Errorf("failed to set up logger: %v", err)
}
defer func() {
if rerr != nil {
logger.Errorf("%v", rerr)
}
logger.Reset()
}()
logger.Debugf("Command line arguments: %v", argv)
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
if err != nil {
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
}
if printVersion {
fmt.Print("\n")
}
return runtime.Exec(argv)
}
// TODO: This should be refactored / combined with parseArgs in logger.
func hasVersionFlag(args []string) bool {
for i := 0; i < len(args); i++ {
param := args[i]
parts := strings.SplitN(param, "=", 2)
trimmed := strings.TrimLeft(parts[0], "-")
// If this is not a flag we continue
if parts[0] == trimmed {
continue
}
// Check the version flag
if trimmed == "version" {
return true
}
}
return false
}

View File

@@ -13,6 +13,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
)
@@ -41,7 +42,7 @@ func TestMain(m *testing.M) {
var err error
moduleRoot, err := test.GetModuleRoot()
if err != nil {
logger.Fatalf("error in test setup: could not get module root: %v", err)
logrus.Fatalf("error in test setup: could not get module root: %v", err)
}
testBinPath := filepath.Join(moduleRoot, "test", "bin")
testInputPath := filepath.Join(moduleRoot, "test", "input")
@@ -53,11 +54,11 @@ func TestMain(m *testing.M) {
// Confirm that the environment is configured correctly
runcPath, err := exec.LookPath(runcExecutableName)
if err != nil || filepath.Join(testBinPath, runcExecutableName) != runcPath {
logger.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
logrus.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
}
hookPath, err := exec.LookPath(nvidiaHook)
if err != nil || filepath.Join(testBinPath, nvidiaHook) != hookPath {
logger.Fatalf("error in test setup: mock hook path set incorrectly in TestMain(): %v", err)
logrus.Fatalf("error in test setup: mock hook path set incorrectly in TestMain(): %v", err)
}
// Store the root and binary paths in the test Config
@@ -77,7 +78,7 @@ func TestMain(m *testing.M) {
// case 1) nvidia-container-runtime run --bundle
// case 2) nvidia-container-runtime create --bundle
// - Confirm the runtime handles bad input correctly
// - Confirm the runtime handles bad input correctly
func TestBadInput(t *testing.T) {
err := cfg.generateNewRuntimeSpec()
if err != nil {
@@ -91,9 +92,10 @@ func TestBadInput(t *testing.T) {
}
// case 1) nvidia-container-runtime run --bundle <bundle-name> <ctr-name>
// - Confirm the runtime runs with no errors
// - Confirm the runtime runs with no errors
//
// case 2) nvidia-container-runtime create --bundle <bundle-name> <ctr-name>
// - Confirm the runtime inserts the NVIDIA prestart hook correctly
// - Confirm the runtime inserts the NVIDIA prestart hook correctly
func TestGoodInput(t *testing.T) {
err := cfg.generateNewRuntimeSpec()
if err != nil {
@@ -170,7 +172,7 @@ func TestDuplicateHook(t *testing.T) {
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
// testing.
func addNVIDIAHook(spec *specs.Spec) error {
m := modifier.NewStableRuntimeModifier(logger.Logger)
m := modifier.NewStableRuntimeModifier(logrus.StandardLogger(), nvidiaHook)
return m.Modify(spec)
}

View File

@@ -18,6 +18,7 @@ package cdi
import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
@@ -44,6 +45,7 @@ func (m command) build() *cli.Command {
hook.Subcommands = []*cli.Command{
generate.NewCommand(m.logger),
transform.NewCommand(m.logger),
}
return &hook

View File

@@ -18,25 +18,22 @@ package generate
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"sigs.k8s.io/yaml"
)
const (
formatJSON = "json"
formatYAML = "yaml"
allDeviceName = "all"
)
type command struct {
@@ -47,8 +44,11 @@ type config struct {
output string
format string
deviceNameStrategy string
root string
driverRoot string
nvidiaCTKPath string
mode string
vendor string
class string
}
// NewCommand constructs a generate-cdi command with the specified logger
@@ -84,77 +84,77 @@ func (m command) build() *cli.Command {
&cli.StringFlag{
Name: "format",
Usage: "The output format for the generated spec [json | yaml]. This overrides the format defined by the output file extension (if specified).",
Value: formatYAML,
Value: spec.FormatYAML,
Destination: &cfg.format,
},
&cli.StringFlag{
Name: "mode",
Aliases: []string{"discovery-mode"},
Usage: "The mode to use when discovering the available entities. One of [auto | nvml | wsl]. If mode is set to 'auto' the mode will be determined based on the system configuration.",
Value: nvcdi.ModeAuto,
Destination: &cfg.mode,
},
&cli.StringFlag{
Name: "device-name-strategy",
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
Value: deviceNameStrategyIndex,
Value: nvcdi.DeviceNameStrategyIndex,
Destination: &cfg.deviceNameStrategy,
},
&cli.StringFlag{
Name: "root",
Usage: "Specify the root to use when discovering the entities that should be included in the CDI specification.",
Destination: &cfg.root,
Name: "driver-root",
Usage: "Specify the NVIDIA GPU driver root to use when discovering the entities that should be included in the CDI specification.",
Destination: &cfg.driverRoot,
},
&cli.StringFlag{
Name: "nvidia-ctk-path",
Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.",
Destination: &cfg.nvidiaCTKPath,
},
&cli.StringFlag{
Name: "vendor",
Aliases: []string{"cdi-vendor"},
Usage: "the vendor string to use for the generated CDI specification.",
Value: "nvidia.com",
Destination: &cfg.vendor,
},
&cli.StringFlag{
Name: "class",
Aliases: []string{"cdi-class"},
Usage: "the class string to use for the generated CDI specification.",
Value: "gpu",
Destination: &cfg.class,
},
}
return &c
}
func (m command) validateFlags(r *cli.Context, cfg *config) error {
func (m command) validateFlags(c *cli.Context, cfg *config) error {
cfg.format = strings.ToLower(cfg.format)
switch cfg.format {
case formatJSON:
case formatYAML:
case spec.FormatJSON:
case spec.FormatYAML:
default:
return fmt.Errorf("invalid output format: %v", cfg.format)
}
_, err := newDeviceNamer(cfg.deviceNameStrategy)
cfg.mode = strings.ToLower(cfg.mode)
switch cfg.mode {
case nvcdi.ModeAuto:
case nvcdi.ModeNvml:
case nvcdi.ModeWsl:
case nvcdi.ModeManagement:
default:
return fmt.Errorf("invalid discovery mode: %v", cfg.mode)
}
_, err := nvcdi.NewDeviceNamer(cfg.deviceNameStrategy)
if err != nil {
return err
}
return nil
}
func (m command) run(c *cli.Context, cfg *config) error {
deviceNamer, err := newDeviceNamer(cfg.deviceNameStrategy)
if err != nil {
return fmt.Errorf("failed to create device namer: %v", err)
}
spec, err := m.generateSpec(
cfg.root,
discover.FindNvidiaCTK(m.logger, cfg.nvidiaCTKPath),
deviceNamer,
)
if err != nil {
return fmt.Errorf("failed to generate CDI spec: %v", err)
}
var outputTo io.Writer
if cfg.output == "" {
outputTo = os.Stdout
} else {
err := createParentDirsIfRequired(cfg.output)
if err != nil {
return fmt.Errorf("failed to create parent folders for output file: %v", err)
}
outputFile, err := os.Create(cfg.output)
if err != nil {
return fmt.Errorf("failed to create output file: %v", err)
}
defer outputFile.Close()
outputTo = outputFile
}
cfg.nvidiaCTKPath = discover.FindNvidiaCTK(m.logger, cfg.nvidiaCTKPath)
if outputFileFormat := formatFromFilename(cfg.output); outputFileFormat != "" {
m.logger.Debugf("Inferred output format as %q from output file name", outputFileFormat)
@@ -165,219 +165,120 @@ func (m command) run(c *cli.Context, cfg *config) error {
}
}
data, err := yaml.Marshal(spec)
if err != nil {
return fmt.Errorf("failed to marshal CDI spec: %v", err)
if err := cdi.ValidateVendorName(cfg.vendor); err != nil {
return fmt.Errorf("invalid CDI vendor name: %v", err)
}
if strings.ToLower(cfg.format) == formatJSON {
data, err = yaml.YAMLToJSONStrict(data)
if err != nil {
return fmt.Errorf("failed to convert CDI spec from YAML to JSON: %v", err)
}
if err := cdi.ValidateClassName(cfg.class); err != nil {
return fmt.Errorf("invalid CDI class name: %v", err)
}
err = writeToOutput(cfg.format, data, outputTo)
if err != nil {
return fmt.Errorf("failed to write output: %v", err)
}
return nil
}
func (m command) run(c *cli.Context, cfg *config) error {
spec, err := m.generateSpec(cfg)
if err != nil {
return fmt.Errorf("failed to generate CDI spec: %v", err)
}
m.logger.Infof("Generated CDI spec with version %v", spec.Raw().Version)
if cfg.output == "" {
_, err := spec.WriteTo(os.Stdout)
if err != nil {
return fmt.Errorf("failed to write CDI spec to STDOUT: %v", err)
}
return nil
}
return spec.Save(cfg.output)
}
func formatFromFilename(filename string) string {
ext := filepath.Ext(filename)
switch strings.ToLower(ext) {
case ".json":
return formatJSON
case ".yaml":
return formatYAML
case ".yml":
return formatYAML
return spec.FormatJSON
case ".yaml", ".yml":
return spec.FormatYAML
}
return ""
}
func writeToOutput(format string, data []byte, output io.Writer) error {
if format == formatYAML {
_, err := output.Write([]byte("---\n"))
if err != nil {
return fmt.Errorf("failed to write YAML separator: %v", err)
}
}
_, err := output.Write(data)
func (m command) generateSpec(cfg *config) (spec.Interface, error) {
deviceNamer, err := nvcdi.NewDeviceNamer(cfg.deviceNameStrategy)
if err != nil {
return fmt.Errorf("failed to write data: %v", err)
return nil, fmt.Errorf("failed to create device namer: %v", err)
}
return nil
}
func (m command) generateSpec(root string, nvidiaCTKPath string, namer deviceNamer) (*specs.Spec, error) {
nvmllib := nvml.New()
if r := nvmllib.Init(); r != nvml.SUCCESS {
return nil, r
cdilib, err := nvcdi.New(
nvcdi.WithLogger(m.logger),
nvcdi.WithDriverRoot(cfg.driverRoot),
nvcdi.WithNVIDIACTKPath(cfg.nvidiaCTKPath),
nvcdi.WithDeviceNamer(deviceNamer),
nvcdi.WithMode(string(cfg.mode)),
)
if err != nil {
return nil, fmt.Errorf("failed to create CDI library: %v", err)
}
defer nvmllib.Shutdown()
devicelib := device.New(device.WithNvml(nvmllib))
deviceSpecs, err := m.generateDeviceSpecs(devicelib, root, nvidiaCTKPath, namer)
deviceSpecs, err := cdilib.GetAllDeviceSpecs()
if err != nil {
return nil, fmt.Errorf("failed to create device CDI specs: %v", err)
}
var hasAll bool
for _, deviceSpec := range deviceSpecs {
if deviceSpec.Name == allDeviceName {
hasAll = true
break
}
}
if !hasAll {
allDevice, err := MergeDeviceSpecs(deviceSpecs, allDeviceName)
if err != nil {
return nil, fmt.Errorf("failed to create CDI specification for %q device: %v", allDeviceName, err)
}
deviceSpecs = append(deviceSpecs, allDevice)
}
allDevice := createAllDevice(deviceSpecs)
deviceSpecs = append(deviceSpecs, allDevice)
allEdits := edits.NewContainerEdits()
ipcs, err := NewIPCDiscoverer(m.logger, root)
commonEdits, err := cdilib.GetCommonEdits()
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
return nil, fmt.Errorf("failed to create edits common for entities: %v", err)
}
ipcEdits, err := edits.FromDiscoverer(ipcs)
if err != nil {
return nil, fmt.Errorf("failed to create container edits for IPC sockets: %v", err)
}
// TODO: We should not have to update this after the fact
for _, s := range ipcEdits.Mounts {
s.Options = append(s.Options, "noexec")
}
allEdits.Append(ipcEdits)
common, err := NewCommonDiscoverer(m.logger, root, nvidiaCTKPath, nvmllib)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
}
deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(m.logger, root, nvidiaCTKPath, deviceSpecs)
if err != nil {
return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err)
}
commonEdits, err := edits.FromDiscoverer(discover.Merge(common, deviceFolderPermissionHooks))
if err != nil {
return nil, fmt.Errorf("failed to create container edits for common entities: %v", err)
}
allEdits.Append(commonEdits)
// We construct the spec and determine the minimum required version based on the specification.
spec := specs.Spec{
Version: "NOT_SET",
Kind: "nvidia.com/gpu",
Devices: deviceSpecs,
ContainerEdits: *allEdits.ContainerEdits,
}
minVersion, err := cdi.MinimumRequiredVersion(&spec)
if err != nil {
return nil, fmt.Errorf("failed to get minumum required CDI spec version: %v", err)
}
m.logger.Infof("Using minimum required CDI spec version: %s", minVersion)
spec.Version = minVersion
return &spec, nil
return spec.New(
spec.WithVendor(cfg.vendor),
spec.WithClass(cfg.class),
spec.WithDeviceSpecs(deviceSpecs),
spec.WithEdits(*commonEdits.ContainerEdits),
spec.WithFormat(cfg.format),
spec.WithPermissions(0644),
)
}
func (m command) generateDeviceSpecs(devicelib device.Interface, root string, nvidiaCTKPath string, namer deviceNamer) ([]specs.Device, error) {
var deviceSpecs []specs.Device
err := devicelib.VisitDevices(func(i int, d device.Device) error {
isMigEnabled, err := d.IsMigEnabled()
if err != nil {
return fmt.Errorf("failed to check whether device is MIG device: %v", err)
// MergeDeviceSpecs creates a device with the specified name which combines the edits from the previous devices.
// If a device of the specified name already exists, an error is returned.
func MergeDeviceSpecs(deviceSpecs []specs.Device, mergedDeviceName string) (specs.Device, error) {
if err := cdi.ValidateDeviceName(mergedDeviceName); err != nil {
return specs.Device{}, fmt.Errorf("invalid device name %q: %v", mergedDeviceName, err)
}
for _, d := range deviceSpecs {
if d.Name == mergedDeviceName {
return specs.Device{}, fmt.Errorf("device %q already exists", mergedDeviceName)
}
if isMigEnabled {
return nil
}
device, err := NewFullGPUDiscoverer(m.logger, root, nvidiaCTKPath, d)
if err != nil {
return fmt.Errorf("failed to create device: %v", err)
}
deviceEdits, err := edits.FromDiscoverer(device)
if err != nil {
return fmt.Errorf("failed to create container edits for device: %v", err)
}
deviceName, err := namer.GetDeviceName(i, d)
if err != nil {
return fmt.Errorf("failed to get device name: %v", err)
}
deviceSpec := specs.Device{
Name: deviceName,
ContainerEdits: *deviceEdits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, deviceSpec)
return nil
})
if err != nil {
return nil, fmt.Errorf("failed to generate CDI spec for GPU devices: %v", err)
}
err = devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
device, err := NewMigDeviceDiscoverer(m.logger, "", d, mig)
if err != nil {
return fmt.Errorf("failed to create MIG device: %v", err)
}
deviceEdits, err := edits.FromDiscoverer(device)
if err != nil {
return fmt.Errorf("failed to create container edits for MIG device: %v", err)
}
deviceName, err := namer.GetMigDeviceName(i, j, mig)
if err != nil {
return fmt.Errorf("failed to get device name: %v", err)
}
deviceSpec := specs.Device{
Name: deviceName,
ContainerEdits: *deviceEdits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, deviceSpec)
return nil
})
if err != nil {
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
}
return deviceSpecs, nil
}
// createAllDevice creates an 'all' device which combines the edits from the previous devices
func createAllDevice(deviceSpecs []specs.Device) specs.Device {
edits := edits.NewContainerEdits()
mergedEdits := edits.NewContainerEdits()
for _, d := range deviceSpecs {
edit := cdi.ContainerEdits{
ContainerEdits: &d.ContainerEdits,
}
edits.Append(&edit)
mergedEdits.Append(&edit)
}
all := specs.Device{
Name: "all",
ContainerEdits: *edits.ContainerEdits,
merged := specs.Device{
Name: mergedDeviceName,
ContainerEdits: *mergedEdits.ContainerEdits,
}
return all
}
// createParentDirsIfRequired creates the parent folders of the specified path if requried.
// Note that MkdirAll does not specifically check whether the specified path is non-empty and raises an error if it is.
// The path will be empty if filename in the current folder is specified, for example
func createParentDirsIfRequired(filename string) error {
dir := filepath.Dir(filename)
if dir == "" {
return nil
}
return os.MkdirAll(dir, 0755)
return merged, nil
}

View File

@@ -0,0 +1,117 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"testing"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/stretchr/testify/require"
)
func TestMergeDeviceSpecs(t *testing.T) {
testCases := []struct {
description string
deviceSpecs []specs.Device
mergedDeviceName string
expectedError error
expected specs.Device
}{
{
description: "no devices",
mergedDeviceName: "all",
expected: specs.Device{
Name: "all",
},
},
{
description: "one device",
mergedDeviceName: "all",
deviceSpecs: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
},
expected: specs.Device{
Name: "all",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
},
{
description: "two devices",
mergedDeviceName: "all",
deviceSpecs: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
{
Name: "gpu1",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=1"},
},
},
},
expected: specs.Device{
Name: "all",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0", "GPU=1"},
},
},
},
{
description: "has merged device",
mergedDeviceName: "gpu0",
deviceSpecs: []specs.Device{
{
Name: "gpu0",
ContainerEdits: specs.ContainerEdits{
Env: []string{"GPU=0"},
},
},
},
expectedError: fmt.Errorf("device %q already exists", "gpu0"),
},
{
description: "invalid merged device name",
mergedDeviceName: ".-not-valid",
expectedError: fmt.Errorf("invalid device name %q", ".-not-valid"),
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
mergedDevice, err := MergeDeviceSpecs(tc.deviceSpecs, tc.mergedDeviceName)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
require.EqualValues(t, tc.expected, mergedDevice)
})
}
}

View File

@@ -1,75 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// NewMigDeviceDiscoverer creates a discoverer for the specified mig device and its parent.
func NewMigDeviceDiscoverer(logger *logrus.Logger, root string, parent device.Device, d device.MigDevice) (discover.Discover, error) {
minor, ret := parent.GetMinorNumber()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
}
parentPath := fmt.Sprintf("/dev/nvidia%d", minor)
migCaps, err := nvcaps.NewMigCaps()
if err != nil {
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
}
gi, ret := d.GetGpuInstanceId()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
}
ci, ret := d.GetComputeInstanceId()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
}
giCap := nvcaps.NewGPUInstanceCap(minor, gi)
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
if err != nil {
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
}
ciCap := nvcaps.NewComputeInstanceCap(minor, gi, ci)
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
if err != nil {
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
}
deviceNodes := discover.NewCharDeviceDiscoverer(
logger,
[]string{
parentPath,
giCapDevicePath,
ciCapDevicePath,
},
root,
)
return deviceNodes, nil
}

View File

@@ -0,0 +1,159 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package root
import (
"fmt"
"io"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type loadSaver interface {
Load() (spec.Interface, error)
Save(spec.Interface) error
}
type command struct {
logger *logrus.Logger
}
type transformOptions struct {
input string
output string
}
type options struct {
transformOptions
from string
to string
}
// NewCommand constructs a generate-cdi command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build creates the CLI command
func (m command) build() *cli.Command {
opts := options{}
c := cli.Command{
Name: "root",
Usage: "Apply a root transform to a CDI specification",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &opts)
},
Action: func(c *cli.Context) error {
return m.run(c, &opts)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "input",
Usage: "Specify the file to read the CDI specification from. If this is '-' the specification is read from STDIN",
Value: "-",
Destination: &opts.input,
},
&cli.StringFlag{
Name: "output",
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
Destination: &opts.output,
},
&cli.StringFlag{
Name: "from",
Usage: "specify the root to be transformed",
Destination: &opts.from,
},
&cli.StringFlag{
Name: "to",
Usage: "specify the replacement root. If this is the same as the from root, the transform is a no-op.",
Value: "",
Destination: &opts.to,
},
}
return &c
}
func (m command) validateFlags(c *cli.Context, opts *options) error {
return nil
}
func (m command) run(c *cli.Context, opts *options) error {
spec, err := opts.Load()
if err != nil {
return fmt.Errorf("failed to load CDI specification: %w", err)
}
err = transform.NewRootTransformer(
opts.from,
opts.to,
).Transform(spec.Raw())
if err != nil {
return fmt.Errorf("failed to transform CDI specification: %w", err)
}
return opts.Save(spec)
}
// Load lodas the input CDI specification
func (o transformOptions) Load() (spec.Interface, error) {
contents, err := o.getContents()
if err != nil {
return nil, fmt.Errorf("failed to read spec contents: %v", err)
}
raw, err := cdi.ParseSpec(contents)
if err != nil {
return nil, fmt.Errorf("failed to parse CDI spec: %v", err)
}
return spec.New(
spec.WithRawSpec(raw),
)
}
func (o transformOptions) getContents() ([]byte, error) {
if o.input == "-" {
return io.ReadAll(os.Stdin)
}
return os.ReadFile(o.input)
}
// Save saves the CDI specification to the output file
func (o transformOptions) Save(s spec.Interface) error {
if o.output == "" {
_, err := s.WriteTo(os.Stdout)
if err != nil {
return fmt.Errorf("failed to write CDI spec to STDOUT: %v", err)
}
return nil
}
return s.Save(o.output)
}

View File

@@ -0,0 +1,51 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package transform
import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform/root"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
}
// NewCommand constructs a command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build creates the CLI command
func (m command) build() *cli.Command {
c := cli.Command{
Name: "transform",
Usage: "Apply a transform to a CDI specification",
}
c.Flags = []cli.Flag{}
c.Subcommands = []*cli.Command{
root.NewCommand(m.logger),
}
return &c
}

View File

@@ -18,6 +18,7 @@ package chmod
import (
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
@@ -133,7 +134,12 @@ func (m command) run(c *cli.Context, cfg *config) error {
func (m command) getPaths(root string, paths []string) []string {
var pathsInRoot []string
for _, f := range paths {
pathsInRoot = append(pathsInRoot, filepath.Join(root, f))
path := filepath.Join(root, f)
if _, err := os.Stat(path); err != nil {
m.logger.Debugf("Skipping path %q: %v", path, err)
continue
}
pathsInRoot = append(pathsInRoot, path)
}
return pathsInRoot

View File

@@ -84,6 +84,12 @@ func (m command) run(c *cli.Context, cfg *config) error {
return fmt.Errorf("failed to determined container root: %v", err)
}
_, err = os.Stat(filepath.Join(containerRoot, "/etc/ld.so.cache"))
if err != nil && os.IsNotExist(err) {
m.logger.Debugf("No ld.so.cache found, skipping update")
return nil
}
err = m.createConfig(containerRoot, cfg.folders.Value())
if err != nil {
return fmt.Errorf("failed to update ld.so.conf: %v", err)
@@ -105,6 +111,10 @@ func (m command) createConfig(root string, folders []string) error {
return nil
}
if err := os.MkdirAll(filepath.Join(root, "/etc/ld.so.conf.d"), 0755); err != nil {
return fmt.Errorf("failed to create ld.so.conf.d: %v", err)
}
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf")
if err != nil {
return fmt.Errorf("failed to create config file: %v", err)

View File

@@ -22,8 +22,8 @@ import (
"os"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/nvidia"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/crio"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/docker"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/crio"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine/docker"
"github.com/pelletier/go-toml"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
@@ -127,13 +127,14 @@ func (m command) configureDocker(c *cli.Context, config *config) error {
configFilePath = defaultDockerConfigFilePath
}
cfg, err := docker.LoadConfig(configFilePath)
cfg, err := docker.New(
docker.WithPath(configFilePath),
)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
err = docker.UpdateConfig(
cfg,
err = cfg.AddRuntime(
config.nvidiaOptions.RuntimeName,
config.nvidiaOptions.RuntimePath,
config.nvidiaOptions.SetAsDefault,
@@ -150,12 +151,16 @@ func (m command) configureDocker(c *cli.Context, config *config) error {
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
return nil
}
err = docker.FlushConfig(cfg, configFilePath)
n, err := cfg.Save(configFilePath)
if err != nil {
return fmt.Errorf("unable to flush config: %v", err)
}
m.logger.Infof("Wrote updated config to %v", configFilePath)
if n == 0 {
m.logger.Infof("Removed empty config from %v", configFilePath)
} else {
m.logger.Infof("Wrote updated config to %v", configFilePath)
}
m.logger.Infof("It is recommended that the docker daemon be restarted.")
return nil
@@ -168,13 +173,14 @@ func (m command) configureCrio(c *cli.Context, config *config) error {
configFilePath = defaultCrioConfigFilePath
}
cfg, err := crio.LoadConfig(configFilePath)
cfg, err := crio.New(
crio.WithPath(configFilePath),
)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
err = crio.UpdateConfig(
cfg,
err = cfg.AddRuntime(
config.nvidiaOptions.RuntimeName,
config.nvidiaOptions.RuntimePath,
config.nvidiaOptions.SetAsDefault,
@@ -191,12 +197,16 @@ func (m command) configureCrio(c *cli.Context, config *config) error {
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
return nil
}
err = crio.FlushConfig(configFilePath, cfg)
n, err := cfg.Save(configFilePath)
if err != nil {
return fmt.Errorf("unable to flush config: %v", err)
}
m.logger.Infof("Wrote updated config to %v", configFilePath)
if n == 0 {
m.logger.Infof("Removed empty config from %v", configFilePath)
} else {
m.logger.Infof("Wrote updated config to %v", configFilePath)
}
m.logger.Infof("It is recommended that the cri-o daemon be restarted.")
return nil

View File

@@ -28,29 +28,40 @@ import (
type allPossible struct {
logger *logrus.Logger
driverRoot string
devRoot string
deviceMajors devices.Devices
migCaps nvcaps.MigCaps
}
// newAllPossible returns a new allPossible device node lister.
// This lister lists all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
func newAllPossible(logger *logrus.Logger, driverRoot string) (nodeLister, error) {
func newAllPossible(logger *logrus.Logger, devRoot string) (nodeLister, error) {
deviceMajors, err := devices.GetNVIDIADevices()
if err != nil {
return nil, fmt.Errorf("failed reading device majors: %v", err)
}
var requiredMajors []devices.Name
migCaps, err := nvcaps.NewMigCaps()
if err != nil {
return nil, fmt.Errorf("failed to read MIG caps: %v", err)
}
if migCaps == nil {
migCaps = make(nvcaps.MigCaps)
} else {
requiredMajors = append(requiredMajors, devices.NVIDIACaps)
}
requiredMajors = append(requiredMajors, devices.NVIDIAGPU, devices.NVIDIAUVM)
for _, name := range requiredMajors {
if !deviceMajors.Exists(name) {
return nil, fmt.Errorf("missing required device major %s", name)
}
}
l := allPossible{
logger: logger,
driverRoot: driverRoot,
devRoot: devRoot,
deviceMajors: deviceMajors,
migCaps: migCaps,
}
@@ -61,7 +72,7 @@ func newAllPossible(logger *logrus.Logger, driverRoot string) (nodeLister, error
// DeviceNodes returns a list of all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
func (m allPossible) DeviceNodes() ([]deviceNode, error) {
gpus, err := nvpci.NewFrom(
filepath.Join(m.driverRoot, nvpci.PCIDevicesRoot),
filepath.Join(m.devRoot, nvpci.PCIDevicesRoot),
).GetGPUs()
if err != nil {
return nil, fmt.Errorf("failed to get GPU information: %v", err)
@@ -69,7 +80,7 @@ func (m allPossible) DeviceNodes() ([]deviceNode, error) {
count := len(gpus)
if count == 0 {
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
return nil, nil
}
@@ -168,7 +179,7 @@ func (m allPossible) newDeviceNode(deviceName devices.Name, path string, minor i
major, _ := m.deviceMajors.Get(deviceName)
return deviceNode{
path: filepath.Join(m.driverRoot, path),
path: filepath.Join(m.devRoot, path),
major: uint32(major),
minor: uint32(minor),
}

View File

@@ -24,6 +24,8 @@ import (
"strings"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/fsnotify/fsnotify"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
@@ -38,11 +40,13 @@ type command struct {
}
type config struct {
devCharPath string
driverRoot string
dryRun bool
watch bool
createAll bool
devCharPath string
driverRoot string
dryRun bool
watch bool
createAll bool
createDeviceNodes bool
loadKernelModules bool
}
// NewCommand constructs a command sub-command with the specified logger
@@ -97,6 +101,18 @@ func (m command) build() *cli.Command {
Destination: &cfg.createAll,
EnvVars: []string{"CREATE_ALL"},
},
&cli.BoolFlag{
Name: "load-kernel-modules",
Usage: "Load the NVIDIA kernel modules before creating symlinks. This is only applicable when --create-all is set.",
Destination: &cfg.loadKernelModules,
EnvVars: []string{"LOAD_KERNEL_MODULES"},
},
&cli.BoolFlag{
Name: "create-device-nodes",
Usage: "Create the NVIDIA control device nodes in the driver root if they do not exist. This is only applicable when --create-all is set",
Destination: &cfg.createDeviceNodes,
EnvVars: []string{"CREATE_DEVICE_NODES"},
},
&cli.BoolFlag{
Name: "dry-run",
Usage: "If set, the command will not create any symlinks.",
@@ -114,6 +130,16 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error {
return fmt.Errorf("create-all and watch are mutually exclusive")
}
if cfg.loadKernelModules && !cfg.createAll {
m.logger.Warn("load-kernel-modules is only applicable when create-all is set; ignoring")
cfg.loadKernelModules = false
}
if cfg.createDeviceNodes && !cfg.createAll {
m.logger.Warn("create-device-nodes is only applicable when create-all is set; ignoring")
cfg.createDeviceNodes = false
}
return nil
}
@@ -137,6 +163,8 @@ func (m command) run(c *cli.Context, cfg *config) error {
WithDriverRoot(cfg.driverRoot),
WithDryRun(cfg.dryRun),
WithCreateAll(cfg.createAll),
WithLoadKernelModules(cfg.loadKernelModules),
WithCreateDeviceNodes(cfg.createDeviceNodes),
)
if err != nil {
return fmt.Errorf("failed to create symlink creator: %v", err)
@@ -186,12 +214,15 @@ create:
}
type linkCreator struct {
logger *logrus.Logger
lister nodeLister
driverRoot string
devCharPath string
dryRun bool
createAll bool
logger *logrus.Logger
lister nodeLister
driverRoot string
devRoot string
devCharPath string
dryRun bool
createAll bool
createDeviceNodes bool
loadKernelModules bool
}
// Creator is an interface for creating symlinks to /dev/nv* devices in /dev/char.
@@ -214,29 +245,76 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) {
if c.driverRoot == "" {
c.driverRoot = "/"
}
if c.devRoot == "" {
c.devRoot = "/"
}
if c.devCharPath == "" {
c.devCharPath = defaultDevCharPath
}
if err := c.setup(); err != nil {
return nil, err
}
if c.createAll {
lister, err := newAllPossible(c.logger, c.driverRoot)
lister, err := newAllPossible(c.logger, c.devRoot)
if err != nil {
return nil, fmt.Errorf("failed to create all possible device lister: %v", err)
}
c.lister = lister
} else {
c.lister = existing{c.logger, c.driverRoot}
c.lister = existing{c.logger, c.devRoot}
}
return c, nil
}
func (m linkCreator) setup() error {
if !m.loadKernelModules && !m.createDeviceNodes {
return nil
}
if m.loadKernelModules {
modules := nvmodules.New(
nvmodules.WithLogger(m.logger),
nvmodules.WithDryRun(m.dryRun),
nvmodules.WithRoot(m.driverRoot),
)
if err := modules.LoadAll(); err != nil {
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
}
}
if m.createDeviceNodes {
devices, err := nvdevices.New(
nvdevices.WithLogger(m.logger),
nvdevices.WithDryRun(m.dryRun),
nvdevices.WithDevRoot(m.devRoot),
)
if err != nil {
return err
}
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create NVIDIA device nodes: %v", err)
}
}
return nil
}
// WithDriverRoot sets the driver root path.
// This is the path in which kernel modules must be loaded.
func WithDriverRoot(root string) Option {
return func(c *linkCreator) {
c.driverRoot = root
}
}
// WithDevRoot sets the root path for the /dev directory.
func WithDevRoot(root string) Option {
return func(c *linkCreator) {
c.devRoot = root
}
}
// WithDevCharPath sets the path at which the symlinks will be created.
func WithDevCharPath(path string) Option {
return func(c *linkCreator) {
@@ -265,6 +343,20 @@ func WithCreateAll(createAll bool) Option {
}
}
// WithLoadKernelModules sets the loadKernelModules flag for the linkCreator.
func WithLoadKernelModules(loadKernelModules bool) Option {
return func(lc *linkCreator) {
lc.loadKernelModules = loadKernelModules
}
}
// WithCreateDeviceNodes sets the createDeviceNodes flag for the linkCreator.
func WithCreateDeviceNodes(createDeviceNodes bool) Option {
return func(lc *linkCreator) {
lc.createDeviceNodes = createDeviceNodes
}
}
// CreateLinks creates symlinks for all NVIDIA device nodes found in the driver root.
func (m linkCreator) CreateLinks() error {
deviceNodes, err := m.lister.DeviceNodes()

View File

@@ -30,8 +30,8 @@ type nodeLister interface {
}
type existing struct {
logger *logrus.Logger
driverRoot string
logger *logrus.Logger
devRoot string
}
// DeviceNodes returns a list of NVIDIA device nodes in the specified root.
@@ -39,7 +39,7 @@ type existing struct {
func (m existing) DeviceNodes() ([]deviceNode, error) {
locator := lookup.NewCharDeviceLocator(
lookup.WithLogger(m.logger),
lookup.WithRoot(m.driverRoot),
lookup.WithRoot(m.devRoot),
lookup.WithOptional(true),
)
@@ -54,7 +54,7 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
}
if len(devices) == 0 && len(capDevices) == 0 {
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
return nil, nil
}

View File

@@ -0,0 +1,126 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package createdevicenodes
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
}
type options struct {
driverRoot string
dryRun bool
control bool
loadKernelModules bool
}
// NewCommand constructs a command sub-command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build
func (m command) build() *cli.Command {
opts := options{}
c := cli.Command{
Name: "create-device-nodes",
Usage: "A utility to create NVIDIA device ndoes",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &opts)
},
Action: func(c *cli.Context) error {
return m.run(c, &opts)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "driver-root",
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
Value: "/",
Destination: &opts.driverRoot,
EnvVars: []string{"DRIVER_ROOT"},
},
&cli.BoolFlag{
Name: "control-devices",
Usage: "create all control device nodes: nvidiactl, nvidia-modeset, nvidia-uvm, nvidia-uvm-tools",
Destination: &opts.control,
},
&cli.BoolFlag{
Name: "load-kernel-modules",
Usage: "load the NVIDIA Kernel Modules before creating devices nodes",
Destination: &opts.loadKernelModules,
},
&cli.BoolFlag{
Name: "dry-run",
Usage: "if set, the command will not create any symlinks.",
Value: false,
Destination: &opts.dryRun,
EnvVars: []string{"DRY_RUN"},
},
}
return &c
}
func (m command) validateFlags(r *cli.Context, opts *options) error {
return nil
}
func (m command) run(c *cli.Context, opts *options) error {
if opts.loadKernelModules {
modules := nvmodules.New(
nvmodules.WithLogger(m.logger),
nvmodules.WithDryRun(opts.dryRun),
nvmodules.WithRoot(opts.driverRoot),
)
if err := modules.LoadAll(); err != nil {
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
}
}
if opts.control {
devices, err := nvdevices.New(
nvdevices.WithLogger(m.logger),
nvdevices.WithDryRun(opts.dryRun),
nvdevices.WithDevRoot(opts.driverRoot),
)
if err != nil {
return err
}
m.logger.Infof("Creating control device nodes at %s", opts.driverRoot)
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create NVIDIA control device nodes: %v", err)
}
}
return nil
}

View File

@@ -18,6 +18,7 @@ package system
import (
devchar "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks"
devicenodes "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-device-nodes"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
@@ -43,6 +44,7 @@ func (m command) build() *cli.Command {
system.Subcommands = []*cli.Command{
devchar.NewCommand(m.logger),
devicenodes.NewCommand(m.logger),
}
return &system

View File

@@ -14,10 +14,10 @@
# Supported OSs by architecture
AMD64_TARGETS := ubuntu20.04 ubuntu18.04 ubuntu16.04 debian10 debian9
X86_64_TARGETS := fedora35 centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
PPC64LE_TARGETS := ubuntu18.04 ubuntu16.04 centos7 centos8 rhel7 rhel8
ARM64_TARGETS := ubuntu20.04 ubuntu18.04
AARCH64_TARGETS := fedora35 centos8 rhel8 amazonlinux2
AARCH64_TARGETS := centos8 rhel8 amazonlinux2
# Define top-level build targets
docker%: SHELL:=/bin/bash
@@ -88,53 +88,31 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
LIBNVIDIA_CONTAINER_VERSION ?= $(LIB_VERSION)
LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
# private ubuntu target
--ubuntu%: OS := ubuntu
--ubuntu%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
--ubuntu%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
--ubuntu%: PKG_REV := 1
# private debian target
--debian%: OS := debian
--debian%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
--debian%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
--debian%: PKG_REV := 1
# private centos target
--centos%: OS := centos
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--centos%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--centos%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--centos%: CONFIG_TOML_SUFFIX := rpm-yum
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
# private fedora target
--fedora%: OS := fedora
--fedora%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--fedora%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--fedora%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--fedora%: CONFIG_TOML_SUFFIX := rpm-yum
# The fedora(35) base image has very slow performance when building aarch64 packages.
# Since our primary concern here is glibc versions, we use the older glibc version available in centos8.
--fedora35%: BASEIMAGE = quay.io/centos/centos:stream8
# private amazonlinux target
--amazonlinux%: OS := amazonlinux
--amazonlinux%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--amazonlinux%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--amazonlinux%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--amazonlinux%: CONFIG_TOML_SUFFIX := rpm-yum
# private opensuse-leap target
--opensuse-leap%: OS = opensuse-leap
--opensuse-leap%: BASEIMAGE = opensuse/leap:$(VERSION)
--opensuse-leap%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--opensuse-leap%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
# private rhel target (actually built on centos)
--rhel%: OS := centos
--rhel%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
--rhel%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
@@ -155,8 +133,8 @@ docker-build-%:
--build-arg BASEIMAGE="$(BASEIMAGE)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg PKG_NAME="$(LIB_NAME)" \
--build-arg PKG_VERS="$(LIB_VERSION)" \
--build-arg PKG_REV="$(PKG_REV)" \
--build-arg PKG_VERS="$(PACKAGE_VERSION)" \
--build-arg PKG_REV="$(PACKAGE_REVISION)" \
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \

24
go.mod
View File

@@ -1,31 +1,30 @@
module github.com/NVIDIA/nvidia-container-toolkit
go 1.18
go 1.20
require (
github.com/BurntSushi/toml v1.0.0
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
github.com/BurntSushi/toml v1.2.1
github.com/NVIDIA/go-nvml v0.12.0-0
github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a
github.com/fsnotify/fsnotify v1.5.4
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb
github.com/opencontainers/runtime-spec v1.1.0-rc.2
github.com/pelletier/go-toml v1.9.4
github.com/sirupsen/logrus v1.9.0
github.com/stretchr/testify v1.7.0
github.com/stretchr/testify v1.8.1
github.com/urfave/cli/v2 v2.3.0
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230209143738-95328d8c4438
golang.org/x/mod v0.5.0
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6
sigs.k8s.io/yaml v1.3.0
golang.org/x/sys v0.7.0
)
require (
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/opencontainers/runc v1.1.4 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/opencontainers/runc v1.1.6 // indirect
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
github.com/opencontainers/selinux v1.10.1 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
@@ -33,4 +32,5 @@ require (
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)

76
go.sum
View File

@@ -1,84 +1,76 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.0.0 h1:dtDWrepsVPfW9H/4y7dDgFc2MBUSeJhlaDtK13CxFlU=
github.com/BurntSushi/toml v1.0.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82 h1:x751Xx1tdxkiA/sdkv2J769n21UbYKzVOpe9S/h1M3k=
github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak=
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/NVIDIA/go-nvml v0.12.0-0 h1:eHYNHbzAsMgWYshf6dEmTY66/GCXnORJFnzm3TNH4mc=
github.com/NVIDIA/go-nvml v0.12.0-0/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a h1:sP3PcgyIkRlHqfF3Jfpe/7G8kf/qpzG4C8r94y9hLbE=
github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a/go.mod h1:xMRa4fJgXzSDFUCURSimOUgoSc+odohvO3uXT9xjqH0=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.1 h1:r/myEWzV9lfsM1tFLgDyu0atFtJ1fXn261LKYj/3DxU=
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runc v1.1.4 h1:nRCz/8sKg6K6jgYAFLDlXzPeITBZJyX28DBVhWD+5dg=
github.com/opencontainers/runc v1.1.4/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb h1:1xSVPOd7/UA+39/hXEGnBJ13p6JFB0E1EvQFlrRDOXI=
github.com/opencontainers/runc v1.1.6 h1:XbhB8IfG/EsnhNvZtNdLB0GBw92GYEFvKlhaJk9jUgA=
github.com/opencontainers/runc v1.1.6/go.mod h1:CbUumNnWCuTGFukNXahoo/RFBZvDAgRh/smNYNOhA50=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.1.0-rc.2 h1:ucBtEms2tamYYW/SvGpvq9yUN0NEVL6oyLEwDcTSrk8=
github.com/opencontainers/runtime-spec v1.1.0-rc.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.10.1 h1:09LIPVRP3uuZGQvgR+SgMSNBd1Eb3vlRbGqQpoHsF8w=
github.com/opencontainers/selinux v1.10.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM=
github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M=
github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -86,33 +78,19 @@ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHo
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342 h1:083n9fJt2dWOpJd/X/q9Xgl5XtQLL22uSFYbzVqJssg=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342/go.mod h1:GStidGxhaqJhYFW1YpOnLvYCbL2EsM0od7IW4u7+JgU=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230209143738-95328d8c4438 h1:+qRai7XRl8omFQVCeHcaWzL542Yw64vfmuXG+79ZCIc=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230209143738-95328d8c4438/go.mod h1:GStidGxhaqJhYFW1YpOnLvYCbL2EsM0od7IW4u7+JgU=
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6 h1:cy1ko5847T/lJ45eyg/7uLprIE/amW5IXxGtEnQdYMI=
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=

View File

@@ -21,13 +21,19 @@ import (
"io"
"os"
"path"
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/pelletier/go-toml"
"github.com/sirupsen/logrus"
)
const (
configOverride = "XDG_CONFIG_HOME"
configFilePath = "nvidia-container-runtime/config.toml"
nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook"
)
var (
@@ -45,9 +51,12 @@ var (
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
type Config struct {
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
NVIDIAContainerRuntimeHookConfig RuntimeHookConfig `toml:"nvidia-container-runtime-hook"`
}
// GetConfig sets up the config struct. Values are read from a toml file
@@ -91,6 +100,8 @@ func getConfigFrom(toml *toml.Tree) (*Config, error) {
return cfg, nil
}
cfg.AcceptEnvvarUnprivileged = toml.GetDefault("accept-nvidia-visible-devices-envvar-when-unprivileged", cfg.AcceptEnvvarUnprivileged).(bool)
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
runtimeConfig, err := getRuntimeConfigFrom(toml)
@@ -99,12 +110,19 @@ func getConfigFrom(toml *toml.Tree) (*Config, error) {
}
cfg.NVIDIAContainerRuntimeConfig = *runtimeConfig
runtimeHookConfig, err := getRuntimeHookConfigFrom(toml)
if err != nil {
return nil, fmt.Errorf("failed to load nvidia-container-runtime-hook config: %v", err)
}
cfg.NVIDIAContainerRuntimeHookConfig = *runtimeHookConfig
return cfg, nil
}
// getDefaultConfig defines the default values for the config
func getDefaultConfig() *Config {
c := Config{
AcceptEnvvarUnprivileged: true,
NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(),
NVIDIACTKConfig: *getDefaultCTKConfig(),
NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(),
@@ -112,3 +130,41 @@ func getDefaultConfig() *Config {
return &c
}
// ResolveNVIDIAContainerRuntimeHookPath resolves the path the nvidia-container-runtime-hook binary.
func ResolveNVIDIAContainerRuntimeHookPath(logger *logrus.Logger, nvidiaContainerRuntimeHookPath string) string {
return resolveWithDefault(
logger,
"NVIDIA Container Runtime Hook",
nvidiaContainerRuntimeHookPath,
nvidiaContainerRuntimeHookDefaultPath,
)
}
// resolveWithDefault resolves the path to the specified binary.
// If an absolute path is specified, it is used directly without searching for the binary.
// If the binary cannot be found in the path, the specified default is used instead.
func resolveWithDefault(logger *logrus.Logger, label string, path string, defaultPath string) string {
if filepath.IsAbs(path) {
logger.Debugf("Using specified %v path %v", label, path)
return path
}
if path == "" {
path = filepath.Base(defaultPath)
}
logger.Debugf("Locating %v as %v", label, path)
lookup := lookup.NewExecutableLocator(logger, "")
resolvedPath := defaultPath
targets, err := lookup.Locate(path)
if err != nil {
logger.Warnf("Failed to locate %v: %v", path, err)
} else {
logger.Debugf("Found %v candidates: %v", path, targets)
resolvedPath = targets[0]
}
logger.Debugf("Using %v path %v", label, path)
return resolvedPath
}

View File

@@ -57,6 +57,7 @@ func TestGetConfig(t *testing.T) {
{
description: "empty config is default",
expectedConfig: &Config{
AcceptEnvvarUnprivileged: true,
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
},
@@ -69,8 +70,15 @@ func TestGetConfig(t *testing.T) {
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{"cdi.k8s.io/"},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
@@ -79,6 +87,7 @@ func TestGetConfig(t *testing.T) {
{
description: "config options set inline",
contents: []string{
"accept-nvidia-visible-devices-envvar-when-unprivileged = false",
"nvidia-container-cli.root = \"/bar/baz\"",
"nvidia-container-runtime.debug = \"/foo/bar\"",
"nvidia-container-runtime.experimental = true",
@@ -86,10 +95,14 @@ func TestGetConfig(t *testing.T) {
"nvidia-container-runtime.log-level = \"debug\"",
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
"nvidia-container-runtime.mode = \"not-auto\"",
"nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"",
"nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"nvidia-container-runtime-hook.path = \"/foo/bar/nvidia-container-runtime-hook\"",
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
},
expectedConfig: &Config{
AcceptEnvvarUnprivileged: false,
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz",
},
@@ -102,8 +115,18 @@ func TestGetConfig(t *testing.T) {
CSV: csvModeConfig{
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "example.vendor.com/device",
AnnotationPrefixes: []string{
"cdi.k8s.io/",
"example.vendor.com/",
},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "/foo/bar/nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk",
},
@@ -112,6 +135,7 @@ func TestGetConfig(t *testing.T) {
{
description: "config options set in section",
contents: []string{
"accept-nvidia-visible-devices-envvar-when-unprivileged = false",
"[nvidia-container-cli]",
"root = \"/bar/baz\"",
"[nvidia-container-runtime]",
@@ -121,12 +145,18 @@ func TestGetConfig(t *testing.T) {
"log-level = \"debug\"",
"runtimes = [\"/some/runtime\",]",
"mode = \"not-auto\"",
"[nvidia-container-runtime.modes.cdi]",
"default-kind = \"example.vendor.com/device\"",
"annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
"[nvidia-container-runtime.modes.csv]",
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"[nvidia-container-runtime-hook]",
"path = \"/foo/bar/nvidia-container-runtime-hook\"",
"[nvidia-ctk]",
"path = \"/foo/bar/nvidia-ctk\"",
},
expectedConfig: &Config{
AcceptEnvvarUnprivileged: false,
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz",
},
@@ -139,8 +169,18 @@ func TestGetConfig(t *testing.T) {
CSV: csvModeConfig{
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "example.vendor.com/device",
AnnotationPrefixes: []string{
"cdi.k8s.io/",
"example.vendor.com/",
},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "/foo/bar/nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk",
},

View File

@@ -1,125 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package crio
import (
"fmt"
"os"
"github.com/pelletier/go-toml"
log "github.com/sirupsen/logrus"
)
// LoadConfig loads the cri-o config from disk
func LoadConfig(config string) (*toml.Tree, error) {
log.Infof("Loading config: %v", config)
info, err := os.Stat(config)
if os.IsExist(err) && info.IsDir() {
return nil, fmt.Errorf("config file is a directory")
}
configFile := config
if os.IsNotExist(err) {
configFile = "/dev/null"
log.Infof("Config file does not exist, creating new one")
}
cfg, err := toml.LoadFile(configFile)
if err != nil {
return nil, err
}
log.Infof("Successfully loaded config")
return cfg, nil
}
// UpdateConfig updates the cri-o config to include the NVIDIA Container Runtime
func UpdateConfig(config *toml.Tree, runtimeClass string, runtimePath string, setAsDefault bool) error {
switch runc := config.Get("crio.runtime.runtimes.runc").(type) {
case *toml.Tree:
runc, _ = toml.Load(runc.String())
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass}, runc)
}
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_path"}, runtimePath)
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_type"}, "oci")
if setAsDefault {
config.SetPath([]string{"crio", "runtime", "default_runtime"}, runtimeClass)
}
return nil
}
// RevertConfig reverts the cri-o config to remove the NVIDIA Container Runtime
func RevertConfig(config *toml.Tree, runtimeClass string) error {
if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok {
if runtimeClass == runtime {
config.DeletePath([]string{"crio", "runtime", "default_runtime"})
}
}
runtimeClassPath := []string{"crio", "runtime", "runtimes", runtimeClass}
config.DeletePath(runtimeClassPath)
for i := 0; i < len(runtimeClassPath); i++ {
remainingPath := runtimeClassPath[:len(runtimeClassPath)-i]
if entry, ok := config.GetPath(remainingPath).(*toml.Tree); ok {
if len(entry.Keys()) != 0 {
break
}
config.DeletePath(remainingPath)
}
}
return nil
}
// FlushConfig flushes the updated/reverted config out to disk
func FlushConfig(config string, cfg *toml.Tree) error {
log.Infof("Flushing config")
output, err := cfg.ToTomlString()
if err != nil {
return fmt.Errorf("unable to convert to TOML: %v", err)
}
switch len(output) {
case 0:
err := os.Remove(config)
if err != nil {
return fmt.Errorf("unable to remove empty file: %v", err)
}
log.Infof("Config empty, removing file")
default:
f, err := os.Create(config)
if err != nil {
return fmt.Errorf("unable to open '%v' for writing: %v", config, err)
}
defer f.Close()
_, err = f.WriteString(output)
if err != nil {
return fmt.Errorf("unable to write output: %v", err)
}
}
log.Infof("Successfully flushed config")
return nil
}

View File

@@ -1,117 +0,0 @@
/**
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package docker
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
log "github.com/sirupsen/logrus"
)
// LoadConfig loads the docker config from disk
func LoadConfig(configFilePath string) (map[string]interface{}, error) {
log.Infof("Loading docker config from %v", configFilePath)
info, err := os.Stat(configFilePath)
if os.IsExist(err) && info.IsDir() {
return nil, fmt.Errorf("config file is a directory")
}
cfg := make(map[string]interface{})
if os.IsNotExist(err) {
log.Infof("Config file does not exist, creating new one")
return cfg, nil
}
readBytes, err := ioutil.ReadFile(configFilePath)
if err != nil {
return nil, fmt.Errorf("unable to read config: %v", err)
}
reader := bytes.NewReader(readBytes)
if err := json.NewDecoder(reader).Decode(&cfg); err != nil {
return nil, err
}
log.Infof("Successfully loaded config")
return cfg, nil
}
// UpdateConfig updates the docker config to include the nvidia runtimes
func UpdateConfig(config map[string]interface{}, runtimeName string, runtimePath string, setAsDefault bool) error {
// Read the existing runtimes
runtimes := make(map[string]interface{})
if _, exists := config["runtimes"]; exists {
runtimes = config["runtimes"].(map[string]interface{})
}
// Add / update the runtime definitions
runtimes[runtimeName] = map[string]interface{}{
"path": runtimePath,
"args": []string{},
}
// Update the runtimes definition
if len(runtimes) > 0 {
config["runtimes"] = runtimes
}
if setAsDefault {
config["default-runtime"] = runtimeName
}
return nil
}
// FlushConfig flushes the updated/reverted config out to disk
func FlushConfig(cfg map[string]interface{}, configFilePath string) error {
log.Infof("Flushing docker config to %v", configFilePath)
output, err := json.MarshalIndent(cfg, "", " ")
if err != nil {
return fmt.Errorf("unable to convert to JSON: %v", err)
}
switch len(output) {
case 0:
err := os.Remove(configFilePath)
if err != nil {
return fmt.Errorf("unable to remove empty file: %v", err)
}
log.Infof("Config empty, removing file")
default:
f, err := os.Create(configFilePath)
if err != nil {
return fmt.Errorf("unable to open %v for writing: %v", configFilePath, err)
}
defer f.Close()
_, err = f.WriteString(string(output))
if err != nil {
return fmt.Errorf("unable to write output: %v", err)
}
}
log.Infof("Successfully flushed config")
return nil
}

View File

@@ -0,0 +1,25 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package engine
// Interface defines the API for a runtime config updater.
type Interface interface {
DefaultRuntime() string
AddRuntime(string, string, bool) error
RemoveRuntime(string) error
Save(string) (int64, error)
}

View File

@@ -0,0 +1,140 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package containerd
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine"
"github.com/pelletier/go-toml"
)
// ConfigV1 represents a version 1 containerd config
type ConfigV1 Config
var _ engine.Interface = (*ConfigV1)(nil)
// AddRuntime adds a runtime to the containerd config
func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool) error {
if c == nil || c.Tree == nil {
return fmt.Errorf("config is nil")
}
config := *c.Tree
config.Set("version", int64(1))
switch runc := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", "runc"}).(type) {
case *toml.Tree:
runc, _ = toml.Load(runc.String())
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name}, runc)
}
if config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", name}) == nil {
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_type"}, c.RuntimeType)
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_root"}, "")
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_engine"}, "")
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false)
}
if len(c.ContainerAnnotations) > 0 {
annotations, err := (*Config)(c).getRuntimeAnnotations([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"})
if err != nil {
return err
}
annotations = append(c.ContainerAnnotations, annotations...)
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}, annotations)
}
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "BinaryName"}, path)
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "Runtime"}, path)
if setAsDefault && c.UseDefaultRuntimeName {
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime_name"}, name)
} else if setAsDefault {
// Note: This is deprecated in containerd 1.4.0 and will be removed in 1.5.0
if config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime"}) == nil {
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_type"}, c.RuntimeType)
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_root"}, "")
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "runtime_engine"}, "")
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "privileged_without_host_devices"}, false)
}
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "BinaryName"}, path)
config.SetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "Runtime"}, path)
}
*c.Tree = config
return nil
}
// DefaultRuntime returns the default runtime for the cri-o config
func (c ConfigV1) DefaultRuntime() string {
if runtime, ok := c.GetPath([]string{"plugins", "cri", "containerd", "default_runtime_name"}).(string); ok {
return runtime
}
return ""
}
// RemoveRuntime removes a runtime from the docker config
func (c *ConfigV1) RemoveRuntime(name string) error {
if c == nil || c.Tree == nil {
return nil
}
config := *c.Tree
// If the specified runtime was set as the default runtime we need to remove the default runtime too.
runtimePath, ok := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "BinaryName"}).(string)
if !ok || runtimePath == "" {
runtimePath, _ = config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "Runtime"}).(string)
}
defaultRuntimePath, ok := config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "BinaryName"}).(string)
if !ok || defaultRuntimePath == "" {
defaultRuntimePath, _ = config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime", "options", "Runtime"}).(string)
}
if runtimePath != "" && defaultRuntimePath != "" && runtimePath == defaultRuntimePath {
config.DeletePath([]string{"plugins", "cri", "containerd", "default_runtime"})
}
config.DeletePath([]string{"plugins", "cri", "containerd", "runtimes", name})
if runtime, ok := config.GetPath([]string{"plugins", "cri", "containerd", "default_runtime_name"}).(string); ok {
if runtime == name {
config.DeletePath([]string{"plugins", "cri", "containerd", "default_runtime_name"})
}
}
runtimeConfigPath := []string{"plugins", "cri", "containerd", "runtimes", name}
for i := 0; i < len(runtimeConfigPath); i++ {
if runtimes, ok := config.GetPath(runtimeConfigPath[:len(runtimeConfigPath)-i]).(*toml.Tree); ok {
if len(runtimes.Keys()) == 0 {
config.DeletePath(runtimeConfigPath[:len(runtimeConfigPath)-i])
}
}
}
if len(config.Keys()) == 1 && config.Keys()[0] == "version" {
config.Delete("version")
}
*c.Tree = config
return nil
}
// Save wrotes the config to a file
func (c ConfigV1) Save(path string) (int64, error) {
return (Config)(c).Save(path)
}

View File

@@ -0,0 +1,161 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package containerd
import (
"fmt"
"os"
"github.com/pelletier/go-toml"
)
// AddRuntime adds a runtime to the containerd config
func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
if c == nil || c.Tree == nil {
return fmt.Errorf("config is nil")
}
config := *c.Tree
config.Set("version", int64(2))
switch runc := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", "runc"}).(type) {
case *toml.Tree:
runc, _ = toml.Load(runc.String())
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name}, runc)
}
if config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name}) == nil {
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "runtime_type"}, c.RuntimeType)
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "runtime_root"}, "")
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "runtime_engine"}, "")
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false)
}
if len(c.ContainerAnnotations) > 0 {
annotations, err := c.getRuntimeAnnotations([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"})
if err != nil {
return err
}
annotations = append(c.ContainerAnnotations, annotations...)
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}, annotations)
}
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "options", "BinaryName"}, path)
if setAsDefault {
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}, name)
}
*c.Tree = config
return nil
}
func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) {
if c == nil || c.Tree == nil {
return nil, nil
}
config := *c.Tree
if !config.HasPath(path) {
return nil, nil
}
annotationsI, ok := config.GetPath(path).([]interface{})
if !ok {
return nil, fmt.Errorf("invalid annotations: %v", annotationsI)
}
var annotations []string
for _, annotation := range annotationsI {
a, ok := annotation.(string)
if !ok {
return nil, fmt.Errorf("invalid annotation: %v", annotation)
}
annotations = append(annotations, a)
}
return annotations, nil
}
// DefaultRuntime returns the default runtime for the cri-o config
func (c Config) DefaultRuntime() string {
if runtime, ok := c.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok {
return runtime
}
return ""
}
// RemoveRuntime removes a runtime from the docker config
func (c *Config) RemoveRuntime(name string) error {
if c == nil || c.Tree == nil {
return nil
}
config := *c.Tree
config.DeletePath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name})
if runtime, ok := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok {
if runtime == name {
config.DeletePath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"})
}
}
runtimePath := []string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name}
for i := 0; i < len(runtimePath); i++ {
if runtimes, ok := config.GetPath(runtimePath[:len(runtimePath)-i]).(*toml.Tree); ok {
if len(runtimes.Keys()) == 0 {
config.DeletePath(runtimePath[:len(runtimePath)-i])
}
}
}
if len(config.Keys()) == 1 && config.Keys()[0] == "version" {
config.Delete("version")
}
*c.Tree = config
return nil
}
// Save writes the config to the specified path
func (c Config) Save(path string) (int64, error) {
config := c.Tree
output, err := config.ToTomlString()
if err != nil {
return 0, fmt.Errorf("unable to convert to TOML: %v", err)
}
if len(output) == 0 {
err := os.Remove(path)
if err != nil {
return 0, fmt.Errorf("unable to remove empty file: %v", err)
}
return 0, nil
}
f, err := os.Create(path)
if err != nil {
return 0, fmt.Errorf("unable to open '%v' for writing: %v", path, err)
}
defer f.Close()
n, err := f.WriteString(output)
if err != nil {
return 0, fmt.Errorf("unable to write output: %v", err)
}
return int64(n), err
}

View File

@@ -0,0 +1,40 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package containerd
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine"
"github.com/pelletier/go-toml"
)
// Config represents the containerd config
type Config struct {
*toml.Tree
RuntimeType string
UseDefaultRuntimeName bool
ContainerAnnotations []string
}
// New creates a containerd config with the specified options
func New(opts ...Option) (engine.Interface, error) {
b := &builder{}
for _, opt := range opts {
opt(b)
}
return b.build()
}

View File

@@ -0,0 +1,149 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package containerd
import (
"fmt"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine"
"github.com/pelletier/go-toml"
log "github.com/sirupsen/logrus"
)
const (
defaultRuntimeType = "io.containerd.runc.v2"
)
type builder struct {
path string
runtimeType string
useLegacyConfig bool
containerAnnotations []string
}
// Option defines a function that can be used to configure the config builder
type Option func(*builder)
// WithPath sets the path for the config builder
func WithPath(path string) Option {
return func(b *builder) {
b.path = path
}
}
// WithRuntimeType sets the runtime type for the config builder
func WithRuntimeType(runtimeType string) Option {
return func(b *builder) {
b.runtimeType = runtimeType
}
}
// WithUseLegacyConfig sets the useLegacyConfig flag for the config builder
func WithUseLegacyConfig(useLegacyConfig bool) Option {
return func(b *builder) {
b.useLegacyConfig = useLegacyConfig
}
}
// WithContainerAnnotations sets the container annotations for the config builder
func WithContainerAnnotations(containerAnnotations ...string) Option {
return func(b *builder) {
b.containerAnnotations = containerAnnotations
}
}
func (b *builder) build() (engine.Interface, error) {
if b.path == "" {
return nil, fmt.Errorf("config path is empty")
}
if b.runtimeType == "" {
b.runtimeType = defaultRuntimeType
}
config, err := loadConfig(b.path)
if err != nil {
return nil, fmt.Errorf("failed to load config: %v", err)
}
config.RuntimeType = b.runtimeType
config.UseDefaultRuntimeName = !b.useLegacyConfig
config.ContainerAnnotations = b.containerAnnotations
version, err := config.parseVersion(b.useLegacyConfig)
if err != nil {
return nil, fmt.Errorf("failed to parse config version: %v", err)
}
switch version {
case 1:
return (*ConfigV1)(config), nil
case 2:
return config, nil
}
return nil, fmt.Errorf("unsupported config version: %v", version)
}
// loadConfig loads the containerd config from disk
func loadConfig(config string) (*Config, error) {
log.Infof("Loading config: %v", config)
info, err := os.Stat(config)
if os.IsExist(err) && info.IsDir() {
return nil, fmt.Errorf("config file is a directory")
}
configFile := config
if os.IsNotExist(err) {
configFile = "/dev/null"
log.Infof("Config file does not exist, creating new one")
}
tomlConfig, err := toml.LoadFile(configFile)
if err != nil {
return nil, err
}
log.Infof("Successfully loaded config")
cfg := Config{
Tree: tomlConfig,
}
return &cfg, nil
}
// parseVersion returns the version of the config
func (c *Config) parseVersion(useLegacyConfig bool) (int, error) {
defaultVersion := 2
if useLegacyConfig {
defaultVersion = 1
}
switch v := c.Get("version").(type) {
case nil:
switch len(c.Keys()) {
case 0: // No config exists, or the config file is empty, use version inferred from containerd
return defaultVersion, nil
default: // A config file exists, has content, and no version is set
return 1, nil
}
case int64:
return int(v), nil
default:
return -1, fmt.Errorf("unsupported type for version field: %v", v)
}
}

View File

@@ -0,0 +1,131 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package crio
import (
"fmt"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine"
"github.com/pelletier/go-toml"
)
// Config represents the cri-o config
type Config toml.Tree
// New creates a cri-o config with the specified options
func New(opts ...Option) (engine.Interface, error) {
b := &builder{}
for _, opt := range opts {
opt(b)
}
return b.build()
}
// AddRuntime adds a new runtime to the crio config
func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
if c == nil {
return fmt.Errorf("config is nil")
}
config := (toml.Tree)(*c)
switch runc := config.Get("crio.runtime.runtimes.runc").(type) {
case *toml.Tree:
runc, _ = toml.Load(runc.String())
config.SetPath([]string{"crio", "runtime", "runtimes", name}, runc)
}
config.SetPath([]string{"crio", "runtime", "runtimes", name, "runtime_path"}, path)
config.SetPath([]string{"crio", "runtime", "runtimes", name, "runtime_type"}, "oci")
if setAsDefault {
config.SetPath([]string{"crio", "runtime", "default_runtime"}, name)
}
*c = (Config)(config)
return nil
}
// DefaultRuntime returns the default runtime for the cri-o config
func (c Config) DefaultRuntime() string {
config := (toml.Tree)(c)
if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok {
return runtime
}
return ""
}
// RemoveRuntime removes a runtime from the cri-o config
func (c *Config) RemoveRuntime(name string) error {
if c == nil {
return nil
}
config := (toml.Tree)(*c)
if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok {
if runtime == name {
config.DeletePath([]string{"crio", "runtime", "default_runtime"})
}
}
runtimeClassPath := []string{"crio", "runtime", "runtimes", name}
config.DeletePath(runtimeClassPath)
for i := 0; i < len(runtimeClassPath); i++ {
remainingPath := runtimeClassPath[:len(runtimeClassPath)-i]
if entry, ok := config.GetPath(remainingPath).(*toml.Tree); ok {
if len(entry.Keys()) != 0 {
break
}
config.DeletePath(remainingPath)
}
}
*c = (Config)(config)
return nil
}
// Save writes the config to the specified path
func (c Config) Save(path string) (int64, error) {
config := (toml.Tree)(c)
output, err := config.ToTomlString()
if err != nil {
return 0, fmt.Errorf("unable to convert to TOML: %v", err)
}
if len(output) == 0 {
err := os.Remove(path)
if err != nil {
return 0, fmt.Errorf("unable to remove empty file: %v", err)
}
return 0, nil
}
f, err := os.Create(path)
if err != nil {
return 0, fmt.Errorf("unable to open '%v' for writing: %v", path, err)
}
defer f.Close()
n, err := f.WriteString(output)
if err != nil {
return 0, fmt.Errorf("unable to write output: %v", err)
}
return int64(n), err
}

View File

@@ -0,0 +1,73 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package crio
import (
"fmt"
"os"
"github.com/pelletier/go-toml"
log "github.com/sirupsen/logrus"
)
type builder struct {
path string
}
// Option defines a function that can be used to configure the config builder
type Option func(*builder)
// WithPath sets the path for the config builder
func WithPath(path string) Option {
return func(b *builder) {
b.path = path
}
}
func (b *builder) build() (*Config, error) {
if b.path == "" {
empty := toml.Tree{}
return (*Config)(&empty), nil
}
return loadConfig(b.path)
}
// loadConfig loads the cri-o config from disk
func loadConfig(config string) (*Config, error) {
log.Infof("Loading config: %v", config)
info, err := os.Stat(config)
if os.IsExist(err) && info.IsDir() {
return nil, fmt.Errorf("config file is a directory")
}
configFile := config
if os.IsNotExist(err) {
configFile = "/dev/null"
log.Infof("Config file does not exist, creating new one")
}
cfg, err := toml.LoadFile(configFile)
if err != nil {
return nil, err
}
log.Infof("Successfully loaded config")
return (*Config)(cfg), nil
}

View File

@@ -0,0 +1,140 @@
/**
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package docker
import (
"encoding/json"
"fmt"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/engine"
)
const (
defaultDockerRuntime = "runc"
)
// Config defines a docker config file.
// TODO: This should not be public, but we need to access it from the tests in tools/container/docker
type Config map[string]interface{}
// New creates a docker config with the specified options
func New(opts ...Option) (engine.Interface, error) {
b := &builder{}
for _, opt := range opts {
opt(b)
}
return b.build()
}
// AddRuntime adds a new runtime to the docker config
func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
if c == nil {
return fmt.Errorf("config is nil")
}
config := *c
// Read the existing runtimes
runtimes := make(map[string]interface{})
if _, exists := config["runtimes"]; exists {
runtimes = config["runtimes"].(map[string]interface{})
}
// Add / update the runtime definitions
runtimes[name] = map[string]interface{}{
"path": path,
"args": []string{},
}
config["runtimes"] = runtimes
if setAsDefault {
config["default-runtime"] = name
}
*c = config
return nil
}
// DefaultRuntime returns the default runtime for the docker config
func (c Config) DefaultRuntime() string {
r, ok := c["default-runtime"].(string)
if !ok {
return ""
}
return r
}
// RemoveRuntime removes a runtime from the docker config
func (c *Config) RemoveRuntime(name string) error {
if c == nil {
return nil
}
config := *c
if _, exists := config["default-runtime"]; exists {
defaultRuntime := config["default-runtime"].(string)
if defaultRuntime == name {
config["default-runtime"] = defaultDockerRuntime
}
}
if _, exists := config["runtimes"]; exists {
runtimes := config["runtimes"].(map[string]interface{})
delete(runtimes, name)
if len(runtimes) == 0 {
delete(config, "runtimes")
}
}
*c = config
return nil
}
// Save writes the config to the specified path
func (c Config) Save(path string) (int64, error) {
output, err := json.MarshalIndent(c, "", " ")
if err != nil {
return 0, fmt.Errorf("unable to convert to JSON: %v", err)
}
if len(output) == 0 {
err := os.Remove(path)
if err != nil {
return 0, fmt.Errorf("unable to remove empty file: %v", err)
}
return 0, nil
}
f, err := os.Create(path)
if err != nil {
return 0, fmt.Errorf("unable to open %v for writing: %v", path, err)
}
defer f.Close()
n, err := f.WriteString(string(output))
if err != nil {
return 0, fmt.Errorf("unable to write output: %v", err)
}
return int64(n), nil
}

View File

@@ -26,7 +26,7 @@ import (
func TestUpdateConfigDefaultRuntime(t *testing.T) {
testCases := []struct {
config map[string]interface{}
config Config
runtimeName string
setAsDefault bool
expectedDefaultRuntimeName interface{}
@@ -63,7 +63,7 @@ func TestUpdateConfigDefaultRuntime(t *testing.T) {
if tc.config == nil {
tc.config = make(map[string]interface{})
}
err := UpdateConfig(tc.config, tc.runtimeName, "", tc.setAsDefault)
err := tc.config.AddRuntime(tc.runtimeName, "", tc.setAsDefault)
require.NoError(t, err)
defaultRuntimeName := tc.config["default-runtime"]
@@ -74,7 +74,7 @@ func TestUpdateConfigDefaultRuntime(t *testing.T) {
func TestUpdateConfigRuntimes(t *testing.T) {
testCases := []struct {
config map[string]interface{}
config Config
runtimes map[string]string
expectedConfig map[string]interface{}
}{
@@ -198,7 +198,7 @@ func TestUpdateConfigRuntimes(t *testing.T) {
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
for runtimeName, runtimePath := range tc.runtimes {
err := UpdateConfig(tc.config, runtimeName, runtimePath, false)
err := tc.config.AddRuntime(runtimeName, runtimePath, false)
require.NoError(t, err)
}

View File

@@ -0,0 +1,80 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package docker
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
log "github.com/sirupsen/logrus"
)
type builder struct {
path string
}
// Option defines a function that can be used to configure the config builder
type Option func(*builder)
// WithPath sets the path for the config builder
func WithPath(path string) Option {
return func(b *builder) {
b.path = path
}
}
func (b *builder) build() (*Config, error) {
if b.path == "" {
empty := make(Config)
return &empty, nil
}
return loadConfig(b.path)
}
// loadConfig loads the docker config from disk
func loadConfig(configFilePath string) (*Config, error) {
log.Infof("Loading docker config from %v", configFilePath)
info, err := os.Stat(configFilePath)
if os.IsExist(err) && info.IsDir() {
return nil, fmt.Errorf("config file is a directory")
}
cfg := make(Config)
if os.IsNotExist(err) {
log.Infof("Config file does not exist, creating new one")
return &cfg, nil
}
readBytes, err := ioutil.ReadFile(configFilePath)
if err != nil {
return nil, fmt.Errorf("unable to read config: %v", err)
}
reader := bytes.NewReader(readBytes)
if err := json.NewDecoder(reader).Decode(&cfg); err != nil {
return nil, err
}
log.Infof("Successfully loaded config")
return &cfg, nil
}

66
internal/config/hook.go Normal file
View File

@@ -0,0 +1,66 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"fmt"
"github.com/pelletier/go-toml"
)
// RuntimeHookConfig stores the config options for the NVIDIA Container Runtime
type RuntimeHookConfig struct {
// Path specifies the path to the NVIDIA Container Runtime hook binary.
// If an executable name is specified, this will be resolved in the path.
Path string `toml:"path"`
// SkipModeDetection disables the mode check for the runtime hook.
SkipModeDetection bool `toml:"skip-mode-detection"`
}
// dummyHookConfig allows us to unmarshal only a RuntimeHookConfig from a *toml.Tree
type dummyHookConfig struct {
RuntimeHook RuntimeHookConfig `toml:"nvidia-container-runtime-hook"`
}
// getRuntimeHookConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getRuntimeHookConfigFrom(toml *toml.Tree) (*RuntimeHookConfig, error) {
cfg := GetDefaultRuntimeHookConfig()
if toml == nil {
return cfg, nil
}
d := dummyHookConfig{
RuntimeHook: *cfg,
}
if err := toml.Unmarshal(&d); err != nil {
return nil, fmt.Errorf("failed to unmarshal runtime config: %v", err)
}
return &d.RuntimeHook, nil
}
// GetDefaultRuntimeHookConfig defines the default values for the config
func GetDefaultRuntimeHookConfig() *RuntimeHookConfig {
c := RuntimeHookConfig{
Path: NVIDIAContainerRuntimeHookExecutable,
SkipModeDetection: false,
}
return &c
}

View File

@@ -0,0 +1,43 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"github.com/opencontainers/runtime-spec/specs-go"
)
const (
capSysAdmin = "CAP_SYS_ADMIN"
)
// IsPrivileged returns true if the container is a privileged container.
func IsPrivileged(s *specs.Spec) bool {
if s.Process.Capabilities == nil {
return false
}
// We only make sure that the bounding capabibility set has
// CAP_SYS_ADMIN. This allows us to make sure that the container was
// actually started as '--privileged', but also allow non-root users to
// access the privileged NVIDIA capabilities.
for _, c := range s.Process.Capabilities.Bounding {
if c == capSysAdmin {
return true
}
}
return false
}

View File

@@ -19,6 +19,7 @@ package config
import (
"fmt"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/pelletier/go-toml"
"github.com/sirupsen/logrus"
)
@@ -50,6 +51,10 @@ type modesConfig struct {
type cdiModeConfig struct {
// SpecDirs allows for the default spec dirs for CDI to be overridden
SpecDirs []string `toml:"spec-dirs"`
// DefaultKind sets the default kind to be used when constructing fully-qualified CDI device names
DefaultKind string `toml:"default-kind"`
// AnnotationPrefixes sets the allowed prefixes for CDI annotation-based device injection
AnnotationPrefixes []string `toml:"annotation-prefixes"`
}
type csvModeConfig struct {
@@ -94,6 +99,12 @@ func GetDefaultRuntimeConfig() *RuntimeConfig {
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{
cdi.AnnotationPrefix,
},
},
},
}

View File

@@ -58,7 +58,11 @@ func (d *charDevices) Devices() ([]Device, error) {
}
var devices []Device
for _, mount := range devicesAsMounts {
devices = append(devices, Device(mount))
device := Device{
HostPath: mount.HostPath,
Path: mount.Path,
}
devices = append(devices, device)
}
return devices, nil

View File

@@ -27,16 +27,16 @@ import (
// NewFromCSVFiles creates a discoverer for the specified CSV files. A logger is also supplied.
// The constructed discoverer is comprised of a list, with each element in the list being associated with a
// single CSV files.
func NewFromCSVFiles(logger *logrus.Logger, files []string, root string) (Discover, error) {
func NewFromCSVFiles(logger *logrus.Logger, files []string, driverRoot string) (Discover, error) {
if len(files) == 0 {
logger.Warnf("No CSV files specified")
return None{}, nil
}
symlinkLocator := lookup.NewSymlinkLocator(logger, root)
symlinkLocator := lookup.NewSymlinkLocator(logger, driverRoot)
locators := map[csv.MountSpecType]lookup.Locator{
csv.MountSpecDev: lookup.NewCharDeviceLocator(lookup.WithLogger(logger), lookup.WithRoot(root)),
csv.MountSpecDir: lookup.NewDirectoryLocator(logger, root),
csv.MountSpecDev: lookup.NewCharDeviceLocator(lookup.WithLogger(logger), lookup.WithRoot(driverRoot)),
csv.MountSpecDir: lookup.NewDirectoryLocator(logger, driverRoot),
// Libraries and symlinks are handled in the same way
csv.MountSpecLib: symlinkLocator,
csv.MountSpecSym: symlinkLocator,
@@ -52,7 +52,7 @@ func NewFromCSVFiles(logger *logrus.Logger, files []string, root string) (Discov
mountSpecs = append(mountSpecs, targets...)
}
return newFromMountSpecs(logger, locators, root, mountSpecs)
return newFromMountSpecs(logger, locators, driverRoot, mountSpecs)
}
// loadCSVFile loads the specified CSV file and returns the list of mount specs
@@ -71,7 +71,7 @@ func loadCSVFile(logger *logrus.Logger, filename string) ([]*csv.MountSpec, erro
// newFromMountSpecs creates a discoverer for the CSV file. A logger is also supplied.
// A list of csvDiscoverers is returned, with each being associated with a single MountSpecType.
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, root string, targets []*csv.MountSpec) (Discover, error) {
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, driverRoot string, targets []*csv.MountSpec) (Discover, error) {
if len(targets) == 0 {
return &None{}, nil
}
@@ -95,9 +95,9 @@ func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]loo
var m Discover
switch t {
case csv.MountSpecDev:
m = NewDeviceDiscoverer(logger, locator, root, candidatesByType[t])
m = NewDeviceDiscoverer(logger, locator, driverRoot, candidatesByType[t])
default:
m = NewMounts(logger, locator, root, candidatesByType[t])
m = NewMounts(logger, locator, driverRoot, candidatesByType[t])
}
discoverers = append(discoverers, m)

View File

@@ -18,7 +18,7 @@ package discover
// Config represents the configuration options for discovery
type Config struct {
Root string
DriverRoot string
NvidiaCTKPath string
}
@@ -32,6 +32,7 @@ type Device struct {
type Mount struct {
HostPath string
Path string
Options []string
}
// Hook represents a discovered hook.

View File

@@ -13,25 +13,25 @@ var _ Discover = &DiscoverMock{}
// DiscoverMock is a mock implementation of Discover.
//
// func TestSomethingThatUsesDiscover(t *testing.T) {
// func TestSomethingThatUsesDiscover(t *testing.T) {
//
// // make and configure a mocked Discover
// mockedDiscover := &DiscoverMock{
// DevicesFunc: func() ([]Device, error) {
// panic("mock out the Devices method")
// },
// HooksFunc: func() ([]Hook, error) {
// panic("mock out the Hooks method")
// },
// MountsFunc: func() ([]Mount, error) {
// panic("mock out the Mounts method")
// },
// }
// // make and configure a mocked Discover
// mockedDiscover := &DiscoverMock{
// DevicesFunc: func() ([]Device, error) {
// panic("mock out the Devices method")
// },
// HooksFunc: func() ([]Hook, error) {
// panic("mock out the Hooks method")
// },
// MountsFunc: func() ([]Mount, error) {
// panic("mock out the Mounts method")
// },
// }
//
// // use mockedDiscover in code that requires Discover
// // and then make assertions.
// // use mockedDiscover in code that requires Discover
// // and then make assertions.
//
// }
// }
type DiscoverMock struct {
// DevicesFunc mocks the Devices method.
DevicesFunc func() ([]Device, error)
@@ -78,7 +78,8 @@ func (mock *DiscoverMock) Devices() ([]Device, error) {
// DevicesCalls gets all the calls that were made to Devices.
// Check the length with:
// len(mockedDiscover.DevicesCalls())
//
// len(mockedDiscover.DevicesCalls())
func (mock *DiscoverMock) DevicesCalls() []struct {
} {
var calls []struct {
@@ -108,7 +109,8 @@ func (mock *DiscoverMock) Hooks() ([]Hook, error) {
// HooksCalls gets all the calls that were made to Hooks.
// Check the length with:
// len(mockedDiscover.HooksCalls())
//
// len(mockedDiscover.HooksCalls())
func (mock *DiscoverMock) HooksCalls() []struct {
} {
var calls []struct {
@@ -138,7 +140,8 @@ func (mock *DiscoverMock) Mounts() ([]Mount, error) {
// MountsCalls gets all the calls that were made to Mounts.
// Check the length with:
// len(mockedDiscover.MountsCalls())
//
// len(mockedDiscover.MountsCalls())
func (mock *DiscoverMock) MountsCalls() []struct {
} {
var calls []struct {

View File

@@ -20,48 +20,53 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
"github.com/sirupsen/logrus"
)
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
func NewGraphicsDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, cfg *Config) (Discover, error) {
root := cfg.Root
driverRoot := cfg.DriverRoot
mounts, err := NewGraphicsMountsDiscoverer(logger, root)
mounts, err := NewGraphicsMountsDiscoverer(logger, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
}
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, root)
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
}
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, cfg)
xorg := optionalXorgDiscoverer(logger, driverRoot, cfg.NvidiaCTKPath)
discover := Merge(
Merge(drmDeviceNodes, drmByPathSymlinks),
mounts,
xorg,
)
return discover, nil
}
// NewGraphicsMountsDiscoverer creates a discoverer for the mounts required by graphics tools such as vulkan.
func NewGraphicsMountsDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
locator, err := lookup.NewLibraryLocator(logger, root)
func NewGraphicsMountsDiscoverer(logger *logrus.Logger, driverRoot string) (Discover, error) {
locator, err := lookup.NewLibraryLocator(logger, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct library locator: %v", err)
}
libraries := NewMounts(
logger,
locator,
root,
driverRoot,
[]string{
"libnvidia-egl-gbm.so",
},
@@ -71,10 +76,10 @@ func NewGraphicsMountsDiscoverer(logger *logrus.Logger, root string) (Discover,
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(root),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths("/etc", "/usr/share"),
),
root,
driverRoot,
[]string{
"glvnd/egl_vendor.d/10_nvidia.json",
"vulkan/icd.d/nvidia_icd.json",
@@ -96,7 +101,7 @@ type drmDevicesByPath struct {
None
logger *logrus.Logger
nvidiaCTKPath string
root string
driverRoot string
devicesFrom Discover
}
@@ -105,7 +110,7 @@ func newCreateDRMByPathSymlinks(logger *logrus.Logger, devices Discover, cfg *Co
d := drmDevicesByPath{
logger: logger,
nvidiaCTKPath: FindNvidiaCTK(logger, cfg.NvidiaCTKPath),
root: cfg.Root,
driverRoot: cfg.DriverRoot,
devicesFrom: devices,
}
@@ -152,7 +157,7 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
linkLocator := lookup.NewFileLocator(
lookup.WithLogger(d.logger),
lookup.WithRoot(d.root),
lookup.WithRoot(d.driverRoot),
)
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
if err != nil {
@@ -178,21 +183,21 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
}
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
func newDRMDeviceDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, root string) (Discover, error) {
func newDRMDeviceDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, driverRoot string) (Discover, error) {
allDevices := NewDeviceDiscoverer(
logger,
lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(root),
lookup.WithRoot(driverRoot),
),
root,
driverRoot,
[]string{
"/dev/dri/card*",
"/dev/dri/renderD*",
},
)
filter, err := newDRMDeviceFilter(logger, devices, root)
filter, err := newDRMDeviceFilter(logger, devices, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
}
@@ -208,8 +213,8 @@ func newDRMDeviceDiscoverer(logger *logrus.Logger, devices image.VisibleDevices,
}
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
func newDRMDeviceFilter(logger *logrus.Logger, devices image.VisibleDevices, root string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(root)
func newDRMDeviceFilter(logger *logrus.Logger, devices image.VisibleDevices, driverRoot string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to read GPU information: %v", err)
}
@@ -243,6 +248,123 @@ func newDRMDeviceFilter(logger *logrus.Logger, devices image.VisibleDevices, roo
return filter, nil
}
type xorgHooks struct {
libraries Discover
driverVersion string
nvidiaCTKPath string
}
var _ Discover = (*xorgHooks)(nil)
// optionalXorgDiscoverer creates a discoverer for Xorg libraries.
// If the creation of the discoverer fails, a None discoverer is returned.
func optionalXorgDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string) Discover {
xorg, err := newXorgDiscoverer(logger, driverRoot, nvidiaCTKPath)
if err != nil {
logger.Warnf("Failed to create Xorg discoverer: %v; skipping xorg libraries", err)
return None{}
}
return xorg
}
func newXorgDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string) (Discover, error) {
libCudaPaths, err := cuda.New(
cuda.WithLogger(logger),
cuda.WithDriverRoot(driverRoot),
).Locate(".*.*")
if err != nil {
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
}
libcudaPath := libCudaPaths[0]
version := strings.TrimPrefix(filepath.Base(libcudaPath), "libcuda.so.")
if version == "" {
return nil, fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
}
libRoot := filepath.Dir(libcudaPath)
xorgLibs := NewMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths(libRoot, "/usr/lib/x86_64-linux-gnu"),
lookup.WithCount(1),
),
driverRoot,
[]string{
"nvidia/xorg/nvidia_drv.so",
fmt.Sprintf("nvidia/xorg/libglxserver_nvidia.so.%s", version),
},
)
xorgHooks := xorgHooks{
libraries: xorgLibs,
driverVersion: version,
nvidiaCTKPath: FindNvidiaCTK(logger, nvidiaCTKPath),
}
xorgConfg := NewMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths("/usr/share"),
),
driverRoot,
[]string{"X11/xorg.conf.d/10-nvidia.conf"},
)
d := Merge(
xorgLibs,
xorgConfg,
xorgHooks,
)
return d, nil
}
// Devices returns no devices for Xorg
func (m xorgHooks) Devices() ([]Device, error) {
return nil, nil
}
// Hooks returns a hook to create symlinks for Xorg libraries
func (m xorgHooks) Hooks() ([]Hook, error) {
mounts, err := m.libraries.Mounts()
if err != nil {
return nil, fmt.Errorf("failed to get mounts: %v", err)
}
if len(mounts) == 0 {
return nil, nil
}
var target string
for _, mount := range mounts {
filename := filepath.Base(mount.HostPath)
if filename == "libglxserver_nvidia.so."+m.driverVersion {
target = mount.Path
}
}
if target == "" {
return nil, nil
}
link := strings.TrimSuffix(target, "."+m.driverVersion)
links := []string{fmt.Sprintf("%s::%s", filepath.Base(target), link)}
symlinkHook := CreateCreateSymlinkHook(
m.nvidiaCTKPath,
links,
)
return symlinkHook.Hooks()
}
// Mounts returns the libraries required for Xorg
func (m xorgHooks) Mounts() ([]Mount, error) {
return nil, nil
}
// selectDeviceByPath is a filter that allows devices to be selected by the path
type selectDeviceByPath map[string]bool

View File

@@ -29,12 +29,47 @@ const (
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
)
var _ Discover = (*Hook)(nil)
// Devices returns an empty list of devices for a Hook discoverer.
func (h Hook) Devices() ([]Device, error) {
return nil, nil
}
// Mounts returns an empty list of mounts for a Hook discoverer.
func (h Hook) Mounts() ([]Mount, error) {
return nil, nil
}
// Hooks allows the Hook type to also implement the Discoverer interface.
// It returns a single hook
func (h Hook) Hooks() ([]Hook, error) {
return []Hook{h}, nil
}
// CreateCreateSymlinkHook creates a hook which creates a symlink from link -> target.
func CreateCreateSymlinkHook(nvidiaCTKPath string, links []string) Discover {
if len(links) == 0 {
return None{}
}
var args []string
for _, link := range links {
args = append(args, "--link", link)
}
return CreateNvidiaCTKHook(
nvidiaCTKPath,
"create-symlinks",
args...,
)
}
// CreateNvidiaCTKHook creates a hook which invokes the NVIDIA Container CLI hook subcommand.
func CreateNvidiaCTKHook(executable string, hookName string, additionalArgs ...string) Hook {
func CreateNvidiaCTKHook(nvidiaCTKPath string, hookName string, additionalArgs ...string) Hook {
return Hook{
Lifecycle: cdi.CreateContainerHook,
Path: executable,
Args: append([]string{filepath.Base(executable), "hook", hookName}, additionalArgs...),
Path: nvidiaCTKPath,
Args: append([]string{filepath.Base(nvidiaCTKPath), "hook", hookName}, additionalArgs...),
}
}
@@ -47,6 +82,9 @@ func FindNvidiaCTK(logger *logrus.Logger, nvidiaCTKPath string) string {
return nvidiaCTKPath
}
if nvidiaCTKPath == "" {
nvidiaCTKPath = nvidiaCTKExecutable
}
logger.Debugf("Locating NVIDIA Container Toolkit CLI as %v", nvidiaCTKPath)
lookup := lookup.NewExecutableLocator(logger, "")
hookPath := nvidiaCTKDefaultFilePath

View File

@@ -0,0 +1,60 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
)
func TestIPCMounts(t *testing.T) {
l := ipcMounts(
mounts{
logger: logrus.New(),
lookup: &lookup.LocatorMock{
LocateFunc: func(path string) ([]string, error) {
return []string{"/host/path"}, nil
},
},
required: []string{"target"},
},
)
mounts, err := l.Mounts()
require.NoError(t, err)
require.EqualValues(
t,
[]Mount{
{
HostPath: "/host/path",
Path: "/host/path",
Options: []string{
"ro",
"nosuid",
"nodev",
"bind",
"noexec",
},
},
},
mounts,
)
}

78
internal/discover/ipc.go Normal file
View File

@@ -0,0 +1,78 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
type ipcMounts mounts
// NewIPCDiscoverer creats a discoverer for NVIDIA IPC sockets.
func NewIPCDiscoverer(logger *logrus.Logger, driverRoot string) (Discover, error) {
sockets := newMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths("/run", "/var/run"),
lookup.WithCount(1),
),
driverRoot,
[]string{
"/nvidia-persistenced/socket",
"/nvidia-fabricmanager/socket",
},
)
mps := newMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithCount(1),
),
driverRoot,
[]string{
"/tmp/nvidia-mps",
},
)
d := Merge(
(*ipcMounts)(sockets),
(*ipcMounts)(mps),
)
return d, nil
}
// Mounts returns the discovered mounts with "noexec" added to the mount options.
func (d *ipcMounts) Mounts() ([]Mount, error) {
mounts, err := (*mounts)(d).Mounts()
if err != nil {
return nil, err
}
var modifiedMounts []Mount
for _, m := range mounts {
mount := m
mount.Options = append(m.Options, "noexec")
modifiedMounts = append(modifiedMounts, mount)
}
return modifiedMounts, nil
}

View File

@@ -32,7 +32,7 @@ func TestLDCacheUpdateHook(t *testing.T) {
logger, _ := testlog.NewNullLogger()
cfg := Config{
Root: "/",
DriverRoot: "/",
NvidiaCTKPath: testNvidiaCTKPath,
}

View File

@@ -43,6 +43,11 @@ var _ Discover = (*mounts)(nil)
// NewMounts creates a discoverer for the required mounts using the specified locator.
func NewMounts(logger *logrus.Logger, lookup lookup.Locator, root string, required []string) Discover {
return newMounts(logger, lookup, root, required)
}
// newMounts creates a discoverer for the required mounts using the specified locator.
func newMounts(logger *logrus.Logger, lookup lookup.Locator, root string, required []string) *mounts {
return &mounts{
logger: logger,
lookup: lookup,
@@ -93,6 +98,12 @@ func (d *mounts) Mounts() ([]Mount, error) {
uniqueMounts[p] = Mount{
HostPath: p,
Path: r,
Options: []string{
"ro",
"nosuid",
"nodev",
"bind",
},
}
}
}

View File

@@ -35,6 +35,14 @@ func TestMountsReturnsEmptyDevices(t *testing.T) {
}
func TestMounts(t *testing.T) {
mountOptions := []string{
"ro",
"nosuid",
"nodev",
"bind",
}
logger, logHook := testlog.NewNullLogger()
testCases := []struct {
@@ -70,7 +78,7 @@ func TestMounts(t *testing.T) {
},
required: []string{"required"},
},
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
expectedMounts: []Mount{{Path: "located", HostPath: "located", Options: mountOptions}},
},
{
description: "mounts removes located duplicates",
@@ -83,7 +91,7 @@ func TestMounts(t *testing.T) {
},
required: []string{"required0", "required1"},
},
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
expectedMounts: []Mount{{Path: "located", HostPath: "located", Options: mountOptions}},
},
{
description: "mounts skips located errors",
@@ -98,7 +106,7 @@ func TestMounts(t *testing.T) {
},
required: []string{"required0", "error", "required1"},
},
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
expectedMounts: []Mount{{Path: "required0", HostPath: "required0", Options: mountOptions}, {Path: "required1", HostPath: "required1", Options: mountOptions}},
},
{
description: "mounts skips unlocated",
@@ -113,7 +121,7 @@ func TestMounts(t *testing.T) {
},
required: []string{"required0", "empty", "required1"},
},
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
expectedMounts: []Mount{{Path: "required0", HostPath: "required0", Options: mountOptions}, {Path: "required1", HostPath: "required1", Options: mountOptions}},
},
{
description: "mounts adds multiple",
@@ -129,10 +137,10 @@ func TestMounts(t *testing.T) {
required: []string{"required0", "multiple", "required1"},
},
expectedMounts: []Mount{
{Path: "required0", HostPath: "required0"},
{Path: "multiple0", HostPath: "multiple0"},
{Path: "multiple1", HostPath: "multiple1"},
{Path: "required1", HostPath: "required1"},
{Path: "required0", HostPath: "required0", Options: mountOptions},
{Path: "multiple0", HostPath: "multiple0", Options: mountOptions},
{Path: "multiple1", HostPath: "multiple1", Options: mountOptions},
{Path: "required1", HostPath: "required1", Options: mountOptions},
},
},
{
@@ -147,7 +155,7 @@ func TestMounts(t *testing.T) {
required: []string{"required0", "multiple", "required1"},
},
expectedMounts: []Mount{
{Path: "/located", HostPath: "/some/root/located"},
{Path: "/located", HostPath: "/some/root/located", Options: mountOptions},
},
},
}

64
internal/dxcore/api.go Normal file
View File

@@ -0,0 +1,64 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package dxcore
import (
"github.com/NVIDIA/go-nvml/pkg/dl"
)
const (
libraryName = "libdxcore.so"
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
)
// dxcore stores a reference the dxcore dynamic library
var dxcore *context
// Init initializes the dxcore dynamic library
func Init() error {
c, err := initContext()
if err != nil {
return err
}
dxcore = c
return nil
}
// Shutdown closes the dxcore dynamic library
func Shutdown() error {
if dxcore != nil && dxcore.initialized != 0 {
dxcore.deinitContext()
}
return nil
}
// GetDriverStorePaths returns the list of driver store paths
func GetDriverStorePaths() []string {
var paths []string
selected := make(map[string]bool)
for i := 0; i < dxcore.getAdapterCount(); i++ {
path := dxcore.getAdapter(i).getDriverStorePath()
if selected[path] {
continue
}
selected[path] = true
paths = append(paths, path)
}
return paths
}

334
internal/dxcore/dxcore.c Normal file
View File

@@ -0,0 +1,334 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*/
#include <dlfcn.h>
#include <stdlib.h>
#include "dxcore.h"
// We define log_write as an empty macro to allow dxcore to remain unchanged.
#define log_write(...)
// We define the following macros to allow dxcore to remain largely unchanged.
#define log_info(msg) log_write('I', __FILE__, __LINE__, msg)
#define log_warn(msg) log_write('W', __FILE__, __LINE__, msg)
#define log_err(msg) log_write('E', __FILE__, __LINE__, msg)
#define log_infof(fmt, ...) log_write('I', __FILE__, __LINE__, fmt, __VA_ARGS__)
#define log_warnf(fmt, ...) log_write('W', __FILE__, __LINE__, fmt, __VA_ARGS__)
#define log_errf(fmt, ...) log_write('E', __FILE__, __LINE__, fmt, __VA_ARGS__)
#define DXCORE_MAX_PATH 260
/*
* List of components we expect to find in the driver store that we need to mount
*/
static const char * const dxcore_nvidia_driver_store_components[] = {
"libcuda.so.1.1", /* Core library for cuda support */
"libcuda_loader.so", /* Core library for cuda support on WSL */
"libnvidia-ptxjitcompiler.so.1", /* Core library for PTX Jit support */
"libnvidia-ml.so.1", /* Core library for nvml */
"libnvidia-ml_loader.so", /* Core library for nvml on WSL */
"nvidia-smi", /* nvidia-smi binary*/
"nvcubins.bin", /* Binary containing GPU code for cuda */
};
/*
* List of functions and structures we need to communicate with libdxcore.
* Documentation on these functions can be found on docs.microsoft.com in d3dkmthk.
*/
struct dxcore_enumAdapters2;
struct dxcore_queryAdapterInfo;
typedef int(*pfnDxcoreEnumAdapters2)(struct dxcore_enumAdapters2* pParams);
typedef int(*pfnDxcoreQueryAdapterInfo)(struct dxcore_queryAdapterInfo* pParams);
struct dxcore_lib {
void* hDxcoreLib;
pfnDxcoreEnumAdapters2 pDxcoreEnumAdapters2;
pfnDxcoreQueryAdapterInfo pDxcoreQueryAdapterInfo;
};
struct dxcore_adapterInfo
{
unsigned int hAdapter;
struct dxcore_luid AdapterLuid;
unsigned int NumOfSources;
unsigned int bPresentMoveRegionsPreferred;
};
struct dxcore_enumAdapters2
{
unsigned int NumAdapters;
struct dxcore_adapterInfo *pAdapters;
};
enum dxcore_kmtqueryAdapterInfoType
{
DXCORE_QUERYDRIVERVERSION = 13,
DXCORE_QUERYREGISTRY = 48,
};
enum dxcore_queryregistry_type {
DXCORE_QUERYREGISTRY_DRIVERSTOREPATH = 2,
DXCORE_QUERYREGISTRY_DRIVERIMAGEPATH = 3,
};
enum dxcore_queryregistry_status {
DXCORE_QUERYREGISTRY_STATUS_SUCCESS = 0,
DXCORE_QUERYREGISTRY_STATUS_BUFFER_OVERFLOW = 1,
DXCORE_QUERYREGISTRY_STATUS_FAIL = 2,
};
struct dxcore_queryregistry_info {
enum dxcore_queryregistry_type QueryType;
unsigned int QueryFlags;
wchar_t ValueName[DXCORE_MAX_PATH];
unsigned int ValueType;
unsigned int PhysicalAdapterIndex;
unsigned int OutputValueSize;
enum dxcore_queryregistry_status Status;
union {
unsigned long long OutputQword;
wchar_t Output;
};
};
struct dxcore_queryAdapterInfo
{
unsigned int hAdapter;
enum dxcore_kmtqueryAdapterInfoType Type;
void *pPrivateDriverData;
unsigned int PrivateDriverDataSize;
};
static int dxcore_query_adapter_info_helper(struct dxcore_lib* pLib,
unsigned int hAdapter,
enum dxcore_kmtqueryAdapterInfoType type,
void* pPrivateDriverDate,
unsigned int privateDriverDataSize)
{
struct dxcore_queryAdapterInfo queryAdapterInfo = { 0 };
queryAdapterInfo.hAdapter = hAdapter;
queryAdapterInfo.Type = type;
queryAdapterInfo.pPrivateDriverData = pPrivateDriverDate;
queryAdapterInfo.PrivateDriverDataSize = privateDriverDataSize;
return pLib->pDxcoreQueryAdapterInfo(&queryAdapterInfo);
}
static int dxcore_query_adapter_wddm_version(struct dxcore_lib* pLib, unsigned int hAdapter, unsigned int* version)
{
return dxcore_query_adapter_info_helper(pLib,
hAdapter,
DXCORE_QUERYDRIVERVERSION,
(void*)version,
sizeof(*version));
}
static int dxcore_query_adapter_driverstore(struct dxcore_lib* pLib, unsigned int hAdapter, char** ppDriverStorePath)
{
struct dxcore_queryregistry_info params = {0};
struct dxcore_queryregistry_info* pValue = NULL;
wchar_t* pOutput;
size_t outputSizeInBytes;
size_t outputSize;
params.QueryType = DXCORE_QUERYREGISTRY_DRIVERSTOREPATH;
if (dxcore_query_adapter_info_helper(pLib,
hAdapter,
DXCORE_QUERYREGISTRY,
(void*)&params,
sizeof(params)))
{
log_err("Failed to query driver store path size for the WDDM Adapter");
return (-1);
}
if (params.OutputValueSize > DXCORE_MAX_PATH * sizeof(wchar_t)) {
log_err("The driver store path size returned by dxcore is not valid");
return (-1);
}
outputSizeInBytes = (size_t)params.OutputValueSize;
outputSize = outputSizeInBytes / sizeof(wchar_t);
pValue = calloc(sizeof(struct dxcore_queryregistry_info) + outputSizeInBytes + sizeof(wchar_t), 1);
if (!pValue) {
log_err("Out of memory while allocating temp buffer to query adapter info");
return (-1);
}
pValue->QueryType = DXCORE_QUERYREGISTRY_DRIVERSTOREPATH;
pValue->OutputValueSize = (unsigned int)outputSizeInBytes;
if (dxcore_query_adapter_info_helper(pLib,
hAdapter,
DXCORE_QUERYREGISTRY,
(void*)pValue,
(unsigned int)(sizeof(struct dxcore_queryregistry_info) + outputSizeInBytes)))
{
log_err("Failed to query driver store path data for the WDDM Adapter");
free(pValue);
return (-1);
}
pOutput = (wchar_t*)(&pValue->Output);
// Make sure no matter what happened the wchar_t string is null terminated
pOutput[outputSize] = L'\0';
// Convert the output into a regular c string
*ppDriverStorePath = (char*)calloc(outputSize + 1, sizeof(char));
if (!*ppDriverStorePath) {
log_err("Out of memory while allocating the buffer for the driver store path");
free(pValue);
return (-1);
}
wcstombs(*ppDriverStorePath, pOutput, outputSize);
free(pValue);
return 0;
}
static void dxcore_add_adapter(struct dxcore_context* pCtx, struct dxcore_lib* pLib, struct dxcore_adapterInfo *pAdapterInfo)
{
unsigned int wddmVersion = 0;
char* driverStorePath = NULL;
log_infof("Creating a new WDDM Adapter for hAdapter:%x luid:%llx", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid));
if (dxcore_query_adapter_wddm_version(pLib, pAdapterInfo->hAdapter, &wddmVersion)) {
log_err("Failed to query the WDDM version for the specified adapter. Skipping it.");
return;
}
if (wddmVersion < 2700) {
log_err("Found a WDDM adapter running a driver with pre-WDDM 2.7 . Skipping it.");
return;
}
if (dxcore_query_adapter_driverstore(pLib, pAdapterInfo->hAdapter, &driverStorePath)) {
log_err("Failed to query driver store path for the WDDM Adapter . Skipping it.");
return;
}
// We got all the info we needed. Adding it to the tracking structure.
{
struct dxcore_adapter* newList;
newList = realloc(pCtx->adapterList, sizeof(struct dxcore_adapter) * (pCtx->adapterCount + 1));
if (!newList) {
log_err("Out of memory when trying to add a new WDDM Adapter to the list of valid adapters");
free(driverStorePath);
return;
}
pCtx->adapterList = newList;
pCtx->adapterList[pCtx->adapterCount].hAdapter = pAdapterInfo->hAdapter;
pCtx->adapterList[pCtx->adapterCount].pDriverStorePath = driverStorePath;
pCtx->adapterList[pCtx->adapterCount].wddmVersion = wddmVersion;
pCtx->adapterCount++;
}
log_infof("Adding new adapter via dxcore hAdapter:%x luid:%llx wddm version:%d", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid), wddmVersion);
}
static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
{
struct dxcore_enumAdapters2 params = {0};
unsigned int adapterIndex = 0;
params.NumAdapters = 0;
params.pAdapters = NULL;
if (pLib->pDxcoreEnumAdapters2(&params)) {
log_err("Failed to enumerate adapters via dxcore");
return;
}
params.pAdapters = malloc(sizeof(struct dxcore_adapterInfo) * params.NumAdapters);
if (pLib->pDxcoreEnumAdapters2(&params)) {
free(params.pAdapters);
log_err("Failed to enumerate adapters via dxcore");
return;
}
for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) {
dxcore_add_adapter(pCtx, pLib, &params.pAdapters[adapterIndex]);
}
free(params.pAdapters);
}
int dxcore_init_context(struct dxcore_context* pCtx)
{
struct dxcore_lib lib = {0};
pCtx->initialized = 0;
pCtx->adapterCount = 0;
pCtx->adapterList = NULL;
lib.hDxcoreLib = dlopen("libdxcore.so", RTLD_LAZY);
if (!lib.hDxcoreLib) {
goto error;
}
lib.pDxcoreEnumAdapters2 = (pfnDxcoreEnumAdapters2)dlsym(lib.hDxcoreLib, "D3DKMTEnumAdapters2");
if (!lib.pDxcoreEnumAdapters2) {
log_err("dxcore library is present but the symbol D3DKMTEnumAdapters2 is missing");
goto error;
}
lib.pDxcoreQueryAdapterInfo = (pfnDxcoreQueryAdapterInfo)dlsym(lib.hDxcoreLib, "D3DKMTQueryAdapterInfo");
if (!lib.pDxcoreQueryAdapterInfo) {
log_err("dxcore library is present but the symbol D3DKMTQueryAdapterInfo is missing");
goto error;
}
dxcore_enum_adapters(pCtx, &lib);
log_info("dxcore layer initialized successfully");
pCtx->initialized = 1;
dlclose(lib.hDxcoreLib);
return 0;
error:
dxcore_deinit_context(pCtx);
if (lib.hDxcoreLib)
dlclose(lib.hDxcoreLib);
return (-1);
}
static void dxcore_deinit_adapter(struct dxcore_adapter* pAdapter)
{
if (!pAdapter)
return;
free(pAdapter->pDriverStorePath);
}
void dxcore_deinit_context(struct dxcore_context* pCtx)
{
unsigned int adapterIndex = 0;
if (!pCtx)
return;
for (adapterIndex = 0; adapterIndex < pCtx->adapterCount; adapterIndex++) {
dxcore_deinit_adapter(&pCtx->adapterList[adapterIndex]);
}
free(pCtx->adapterList);
pCtx->initialized = 0;
}

59
internal/dxcore/dxcore.go Normal file
View File

@@ -0,0 +1,59 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package dxcore
/*
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#include <dxcore.h>
*/
import "C"
import (
"fmt"
"unsafe"
)
type context C.struct_dxcore_context
type adapter C.struct_dxcore_adapter
// initContext initializes the dxcore context and populates the list of adapters.
func initContext() (*context, error) {
cContext := C.struct_dxcore_context{}
if C.dxcore_init_context(&cContext) != 0 {
return nil, fmt.Errorf("failed to initialize dxcore context")
}
c := (*context)(&cContext)
return c, nil
}
// deinitContext deinitializes the dxcore context and frees the list of adapters.
func (c context) deinitContext() {
cContext := C.struct_dxcore_context(c)
C.dxcore_deinit_context(&cContext)
}
func (c context) getAdapterCount() int {
return int(c.adapterCount)
}
func (c context) getAdapter(index int) adapter {
arrayPointer := (*[1 << 30]C.struct_dxcore_adapter)(unsafe.Pointer(c.adapterList))
return adapter(arrayPointer[index])
}
func (a adapter) getDriverStorePath() string {
return C.GoString(a.pDriverStorePath)
}

39
internal/dxcore/dxcore.h Normal file
View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*/
#ifndef HEADER_DXCORE_H_
#define HEADER_DXCORE_H_
#define MAX_DXCORE_DRIVERSTORE_LIBRAIRIES (16)
struct dxcore_luid
{
unsigned int lowPart;
int highPart;
};
struct dxcore_adapter
{
unsigned int hAdapter;
unsigned int wddmVersion;
char* pDriverStorePath;
unsigned int driverStoreComponentCount;
const char* pDriverStoreComponents[MAX_DXCORE_DRIVERSTORE_LIBRAIRIES];
struct dxcore_context *pContext;
};
struct dxcore_context
{
unsigned int adapterCount;
struct dxcore_adapter *adapterList;
int initialized;
};
int dxcore_init_context(struct dxcore_context* pDxcore_context);
void dxcore_deinit_context(struct dxcore_context* pDxcore_context);
#endif // HEADER_DXCORE_H_

View File

@@ -40,12 +40,7 @@ func (d mount) toSpec() *specs.Mount {
s := specs.Mount{
HostPath: d.HostPath,
ContainerPath: d.Path,
Options: []string{
"ro",
"nosuid",
"nodev",
"bind",
},
Options: d.Options,
}
return &s

View File

@@ -18,6 +18,7 @@ package devices
import (
"bufio"
"errors"
"fmt"
"io"
"os"
@@ -72,7 +73,14 @@ func (d devices) Get(name Name) (Major, bool) {
// GetNVIDIADevices returns the set of NVIDIA Devices on the machine
func GetNVIDIADevices() (Devices, error) {
devicesFile, err := os.Open(procDevicesPath)
return nvidiaDevices(procDevicesPath)
}
// nvidiaDevices returns the set of NVIDIA Devices from the specified devices file.
// This is useful for testing since we may be testing on a system where `/proc/devices` does
// contain a reference to NVIDIA devices.
func nvidiaDevices(devicesPath string) (Devices, error) {
devicesFile, err := os.Open(devicesPath)
if os.IsNotExist(err) {
return nil, nil
}
@@ -81,20 +89,28 @@ func GetNVIDIADevices() (Devices, error) {
}
defer devicesFile.Close()
return nvidiaDeviceFrom(devicesFile), nil
return nvidiaDeviceFrom(devicesFile)
}
func nvidiaDeviceFrom(reader io.Reader) devices {
var errNoNvidiaDevices = errors.New("no NVIDIA devices found")
func nvidiaDeviceFrom(reader io.Reader) (devices, error) {
allDevices := devicesFrom(reader)
nvidiaDevices := make(devices)
var hasNvidiaDevices bool
for n, d := range allDevices {
if !strings.HasPrefix(string(n), nvidiaDevicePrefix) {
continue
}
nvidiaDevices[n] = d
hasNvidiaDevices = true
}
return nvidiaDevices
if !hasNvidiaDevices {
return nil, errNoNvidiaDevices
}
return nvidiaDevices, nil
}
func devicesFrom(reader io.Reader) devices {

View File

@@ -45,21 +45,23 @@ func TestNvidiaDevices(t *testing.T) {
func TestProcessDeviceFile(t *testing.T) {
testCases := []struct {
lines []string
expected devices
lines []string
expected devices
expectedError error
}{
{[]string{}, make(devices)},
{[]string{"Not a valid line:"}, make(devices)},
{[]string{"195 nvidia-frontend"}, devices{"nvidia-frontend": 195}},
{[]string{"195 nvidia-frontend", "235 nvidia-caps"}, devices{"nvidia-frontend": 195, "nvidia-caps": 235}},
{[]string{" 195 nvidia-frontend"}, devices{"nvidia-frontend": 195}},
{[]string{"Not a valid line:", "", "195 nvidia-frontend"}, devices{"nvidia-frontend": 195}},
{[]string{"195 not-nvidia-frontend"}, make(devices)},
{lines: []string{}, expectedError: errNoNvidiaDevices},
{lines: []string{"Not a valid line:"}, expectedError: errNoNvidiaDevices},
{lines: []string{"195 nvidia-frontend"}, expected: devices{"nvidia-frontend": 195}},
{lines: []string{"195 nvidia-frontend", "235 nvidia-caps"}, expected: devices{"nvidia-frontend": 195, "nvidia-caps": 235}},
{lines: []string{" 195 nvidia-frontend"}, expected: devices{"nvidia-frontend": 195}},
{lines: []string{"Not a valid line:", "", "195 nvidia-frontend"}, expected: devices{"nvidia-frontend": 195}},
{lines: []string{"195 not-nvidia-frontend"}, expectedError: errNoNvidiaDevices},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("testcase %d", i), func(t *testing.T) {
contents := strings.NewReader(strings.Join(tc.lines, "\n"))
d := nvidiaDeviceFrom(contents)
d, err := nvidiaDeviceFrom(contents)
require.ErrorIs(t, err, tc.expectedError)
require.EqualValues(t, tc.expected, d)
})

View File

@@ -307,11 +307,7 @@ func (c *ldcache) resolve(target string) (string, error) {
link = filepath.Join(filepath.Dir(target), link)
}
// Ensure that the returned path is relative to the root.
link = filepath.Join(c.root, link)
c.logger.Debugf("Resolved link: '%v' => '%v'", name, link)
return link, nil
return c.resolve(link)
}
// bytesToString converts a byte slice to a string.

View File

@@ -0,0 +1,104 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package cuda
import (
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
type cudaLocator struct {
logger *logrus.Logger
driverRoot string
}
// Options is a function that configures a cudaLocator.
type Options func(*cudaLocator)
// WithLogger is an option that configures the logger used by the locator.
func WithLogger(logger *logrus.Logger) Options {
return func(c *cudaLocator) {
c.logger = logger
}
}
// WithDriverRoot is an option that configures the driver root used by the locator.
func WithDriverRoot(driverRoot string) Options {
return func(c *cudaLocator) {
c.driverRoot = driverRoot
}
}
// New creates a new CUDA library locator.
func New(opts ...Options) lookup.Locator {
c := &cudaLocator{}
for _, opt := range opts {
opt(c)
}
if c.logger == nil {
c.logger = logrus.StandardLogger()
}
if c.driverRoot == "" {
c.driverRoot = "/"
}
return c
}
// Locate returns the path to the libcuda.so.RMVERSION file.
// libcuda.so is prefixed to the specified pattern.
func (l *cudaLocator) Locate(pattern string) ([]string, error) {
ldcacheLocator, err := lookup.NewLibraryLocator(
l.logger,
l.driverRoot,
)
if err != nil {
l.logger.Debugf("Failed to create LDCache locator: %v", err)
}
fullPattern := "libcuda.so" + pattern
candidates, err := ldcacheLocator.Locate("libcuda.so")
if err == nil {
for _, c := range candidates {
if match, err := filepath.Match(fullPattern, filepath.Base(c)); err != nil || !match {
l.logger.Debugf("Skipping non-matching candidate %v: %v", c, err)
continue
}
return []string{c}, nil
}
}
l.logger.Debugf("Could not locate %q in LDCache: Checking predefined library paths.", pattern)
pathLocator := lookup.NewFileLocator(
lookup.WithLogger(l.logger),
lookup.WithRoot(l.driverRoot),
lookup.WithSearchPaths(
"/usr/lib64",
"/usr/lib/x86_64-linux-gnu",
"/usr/lib/aarch64-linux-gnu",
"/usr/lib/x86_64-linux-gnu/nvidia/current",
"/usr/lib/aarch64-linux-gnu/nvidia/current",
),
lookup.WithCount(1),
)
return pathLocator.Locate(fullPattern)
}

View File

@@ -40,6 +40,7 @@ func NewLibraryLocator(logger *log.Logger, root string) (Locator, error) {
}
l := library{
logger: logger,
symlink: NewSymlinkLocator(logger, root),
cache: cache,
}

View File

@@ -13,29 +13,23 @@ var _ Locator = &LocatorMock{}
// LocatorMock is a mock implementation of Locator.
//
// func TestSomethingThatUsesLocator(t *testing.T) {
// func TestSomethingThatUsesLocator(t *testing.T) {
//
// // make and configure a mocked Locator
// mockedLocator := &LocatorMock{
// LocateFunc: func(s string) ([]string, error) {
// panic("mock out the Locate method")
// },
// RelativeFunc: func(s string) (string, error) {
// panic("mock out the Relative method")
// },
// }
// // make and configure a mocked Locator
// mockedLocator := &LocatorMock{
// LocateFunc: func(s string) ([]string, error) {
// panic("mock out the Locate method")
// },
// }
//
// // use mockedLocator in code that requires Locator
// // and then make assertions.
// // use mockedLocator in code that requires Locator
// // and then make assertions.
//
// }
// }
type LocatorMock struct {
// LocateFunc mocks the Locate method.
LocateFunc func(s string) ([]string, error)
// RelativeFunc mocks the Relative method.
RelativeFunc func(s string) (string, error)
// calls tracks calls to the methods.
calls struct {
// Locate holds details about calls to the Locate method.
@@ -43,14 +37,8 @@ type LocatorMock struct {
// S is the s argument value.
S string
}
// Relative holds details about calls to the Relative method.
Relative []struct {
// S is the s argument value.
S string
}
}
lockLocate sync.RWMutex
lockRelative sync.RWMutex
lockLocate sync.RWMutex
}
// Locate calls LocateFunc.
@@ -75,7 +63,8 @@ func (mock *LocatorMock) Locate(s string) ([]string, error) {
// LocateCalls gets all the calls that were made to Locate.
// Check the length with:
// len(mockedLocator.LocateCalls())
//
// len(mockedLocator.LocateCalls())
func (mock *LocatorMock) LocateCalls() []struct {
S string
} {
@@ -87,38 +76,3 @@ func (mock *LocatorMock) LocateCalls() []struct {
mock.lockLocate.RUnlock()
return calls
}
// Relative calls RelativeFunc.
func (mock *LocatorMock) Relative(s string) (string, error) {
callInfo := struct {
S string
}{
S: s,
}
mock.lockRelative.Lock()
mock.calls.Relative = append(mock.calls.Relative, callInfo)
mock.lockRelative.Unlock()
if mock.RelativeFunc == nil {
var (
sOut string
errOut error
)
return sOut, errOut
}
return mock.RelativeFunc(s)
}
// RelativeCalls gets all the calls that were made to Relative.
// Check the length with:
// len(mockedLocator.RelativeCalls())
func (mock *LocatorMock) RelativeCalls() []struct {
S string
} {
var calls []struct {
S string
}
mock.lockRelative.RLock()
calls = mock.calls.Relative
mock.lockRelative.RUnlock()
return calls
}

View File

@@ -38,7 +38,7 @@ type cdiModifier struct {
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES enviroment variable is
// used to select the devices to include.
func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
devices, err := getDevicesFromSpec(ociSpec)
devices, err := getDevicesFromSpec(logger, ociSpec, cfg)
if err != nil {
return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err)
}
@@ -46,6 +46,7 @@ func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec)
logger.Debugf("No devices requested; no modification required.")
return nil, nil
}
logger.Debugf("Creating CDI modifier for devices: %v", devices)
specDirs := cdi.DefaultSpecDirs
if len(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.SpecDirs) > 0 {
@@ -61,38 +62,82 @@ func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec)
return m, nil
}
func getDevicesFromSpec(ociSpec oci.Spec) ([]string, error) {
func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Config) ([]string, error) {
rawSpec, err := ociSpec.Load()
if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
}
image, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil {
return nil, err
}
envDevices := image.DevicesFromEnvvars(visibleDevicesEnvvar)
_, annotationDevices, err := cdi.ParseAnnotations(rawSpec.Annotations)
annotationDevices, err := getAnnotationDevices(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes, rawSpec.Annotations)
if err != nil {
return nil, fmt.Errorf("failed to parse container annotations: %v", err)
}
uniqueDevices := make(map[string]struct{})
for _, name := range append(envDevices.List(), annotationDevices...) {
if !cdi.IsQualifiedName(name) {
name = cdi.QualifiedName("nvidia.com", "gpu", name)
}
uniqueDevices[name] = struct{}{}
if len(annotationDevices) > 0 {
return annotationDevices, nil
}
container, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil {
return nil, err
}
envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar)
var devices []string
for name := range uniqueDevices {
seen := make(map[string]bool)
for _, name := range envDevices.List() {
if !cdi.IsQualifiedName(name) {
name = fmt.Sprintf("%s=%s", cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind, name)
}
if seen[name] {
logger.Debugf("Ignoring duplicate device %q", name)
continue
}
devices = append(devices, name)
}
return devices, nil
if len(devices) == 0 {
return nil, nil
}
if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) {
return devices, nil
}
logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES: %v", devices)
return nil, nil
}
// getAnnotationDevices returns a list of devices specified in the annotations.
// Keys starting with the specified prefixes are considered and expected to contain a comma-separated list of
// fully-qualified CDI devices names. If any device name is not fully-quality an error is returned.
// The list of returned devices is deduplicated.
func getAnnotationDevices(prefixes []string, annotations map[string]string) ([]string, error) {
devicesByKey := make(map[string][]string)
for key, value := range annotations {
for _, prefix := range prefixes {
if strings.HasPrefix(key, prefix) {
devicesByKey[key] = strings.Split(value, ",")
}
}
}
seen := make(map[string]bool)
var annotationDevices []string
for key, devices := range devicesByKey {
for _, device := range devices {
if !cdi.IsQualifiedName(device) {
return nil, fmt.Errorf("invalid device name %q in annotation %q", device, key)
}
if seen[device] {
continue
}
annotationDevices = append(annotationDevices, device)
seen[device] = true
}
}
return annotationDevices, nil
}
// Modify loads the CDI registry and injects the specified CDI devices into the OCI runtime specification.
@@ -105,21 +150,8 @@ func (m cdiModifier) Modify(spec *specs.Spec) error {
m.logger.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
}
devices := m.devices
for _, d := range devices {
if d == "nvidia.com/gpu=all" {
devices = []string{}
for _, candidate := range registry.DeviceDB().ListDevices() {
if strings.HasPrefix(candidate, "nvidia.com/gpu=") {
devices = append(devices, candidate)
}
}
break
}
}
m.logger.Debugf("Injecting devices using CDI: %v", devices)
_, err := registry.InjectDevices(spec, devices...)
m.logger.Debugf("Injecting devices using CDI: %v", m.devices)
_, err := registry.InjectDevices(spec, m.devices...)
if err != nil {
return fmt.Errorf("failed to inject CDI devices: %v", err)
}

View File

@@ -0,0 +1,92 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package modifier
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
func TestGetAnnotationDevices(t *testing.T) {
testCases := []struct {
description string
prefixes []string
annotations map[string]string
expectedDevices []string
expectedError error
}{
{
description: "no annotations",
},
{
description: "no matching annotations",
prefixes: []string{"not-prefix/"},
annotations: map[string]string{
"prefix/foo": "example.com/device=bar",
},
},
{
description: "single matching annotation",
prefixes: []string{"prefix/"},
annotations: map[string]string{
"prefix/foo": "example.com/device=bar",
},
expectedDevices: []string{"example.com/device=bar"},
},
{
description: "multiple matching annotations",
prefixes: []string{"prefix/", "another-prefix/"},
annotations: map[string]string{
"prefix/foo": "example.com/device=bar",
"another-prefix/bar": "example.com/device=baz",
},
expectedDevices: []string{"example.com/device=bar", "example.com/device=baz"},
},
{
description: "multiple matching annotations with duplicate devices",
prefixes: []string{"prefix/", "another-prefix/"},
annotations: map[string]string{
"prefix/foo": "example.com/device=bar",
"another-prefix/bar": "example.com/device=bar",
},
expectedDevices: []string{"example.com/device=bar"},
},
{
description: "invalid devices",
prefixes: []string{"prefix/"},
annotations: map[string]string{
"prefix/foo": "example.com/device",
},
expectedError: fmt.Errorf("invalid device %q", "example.com/device"),
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
devices, err := getAnnotationDevices(tc.prefixes, tc.annotations)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
require.ElementsMatch(t, tc.expectedDevices, devices)
})
}
}

View File

@@ -62,7 +62,7 @@ func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec)
logger.Infof("Constructing modifier from config: %+v", *cfg)
config := &discover.Config{
Root: cfg.NVIDIAContainerCLIConfig.Root,
DriverRoot: cfg.NVIDIAContainerCLIConfig.Root,
NvidiaCTKPath: cfg.NVIDIACTKConfig.Path,
}
@@ -79,7 +79,7 @@ func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec)
csvFiles = csv.BaseFilesOnly(csvFiles)
}
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root)
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.DriverRoot)
if err != nil {
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
}

View File

@@ -45,7 +45,7 @@ func NewGraphicsModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.
}
config := &discover.Config{
Root: cfg.NVIDIAContainerCLIConfig.Root,
DriverRoot: cfg.NVIDIAContainerCLIConfig.Root,
NvidiaCTKPath: cfg.NVIDIACTKConfig.Path,
}
d, err := discover.NewGraphicsDiscoverer(

View File

@@ -17,10 +17,8 @@
package modifier
import (
"fmt"
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
@@ -28,8 +26,11 @@ import (
// NewStableRuntimeModifier creates an OCI spec modifier that inserts the NVIDIA Container Runtime Hook into an OCI
// spec. The specified logger is used to capture log output.
func NewStableRuntimeModifier(logger *logrus.Logger) oci.SpecModifier {
m := stableRuntimeModifier{logger: logger}
func NewStableRuntimeModifier(logger *logrus.Logger, nvidiaContainerRuntimeHookPath string) oci.SpecModifier {
m := stableRuntimeModifier{
logger: logger,
nvidiaContainerRuntimeHookPath: nvidiaContainerRuntimeHookPath,
}
return &m
}
@@ -37,7 +38,8 @@ func NewStableRuntimeModifier(logger *logrus.Logger) oci.SpecModifier {
// stableRuntimeModifier modifies an OCI spec inplace, inserting the nvidia-container-runtime-hook as a
// prestart hook. If the hook is already present, no modification is made.
type stableRuntimeModifier struct {
logger *logrus.Logger
logger *logrus.Logger
nvidiaContainerRuntimeHookPath string
}
// Modify applies the required modification to the incoming OCI spec, inserting the nvidia-container-runtime-hook
@@ -53,18 +55,9 @@ func (m stableRuntimeModifier) Modify(spec *specs.Spec) error {
}
}
// We create a locator and look for the NVIDIA Container Runtime Hook in the path.
candidates, err := lookup.NewExecutableLocator(m.logger, "").Locate(config.NVIDIAContainerRuntimeHookExecutable)
if err != nil {
return fmt.Errorf("failed to locate NVIDIA Container Runtime Hook: %v", err)
}
path := candidates[0]
if len(candidates) > 1 {
m.logger.Debugf("Using %v from multiple NVIDIA Container Runtime Hook candidates: %v", path, candidates)
}
path := m.nvidiaContainerRuntimeHookPath
m.logger.Infof("Using prestart hook path: %v", path)
args := []string{path}
args := []string{filepath.Base(path)}
if spec.Hooks == nil {
spec.Hooks = &specs.Hooks{}
}

View File

@@ -79,7 +79,7 @@ func TestAddHookModifier(t *testing.T) {
Prestart: []specs.Hook{
{
Path: testHookPath,
Args: []string{testHookPath, "prestart"},
Args: []string{"nvidia-container-runtime-hook", "prestart"},
},
},
},
@@ -95,7 +95,7 @@ func TestAddHookModifier(t *testing.T) {
Prestart: []specs.Hook{
{
Path: testHookPath,
Args: []string{testHookPath, "prestart"},
Args: []string{"nvidia-container-runtime-hook", "prestart"},
},
},
},
@@ -141,7 +141,7 @@ func TestAddHookModifier(t *testing.T) {
},
{
Path: testHookPath,
Args: []string{testHookPath, "prestart"},
Args: []string{"nvidia-container-runtime-hook", "prestart"},
},
},
},
@@ -154,7 +154,7 @@ func TestAddHookModifier(t *testing.T) {
t.Run(tc.description, func(t *testing.T) {
m := NewStableRuntimeModifier(logger)
m := NewStableRuntimeModifier(logger, testHookPath)
err := m.Modify(&tc.spec)
if tc.expectedError != nil {

View File

@@ -13,19 +13,19 @@ var _ Runtime = &RuntimeMock{}
// RuntimeMock is a mock implementation of Runtime.
//
// func TestSomethingThatUsesRuntime(t *testing.T) {
// func TestSomethingThatUsesRuntime(t *testing.T) {
//
// // make and configure a mocked Runtime
// mockedRuntime := &RuntimeMock{
// ExecFunc: func(strings []string) error {
// panic("mock out the Exec method")
// },
// }
// // make and configure a mocked Runtime
// mockedRuntime := &RuntimeMock{
// ExecFunc: func(strings []string) error {
// panic("mock out the Exec method")
// },
// }
//
// // use mockedRuntime in code that requires Runtime
// // and then make assertions.
// // use mockedRuntime in code that requires Runtime
// // and then make assertions.
//
// }
// }
type RuntimeMock struct {
// ExecFunc mocks the Exec method.
ExecFunc func(strings []string) error
@@ -62,7 +62,8 @@ func (mock *RuntimeMock) Exec(strings []string) error {
// ExecCalls gets all the calls that were made to Exec.
// Check the length with:
// len(mockedRuntime.ExecCalls())
//
// len(mockedRuntime.ExecCalls())
func (mock *RuntimeMock) ExecCalls() []struct {
Strings []string
} {

View File

@@ -14,27 +14,26 @@
# limitations under the License.
*/
package runtime
package oci
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
log "github.com/sirupsen/logrus"
)
type modifyingRuntimeWrapper struct {
logger *log.Logger
runtime oci.Runtime
ociSpec oci.Spec
modifier oci.SpecModifier
runtime Runtime
ociSpec Spec
modifier SpecModifier
}
var _ oci.Runtime = (*modifyingRuntimeWrapper)(nil)
var _ Runtime = (*modifyingRuntimeWrapper)(nil)
// NewModifyingRuntimeWrapper creates a runtime wrapper that applies the specified modifier to the OCI specification
// before invoking the wrapped runtime. If the modifier is nil, the input runtime is returned.
func NewModifyingRuntimeWrapper(logger *log.Logger, runtime oci.Runtime, spec oci.Spec, modifier oci.SpecModifier) oci.Runtime {
func NewModifyingRuntimeWrapper(logger *log.Logger, runtime Runtime, spec Spec, modifier SpecModifier) Runtime {
if modifier == nil {
logger.Infof("Using low-level runtime with no modification")
return runtime
@@ -52,7 +51,7 @@ func NewModifyingRuntimeWrapper(logger *log.Logger, runtime oci.Runtime, spec oc
// Exec checks whether a modification of the OCI specification is required and modifies it accordingly before exec-ing
// into the wrapped runtime.
func (r *modifyingRuntimeWrapper) Exec(args []string) error {
if oci.HasCreateSubcommand(args) {
if HasCreateSubcommand(args) {
err := r.modify()
if err != nil {
return fmt.Errorf("could not apply required modification to OCI specification: %v", err)

View File

@@ -14,13 +14,12 @@
# limitations under the License.
*/
package runtime
package oci
import (
"fmt"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
@@ -38,7 +37,7 @@ func TestExec(t *testing.T) {
args []string
modifyError error
writeError error
modifer oci.SpecModifier
modifer SpecModifier
}{
{
description: "no args forwards",
@@ -92,9 +91,9 @@ func TestExec(t *testing.T) {
hook.Reset()
t.Run(tc.description, func(t *testing.T) {
runtimeMock := &oci.RuntimeMock{}
specMock := &oci.SpecMock{
ModifyFunc: func(specModifier oci.SpecModifier) error {
runtimeMock := &RuntimeMock{}
specMock := &SpecMock{
ModifyFunc: func(specModifier SpecModifier) error {
return tc.modifyError
},
FlushFunc: func() error {
@@ -144,8 +143,8 @@ func TestExec(t *testing.T) {
func TestNilModiferReturnsRuntime(t *testing.T) {
logger, _ := testlog.NewNullLogger()
runtimeMock := &oci.RuntimeMock{}
specMock := &oci.SpecMock{}
runtimeMock := &RuntimeMock{}
specMock := &SpecMock{}
shim := NewModifyingRuntimeWrapper(
logger,

View File

@@ -30,8 +30,9 @@ type SpecModifier interface {
Modify(*specs.Spec) error
}
//go:generate moq -stub -out spec_mock.go . Spec
// Spec defines the operations to be performed on an OCI specification
//
//go:generate moq -stub -out spec_mock.go . Spec
type Spec interface {
Load() (*specs.Spec, error)
Flush() error

View File

@@ -14,28 +14,28 @@ var _ Spec = &SpecMock{}
// SpecMock is a mock implementation of Spec.
//
// func TestSomethingThatUsesSpec(t *testing.T) {
// func TestSomethingThatUsesSpec(t *testing.T) {
//
// // make and configure a mocked Spec
// mockedSpec := &SpecMock{
// FlushFunc: func() error {
// panic("mock out the Flush method")
// },
// LoadFunc: func() (*specs.Spec, error) {
// panic("mock out the Load method")
// },
// LookupEnvFunc: func(s string) (string, bool) {
// panic("mock out the LookupEnv method")
// },
// ModifyFunc: func(specModifier SpecModifier) error {
// panic("mock out the Modify method")
// },
// }
// // make and configure a mocked Spec
// mockedSpec := &SpecMock{
// FlushFunc: func() error {
// panic("mock out the Flush method")
// },
// LoadFunc: func() (*specs.Spec, error) {
// panic("mock out the Load method")
// },
// LookupEnvFunc: func(s string) (string, bool) {
// panic("mock out the LookupEnv method")
// },
// ModifyFunc: func(specModifier SpecModifier) error {
// panic("mock out the Modify method")
// },
// }
//
// // use mockedSpec in code that requires Spec
// // and then make assertions.
// // use mockedSpec in code that requires Spec
// // and then make assertions.
//
// }
// }
type SpecMock struct {
// FlushFunc mocks the Flush method.
FlushFunc func() error
@@ -92,7 +92,8 @@ func (mock *SpecMock) Flush() error {
// FlushCalls gets all the calls that were made to Flush.
// Check the length with:
// len(mockedSpec.FlushCalls())
//
// len(mockedSpec.FlushCalls())
func (mock *SpecMock) FlushCalls() []struct {
} {
var calls []struct {
@@ -122,7 +123,8 @@ func (mock *SpecMock) Load() (*specs.Spec, error) {
// LoadCalls gets all the calls that were made to Load.
// Check the length with:
// len(mockedSpec.LoadCalls())
//
// len(mockedSpec.LoadCalls())
func (mock *SpecMock) LoadCalls() []struct {
} {
var calls []struct {
@@ -155,7 +157,8 @@ func (mock *SpecMock) LookupEnv(s string) (string, bool) {
// LookupEnvCalls gets all the calls that were made to LookupEnv.
// Check the length with:
// len(mockedSpec.LookupEnvCalls())
//
// len(mockedSpec.LookupEnvCalls())
func (mock *SpecMock) LookupEnvCalls() []struct {
S string
} {
@@ -189,7 +192,8 @@ func (mock *SpecMock) Modify(specModifier SpecModifier) error {
// ModifyCalls gets all the calls that were made to Modify.
// Check the length with:
// len(mockedSpec.ModifyCalls())
//
// len(mockedSpec.ModifyCalls())
func (mock *SpecMock) ModifyCalls() []struct {
SpecModifier SpecModifier
} {

View File

@@ -13,22 +13,22 @@ var _ Constraint = &ConstraintMock{}
// ConstraintMock is a mock implementation of Constraint.
//
// func TestSomethingThatUsesConstraint(t *testing.T) {
// func TestSomethingThatUsesConstraint(t *testing.T) {
//
// // make and configure a mocked Constraint
// mockedConstraint := &ConstraintMock{
// AssertFunc: func() error {
// panic("mock out the Assert method")
// },
// StringFunc: func() string {
// panic("mock out the String method")
// },
// }
// // make and configure a mocked Constraint
// mockedConstraint := &ConstraintMock{
// AssertFunc: func() error {
// panic("mock out the Assert method")
// },
// StringFunc: func() string {
// panic("mock out the String method")
// },
// }
//
// // use mockedConstraint in code that requires Constraint
// // and then make assertions.
// // use mockedConstraint in code that requires Constraint
// // and then make assertions.
//
// }
// }
type ConstraintMock struct {
// AssertFunc mocks the Assert method.
AssertFunc func() error
@@ -67,7 +67,8 @@ func (mock *ConstraintMock) Assert() error {
// AssertCalls gets all the calls that were made to Assert.
// Check the length with:
// len(mockedConstraint.AssertCalls())
//
// len(mockedConstraint.AssertCalls())
func (mock *ConstraintMock) AssertCalls() []struct {
} {
var calls []struct {
@@ -96,7 +97,8 @@ func (mock *ConstraintMock) String() string {
// StringCalls gets all the calls that were made to String.
// Check the length with:
// len(mockedConstraint.StringCalls())
//
// len(mockedConstraint.StringCalls())
func (mock *ConstraintMock) StringCalls() []struct {
} {
var calls []struct {

View File

@@ -16,8 +16,9 @@
package constraints
//go:generate moq -stub -out constraint_mock.go . Constraint
// Constraint represents a constraint that is to be evaluated
//
//go:generate moq -stub -out constraint_mock.go . Constraint
type Constraint interface {
String() string
Assert() error

View File

@@ -23,8 +23,9 @@ import (
"golang.org/x/mod/semver"
)
//go:generate moq -stub -out property_mock.go . Property
// Property represents a property that is used to check requirements
//
//go:generate moq -stub -out property_mock.go . Property
type Property interface {
Name() string
Value() (string, error)

View File

@@ -13,31 +13,31 @@ var _ Property = &PropertyMock{}
// PropertyMock is a mock implementation of Property.
//
// func TestSomethingThatUsesProperty(t *testing.T) {
// func TestSomethingThatUsesProperty(t *testing.T) {
//
// // make and configure a mocked Property
// mockedProperty := &PropertyMock{
// CompareToFunc: func(s string) (int, error) {
// panic("mock out the CompareTo method")
// },
// NameFunc: func() string {
// panic("mock out the Name method")
// },
// StringFunc: func() string {
// panic("mock out the String method")
// },
// ValidateFunc: func(s string) error {
// panic("mock out the Validate method")
// },
// ValueFunc: func() (string, error) {
// panic("mock out the Value method")
// },
// }
// // make and configure a mocked Property
// mockedProperty := &PropertyMock{
// CompareToFunc: func(s string) (int, error) {
// panic("mock out the CompareTo method")
// },
// NameFunc: func() string {
// panic("mock out the Name method")
// },
// StringFunc: func() string {
// panic("mock out the String method")
// },
// ValidateFunc: func(s string) error {
// panic("mock out the Validate method")
// },
// ValueFunc: func() (string, error) {
// panic("mock out the Value method")
// },
// }
//
// // use mockedProperty in code that requires Property
// // and then make assertions.
// // use mockedProperty in code that requires Property
// // and then make assertions.
//
// }
// }
type PropertyMock struct {
// CompareToFunc mocks the CompareTo method.
CompareToFunc func(s string) (int, error)
@@ -105,7 +105,8 @@ func (mock *PropertyMock) CompareTo(s string) (int, error) {
// CompareToCalls gets all the calls that were made to CompareTo.
// Check the length with:
// len(mockedProperty.CompareToCalls())
//
// len(mockedProperty.CompareToCalls())
func (mock *PropertyMock) CompareToCalls() []struct {
S string
} {
@@ -136,7 +137,8 @@ func (mock *PropertyMock) Name() string {
// NameCalls gets all the calls that were made to Name.
// Check the length with:
// len(mockedProperty.NameCalls())
//
// len(mockedProperty.NameCalls())
func (mock *PropertyMock) NameCalls() []struct {
} {
var calls []struct {
@@ -165,7 +167,8 @@ func (mock *PropertyMock) String() string {
// StringCalls gets all the calls that were made to String.
// Check the length with:
// len(mockedProperty.StringCalls())
//
// len(mockedProperty.StringCalls())
func (mock *PropertyMock) StringCalls() []struct {
} {
var calls []struct {
@@ -197,7 +200,8 @@ func (mock *PropertyMock) Validate(s string) error {
// ValidateCalls gets all the calls that were made to Validate.
// Check the length with:
// len(mockedProperty.ValidateCalls())
//
// len(mockedProperty.ValidateCalls())
func (mock *PropertyMock) ValidateCalls() []struct {
S string
} {
@@ -229,7 +233,8 @@ func (mock *PropertyMock) Value() (string, error) {
// ValueCalls gets all the calls that were made to Value.
// Check the length with:
// len(mockedProperty.ValueCalls())
//
// len(mockedProperty.ValueCalls())
func (mock *PropertyMock) ValueCalls() []struct {
} {
var calls []struct {

33
internal/runtime/api.go Normal file
View File

@@ -0,0 +1,33 @@
package runtime
type rt struct {
logger *Logger
modeOverride string
}
// Interface is the interface for the runtime library.
type Interface interface {
Run([]string) error
}
// Option is a function that configures the runtime.
type Option func(*rt)
// New creates a runtime with the specified options.
func New(opts ...Option) Interface {
r := rt{}
for _, opt := range opts {
opt(&r)
}
if r.logger == nil {
r.logger = NewLogger()
}
return &r
}
// WithModeOverride allows for overriding the mode specified in the config.
func WithModeOverride(mode string) Option {
return func(r *rt) {
r.modeOverride = mode
}
}

View File

@@ -14,9 +14,10 @@
# limitations under the License.
*/
package main
package runtime
import (
"errors"
"fmt"
"io"
"os"
@@ -42,11 +43,17 @@ func NewLogger() *Logger {
}
}
// UpdateLogger constructs a Logger with a preddefined formatter
func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, error) {
// Update constructs a Logger with a preddefined formatter
func (l *Logger) Update(filename string, logLevel string, argv []string) {
configFromArgs := parseArgs(argv)
level, logLevelError := configFromArgs.getLevel(logLevel)
defer func() {
if logLevelError != nil {
l.Warn(logLevelError)
}
}()
var logFiles []*os.File
var argLogFileError error
@@ -55,7 +62,7 @@ func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, err
if !configFromArgs.version {
configLogFile, err := createLogFile(filename)
if err != nil {
return logger, fmt.Errorf("error opening debug log file: %v", err)
argLogFileError = errors.Join(argLogFileError, err)
}
if configLogFile != nil {
logFiles = append(logFiles, configLogFile)
@@ -65,16 +72,17 @@ func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, err
if argLogFile != nil {
logFiles = append(logFiles, argLogFile)
}
argLogFileError = err
argLogFileError = errors.Join(argLogFileError, err)
}
defer func() {
if argLogFileError != nil {
l.Warnf("Failed to open log file: %v", argLogFileError)
}
}()
l := &Logger{
Logger: logrus.New(),
previousLogger: logger.Logger,
logFiles: logFiles,
}
newLogger := logrus.New()
l.SetLevel(level)
newLogger.SetLevel(level)
if level == logrus.DebugLevel {
logrus.SetReportCaller(true)
// Shorten function and file names reported by the logger, by
@@ -92,30 +100,26 @@ func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, err
}
if configFromArgs.format == "json" {
l.SetFormatter(new(logrus.JSONFormatter))
newLogger.SetFormatter(new(logrus.JSONFormatter))
}
if len(logFiles) == 0 {
l.SetOutput(io.Discard)
newLogger.SetOutput(io.Discard)
} else if len(logFiles) == 1 {
l.SetOutput(logFiles[0])
newLogger.SetOutput(logFiles[0])
} else if len(logFiles) > 1 {
var writers []io.Writer
for _, f := range logFiles {
writers = append(writers, f)
}
l.SetOutput(io.MultiWriter(writers...))
newLogger.SetOutput(io.MultiWriter(writers...))
}
if logLevelError != nil {
l.Warn(logLevelError)
*l = Logger{
Logger: newLogger,
previousLogger: l.Logger,
logFiles: logFiles,
}
if argLogFileError != nil {
l.Warnf("Failed to open log file: %v", argLogFileError)
}
return l, nil
}
// Reset closes the log file (if any) and resets the logger output to what it
@@ -126,7 +130,9 @@ func (l *Logger) Reset() error {
if previous == nil {
previous = logrus.New()
}
logger = &Logger{Logger: previous}
l.Logger = previous
l.previousLogger = nil
l.logFiles = nil
}()
var errs []error
@@ -150,11 +156,16 @@ func (l *Logger) Reset() error {
}
func createLogFile(filename string) (*os.File, error) {
if filename != "" && filename != os.DevNull {
return os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if filename == "" || filename == os.DevNull {
return nil, nil
}
return nil, nil
if dir := filepath.Dir(filepath.Clean(filename)); dir != "." {
err := os.MkdirAll(dir, 0755)
if err != nil {
return nil, err
}
}
return os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
}
type loggerConfig struct {

View File

@@ -0,0 +1,34 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package runtime
import (
"testing"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
)
func TestLogger(t *testing.T) {
l := NewLogger()
l.Update("", "debug", nil)
require.Equal(t, logrus.DebugLevel, l.Logger.Level)
require.Equal(t, logrus.InfoLevel, l.previousLogger.Level)
}

109
internal/runtime/runtime.go Normal file
View File

@@ -0,0 +1,109 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package runtime
import (
"encoding/json"
"fmt"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/opencontainers/runtime-spec/specs-go"
)
// Run is an entry point that allows for idiomatic handling of errors
// when calling from the main function.
func (r rt) Run(argv []string) (rerr error) {
defer func() {
if rerr != nil {
r.logger.Errorf("%v", rerr)
}
}()
printVersion := hasVersionFlag(argv)
if printVersion {
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version)))
}
cfg, err := config.GetConfig()
if err != nil {
return fmt.Errorf("error loading config: %v", err)
}
r.logger.Update(
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath,
cfg.NVIDIAContainerRuntimeConfig.LogLevel,
argv,
)
defer func() {
if rerr != nil {
r.logger.Errorf("%v", rerr)
}
r.logger.Reset()
}()
// We apply some config updates here to ensure that the config is valid in
// all cases.
if r.modeOverride != "" {
cfg.NVIDIAContainerRuntimeConfig.Mode = r.modeOverride
}
cfg.NVIDIAContainerRuntimeHookConfig.Path = config.ResolveNVIDIAContainerRuntimeHookPath(r.logger.Logger, cfg.NVIDIAContainerRuntimeHookConfig.Path)
// Print the config to the output.
configJSON, err := json.MarshalIndent(cfg, "", " ")
if err == nil {
r.logger.Infof("Running with config:\n%v", string(configJSON))
} else {
r.logger.Infof("Running with config:\n%+v", cfg)
}
r.logger.Debugf("Command line arguments: %v", argv)
runtime, err := newNVIDIAContainerRuntime(r.logger.Logger, cfg, argv)
if err != nil {
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
}
if printVersion {
fmt.Print("\n")
}
return runtime.Exec(argv)
}
func (r rt) Errorf(format string, args ...interface{}) {
r.logger.Errorf(format, args...)
}
// TODO: This should be refactored / combined with parseArgs in logger.
func hasVersionFlag(args []string) bool {
for i := 0; i < len(args); i++ {
param := args[i]
parts := strings.SplitN(param, "=", 2)
trimmed := strings.TrimLeft(parts[0], "-")
// If this is not a flag we continue
if parts[0] == trimmed {
continue
}
// Check the version flag
if trimmed == "version" {
return true
}
}
return false
}

View File

@@ -14,7 +14,7 @@
# limitations under the License.
*/
package main
package runtime
import (
"fmt"
@@ -23,7 +23,6 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
"github.com/sirupsen/logrus"
)
@@ -50,7 +49,7 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [
}
// Create the wrapping runtime with the specified modifier
r := runtime.NewModifyingRuntimeWrapper(
r := oci.NewModifyingRuntimeWrapper(
logger,
lowLevelRuntime,
ociSpec,
@@ -62,10 +61,15 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
modeModifier, err := newModeModifier(logger, cfg, ociSpec, argv)
mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode)
modeModifier, err := newModeModifier(logger, mode, cfg, ociSpec, argv)
if err != nil {
return nil, err
}
// For CDI mode we make no additional modifications.
if mode == "cdi" {
return modeModifier, nil
}
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, ociSpec)
if err != nil {
@@ -97,10 +101,10 @@ func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec
return modifiers, nil
}
func newModeModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
func newModeModifier(logger *logrus.Logger, mode string, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
switch mode {
case "legacy":
return modifier.NewStableRuntimeModifier(logger), nil
return modifier.NewStableRuntimeModifier(logger, cfg.NVIDIAContainerRuntimeHookConfig.Path), nil
case "csv":
return modifier.NewCSVModifier(logger, cfg, ociSpec)
case "cdi":

View File

@@ -14,20 +14,52 @@
# limitations under the License.
*/
package main
package runtime
import (
"encoding/json"
"os"
"os/exec"
"path/filepath"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
const (
runcExecutableName = "runc"
)
func TestMain(m *testing.M) {
// TEST SETUP
// Determine the module root and the test binary path
var err error
moduleRoot, err := test.GetModuleRoot()
if err != nil {
logrus.Fatalf("error in test setup: could not get module root: %v", err)
}
testBinPath := filepath.Join(moduleRoot, "test", "bin")
// Set the environment variables for the test
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
// Confirm that the environment is configured correctly
runcPath, err := exec.LookPath(runcExecutableName)
if err != nil || filepath.Join(testBinPath, runcExecutableName) != runcPath {
logrus.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
}
// RUN TESTS
exitCode := m.Run()
os.Exit(exitCode)
}
func TestFactoryMethod(t *testing.T) {
logger, _ := testlog.NewNullLogger()

View File

@@ -0,0 +1,154 @@
/**
# Copyright (c) NVIDIA CORPORATIOm. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvdevices
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
"github.com/sirupsen/logrus"
)
var errInvalidDeviceNode = errors.New("invalid device node")
// Interface provides a set of utilities for interacting with NVIDIA devices on the system.
type Interface struct {
devices.Devices
logger *logrus.Logger
dryRun bool
// devRoot is the root directory where device nodes are expected to exist.
devRoot string
mknoder
}
// New constructs a new Interface struct with the specified options.
func New(opts ...Option) (*Interface, error) {
i := &Interface{}
for _, opt := range opts {
opt(i)
}
if i.logger == nil {
i.logger = logrus.StandardLogger()
}
if i.devRoot == "" {
i.devRoot = "/"
}
if i.Devices == nil {
devices, err := devices.GetNVIDIADevices()
if err != nil {
return nil, fmt.Errorf("failed to create devices info: %v", err)
}
i.Devices = devices
}
if i.dryRun {
i.mknoder = &mknodLogger{i.logger}
} else {
i.mknoder = &mknodUnix{}
}
return i, nil
}
// CreateNVIDIAControlDevices creates the NVIDIA control device nodes at the configured devRoot.
func (m *Interface) CreateNVIDIAControlDevices() error {
controlNodes := []string{"nvidiactl", "nvidia-modeset", "nvidia-uvm", "nvidia-uvm-tools"}
for _, node := range controlNodes {
err := m.CreateNVIDIADevice(node)
if err != nil {
return fmt.Errorf("failed to create device node %s: %w", node, err)
}
}
return nil
}
// CreateNVIDIADevice creates the specified NVIDIA device node at the configured devRoot.
func (m *Interface) CreateNVIDIADevice(node string) error {
node = filepath.Base(node)
if !strings.HasPrefix(node, "nvidia") {
return fmt.Errorf("invalid device node %q: %w", node, errInvalidDeviceNode)
}
major, err := m.Major(node)
if err != nil {
return fmt.Errorf("failed to determine major: %w", err)
}
minor, err := m.Minor(node)
if err != nil {
return fmt.Errorf("failed to determine minor: %w", err)
}
return m.createDeviceNode(filepath.Join("dev", node), int(major), int(minor))
}
// createDeviceNode creates the specified device node with the require major and minor numbers.
// If a devRoot is configured, this is prepended to the path.
func (m *Interface) createDeviceNode(path string, major int, minor int) error {
path = filepath.Join(m.devRoot, path)
if _, err := os.Stat(path); err == nil {
m.logger.Infof("Skipping: %s already exists", path)
return nil
} else if !os.IsNotExist(err) {
return fmt.Errorf("failed to stat %s: %v", path, err)
}
return m.Mknode(path, major, minor)
}
// Major returns the major number for the specified NVIDIA device node.
// If the device node is not supported, an error is returned.
func (m *Interface) Major(node string) (int64, error) {
var valid bool
var major devices.Major
switch node {
case "nvidia-uvm", "nvidia-uvm-tools":
major, valid = m.Get(devices.NVIDIAUVM)
case "nvidia-modeset", "nvidiactl":
major, valid = m.Get(devices.NVIDIAGPU)
}
if valid {
return int64(major), nil
}
return 0, errInvalidDeviceNode
}
// Minor returns the minor number for the specified NVIDIA device node.
// If the device node is not supported, an error is returned.
func (m *Interface) Minor(node string) (int64, error) {
switch node {
case "nvidia-modeset":
return devices.NVIDIAModesetMinor, nil
case "nvidia-uvm-tools":
return devices.NVIDIAUVMToolsMinor, nil
case "nvidia-uvm":
return devices.NVIDIAUVMMinor, nil
case "nvidiactl":
return devices.NVIDIACTLMinor, nil
}
return 0, errInvalidDeviceNode
}

View File

@@ -0,0 +1,133 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvdevices
import (
"errors"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestCreateControlDevices(t *testing.T) {
logger, _ := testlog.NewNullLogger()
nvidiaDevices := &devices.DevicesMock{
GetFunc: func(name devices.Name) (devices.Major, bool) {
devices := map[devices.Name]devices.Major{
"nvidia-frontend": 195,
"nvidia-uvm": 243,
}
return devices[name], true
},
}
mknodeError := errors.New("mknode error")
testCases := []struct {
description string
root string
devices devices.Devices
mknodeError error
expectedError error
expectedCalls []struct {
S string
N1 int
N2 int
}
}{
{
description: "no root specified",
root: "",
devices: nvidiaDevices,
mknodeError: nil,
expectedCalls: []struct {
S string
N1 int
N2 int
}{
{"/dev/nvidiactl", 195, 255},
{"/dev/nvidia-modeset", 195, 254},
{"/dev/nvidia-uvm", 243, 0},
{"/dev/nvidia-uvm-tools", 243, 1},
},
},
{
description: "some root specified",
root: "/some/root",
devices: nvidiaDevices,
mknodeError: nil,
expectedCalls: []struct {
S string
N1 int
N2 int
}{
{"/some/root/dev/nvidiactl", 195, 255},
{"/some/root/dev/nvidia-modeset", 195, 254},
{"/some/root/dev/nvidia-uvm", 243, 0},
{"/some/root/dev/nvidia-uvm-tools", 243, 1},
},
},
{
description: "mknod error returns error",
devices: nvidiaDevices,
mknodeError: mknodeError,
expectedError: mknodeError,
// We expect the first call to this to fail, and the rest to be skipped
expectedCalls: []struct {
S string
N1 int
N2 int
}{
{"/dev/nvidiactl", 195, 255},
},
},
{
description: "missing major returns error",
devices: &devices.DevicesMock{
GetFunc: func(name devices.Name) (devices.Major, bool) {
return 0, false
},
},
expectedError: errInvalidDeviceNode,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
mknode := &mknoderMock{
MknodeFunc: func(string, int, int) error {
return tc.mknodeError
},
}
d, _ := New(
WithLogger(logger),
WithDevRoot(tc.root),
WithDevices(tc.devices),
)
d.mknoder = mknode
err := d.CreateNVIDIAControlDevices()
require.ErrorIs(t, err, tc.expectedError)
require.EqualValues(t, tc.expectedCalls, mknode.MknodeCalls())
})
}
}

View File

@@ -0,0 +1,46 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvdevices
import (
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
//go:generate moq -stub -out mknod_mock.go . mknoder
type mknoder interface {
Mknode(string, int, int) error
}
type mknodLogger struct {
*logrus.Logger
}
func (m *mknodLogger) Mknode(path string, major, minor int) error {
m.Infof("Running: mknod --mode=0666 %s c %d %d", path, major, minor)
return nil
}
type mknodUnix struct{}
func (m *mknodUnix) Mknode(path string, major, minor int) error {
err := unix.Mknod(path, unix.S_IFCHR, int(unix.Mkdev(uint32(major), uint32(minor))))
if err != nil {
return err
}
return unix.Chmod(path, 0666)
}

View File

@@ -0,0 +1,89 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package nvdevices
import (
"sync"
)
// Ensure, that mknoderMock does implement mknoder.
// If this is not the case, regenerate this file with moq.
var _ mknoder = &mknoderMock{}
// mknoderMock is a mock implementation of mknoder.
//
// func TestSomethingThatUsesmknoder(t *testing.T) {
//
// // make and configure a mocked mknoder
// mockedmknoder := &mknoderMock{
// MknodeFunc: func(s string, n1 int, n2 int) error {
// panic("mock out the Mknode method")
// },
// }
//
// // use mockedmknoder in code that requires mknoder
// // and then make assertions.
//
// }
type mknoderMock struct {
// MknodeFunc mocks the Mknode method.
MknodeFunc func(s string, n1 int, n2 int) error
// calls tracks calls to the methods.
calls struct {
// Mknode holds details about calls to the Mknode method.
Mknode []struct {
// S is the s argument value.
S string
// N1 is the n1 argument value.
N1 int
// N2 is the n2 argument value.
N2 int
}
}
lockMknode sync.RWMutex
}
// Mknode calls MknodeFunc.
func (mock *mknoderMock) Mknode(s string, n1 int, n2 int) error {
callInfo := struct {
S string
N1 int
N2 int
}{
S: s,
N1: n1,
N2: n2,
}
mock.lockMknode.Lock()
mock.calls.Mknode = append(mock.calls.Mknode, callInfo)
mock.lockMknode.Unlock()
if mock.MknodeFunc == nil {
var (
errOut error
)
return errOut
}
return mock.MknodeFunc(s, n1, n2)
}
// MknodeCalls gets all the calls that were made to Mknode.
// Check the length with:
//
// len(mockedmknoder.MknodeCalls())
func (mock *mknoderMock) MknodeCalls() []struct {
S string
N1 int
N2 int
} {
var calls []struct {
S string
N1 int
N2 int
}
mock.lockMknode.RLock()
calls = mock.calls.Mknode
mock.lockMknode.RUnlock()
return calls
}

View File

@@ -0,0 +1,53 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvdevices
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
"github.com/sirupsen/logrus"
)
// Option is a function that sets an option on the Interface struct.
type Option func(*Interface)
// WithDryRun sets the dry run option for the Interface struct.
func WithDryRun(dryRun bool) Option {
return func(i *Interface) {
i.dryRun = dryRun
}
}
// WithLogger sets the logger for the Interface struct.
func WithLogger(logger *logrus.Logger) Option {
return func(i *Interface) {
i.logger = logger
}
}
// WithDevRoot sets the root directory for the NVIDIA device nodes.
func WithDevRoot(devRoot string) Option {
return func(i *Interface) {
i.devRoot = devRoot
}
}
// WithDevices sets the devices for the Interface struct.
func WithDevices(devices devices.Devices) Option {
return func(i *Interface) {
i.Devices = devices
}
}

View File

@@ -0,0 +1,49 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvmodules
import (
"fmt"
"os/exec"
"strings"
"github.com/sirupsen/logrus"
)
//go:generate moq -stub -out cmd_mock.go . cmder
type cmder interface {
// Run executes the command and returns the stdout, stderr, and an error if any
Run(string, ...string) error
}
type cmderLogger struct {
*logrus.Logger
}
func (c *cmderLogger) Run(cmd string, args ...string) error {
c.Infof("Running: %v %v", cmd, strings.Join(args, " "))
return nil
}
type cmderExec struct{}
func (c *cmderExec) Run(cmd string, args ...string) error {
if output, err := exec.Command(cmd, args...).CombinedOutput(); err != nil {
return fmt.Errorf("%w; output=%v", err, string(output))
}
return nil
}

Some files were not shown because too many files have changed in this diff Show More