Compare commits

..

536 Commits

Author SHA1 Message Date
Evan Lezar
6094effd58 Merge branch 'debug-no-cgroups' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!478
2023-09-07 15:55:06 +00:00
Evan Lezar
7187608a36 Update libnvidia-container 2023-09-07 15:55:06 +00:00
Evan Lezar
a54d9d2118 Merge branch 'debug-no-cgroups' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!477
2023-09-07 15:36:23 +00:00
Evan Lezar
56dd69ff1c Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-09-07 17:35:48 +02:00
Evan Lezar
89240cecae Merge branch 'debug-no-cgroups' into 'main'
Add required option to new toml config

See merge request nvidia/container-toolkit/container-toolkit!476
2023-09-07 11:16:34 +00:00
Evan Lezar
4ec9bd751e Add required option to new toml config
This change adds a "required" option to the new toml config
that controls whether a default config is returned or not.
This is useful from the NVIDIA Container Runtime Hook, where
/run/driver/nvidia/etc/nvidia-container-runtime/config.toml
is checked before the standard path.

This fixes a bug where the default config was always applied
when this config was not used.

See https://github.com/NVIDIA/nvidia-container-toolkit/issues/106

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-09-07 11:56:01 +02:00
Evan Lezar
d74f7fef4e Update libnvidia-container to fix rpm builds
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-09-07 11:55:46 +02:00
Evan Lezar
538d4020df Bump version to v1.14.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-09-06 17:51:52 +02:00
Evan Lezar
f2bd3173d4 Merge branch 'bump-version-v1.14.0' into 'main'
Bump verison to v1.14.0

See merge request nvidia/container-toolkit/container-toolkit!475
2023-08-29 14:45:37 +00:00
Evan Lezar
2bf8017516 Bump verison to v1.14.0
Note that v1.14.0-rc.3 was an internal-only release.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-29 16:03:41 +02:00
Evan Lezar
2a3afdd5d9 Merge branch 'fix-platform-detection' into 'main'
Add UsesNVGPUModule info function

See merge request nvidia/container-toolkit/container-toolkit!473
2023-08-28 15:58:41 +00:00
Evan Lezar
1dc028cdf2 Add UsesNVGPUModule info function
This change adds a UsesNVGPUModule function that checks whether the nvgpu
kernel module is used by NVML. This allows for more robust detection of
Tegra-based platforms where libnvidia-ml.so is supported to enumerate the
iGPU.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-25 11:24:34 +02:00
Evan Lezar
72c56567fe Merge branch 'CNT-4496/remove/sys/devices/soc' into 'main'
Remove /sys/devices/soc0/family from CDI spec

See merge request nvidia/container-toolkit/container-toolkit!472
2023-08-25 08:26:15 +00:00
Evan Lezar
ca1055588d Remove /sys/devices/soc0/family from CDI spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-25 10:25:33 +02:00
Evan Lezar
fca30d7acc Merge branch 'fix-config-file' into 'main'
Properly create output for config file

See merge request nvidia/container-toolkit/container-toolkit!471
2023-08-23 08:55:58 +00:00
Evan Lezar
5bf2209fdb Properly create output for config file
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-23 09:41:46 +02:00
Evan Lezar
f86a5abeb6 Merge branch 'CNT-4478/fix-unknown-devices' into 'main'
Update go-nvlib dependency to  v0.0.0-20230818092907-09424fdc8884

See merge request nvidia/container-toolkit/container-toolkit!470
2023-08-21 09:05:58 +00:00
Evan Lezar
9ac313f551 Instantiate nvpci.Interface with logger
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-18 11:40:17 +02:00
Evan Lezar
546f810159 Update go-nvlib dependency to v0.0.0-20230818092907-09424fdc8884
This change updates go-nvlib to include logic to skip NVIDIA PCI-E
devices where the name or class id is not known.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-18 11:40:15 +02:00
Evan Lezar
7affdafcd3 Merge branch 'CNT-4286/set-nvidia-visible-devices-to-void' into 'main'
Set NVIDIA_VISIBLE_DEVICES=void in toolkit-container

See merge request nvidia/container-toolkit/container-toolkit!469
2023-08-15 10:36:09 +00:00
Evan Lezar
7221b6b24b Set NVIDIA_VISIBLE_DEVICES=void in toolkit-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-15 11:52:56 +02:00
Tariq Ibrahim
f904ec41eb Merge branch 'log-unresolved-devices' into 'main'
add a warning statement listing unresolved CDI devices

See merge request nvidia/container-toolkit/container-toolkit!461
2023-08-14 17:22:36 +00:00
Evan Lezar
e7ae0f183c Merge branch 'rename-library-search-path' into 'main'
Add library-search-path option to cdi generate

See merge request nvidia/container-toolkit/container-toolkit!468
2023-08-14 13:49:26 +00:00
Evan Lezar
86df7c6696 Add library-search-path option to cdi generate
This change renames the csv.library-search-path option to
library-search-path so as to be more generally applicable in
future. Note that the option is still only applied in csv mode.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-14 15:04:33 +02:00
Evan Lezar
99923b57b8 Merge branch 'add-config-set-command' into 'main'
Allow config options to be set usign the nvidia-ctk config command

See merge request nvidia/container-toolkit/container-toolkit!464
2023-08-14 11:18:57 +00:00
Evan Lezar
4addb292b1 Extend nvidia-ctk config command to allow options to be set
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-14 11:33:26 +02:00
Evan Lezar
149a8d7bd8 Simplify nvidia-ctk config default command
This chagne simplifies the nvidia-ctk config default command.
By default it now outputs the default config to STDOUT, and can
optionally output this to file.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-14 11:32:54 +02:00
Evan Lezar
a69657dde7 Add config.Toml type to handle config files
This change introduced a config.Toml type that is used as the base for
config file processing and manipulation. This ensures that configs --
including commented values -- can be handled consistently.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-14 11:32:54 +02:00
Evan Lezar
c2d4de54b0 Add function to get config file path. 2023-08-14 11:32:54 +02:00
Evan Lezar
5216e89a70 Merge branch 'refactor-hook-configs' into 'main'
Migrate to internal/config.Config structs for the NVIDIA Container Runtime Hook config.

See merge request nvidia/container-toolkit/container-toolkit!463
2023-08-14 09:23:43 +00:00
Evan Lezar
96766aa719 Remove BurntSushi/toml go dependency
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-14 10:40:42 +02:00
Evan Lezar
3670e7b89e Refactor loading of hook configs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-14 10:40:42 +02:00
Evan Lezar
b18ac09f77 Refactor handling of DriverCapabilities
This change consolidates the handling of NVIDIA_DRIVER_CAPABILITIES in the
interal/image package.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-14 10:40:42 +02:00
Evan Lezar
4dcaa61167 Use internal/config structs in hook
This change ensures that the Config structs from internal.Config
are used for the NVIDIA Container Runtime Hook config too.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-14 10:40:41 +02:00
Evan Lezar
8bf52e1dec Export config.GetDefault function
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-14 10:35:33 +02:00
Evan Lezar
e4722e9642 Merge branch 'add-runtime-hook-config' into 'main'
Add support for creating oci hook to nvidia-ctk

See merge request nvidia/container-toolkit/container-toolkit!467
2023-08-14 08:33:56 +00:00
Evan Lezar
65f6f46846 Remove installation of oci-nvidia-hook files in RPM packages
This change removes installation of the oci-nvidia-hook files.
These files conflict with CDI use in runtimes that support it.

The use of the hook should be considered deprecated on these platforms.

If a hook is required, the

nvidia-ctk runtime configure --config-mode=oci-hook

command should be used to create the hook file(s).

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-11 16:34:58 +02:00
Evan Lezar
f6a4986c15 Add support for creating oci hook to nvidia-ctk
This change extends the nvidia-ctk runtime configure command
with a --config-mode=oci-hook that creates an OCI hook json file.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-11 16:34:58 +02:00
Tariq Ibrahim
6d3b29f3ca add a warning statement listing unresolved CDI devices 2023-08-10 08:38:33 -07:00
Evan Lezar
30c0848487 Merge branch 'fix-libnvidia-container0-url' into 'main'
Use stable repo URL directly

See merge request nvidia/container-toolkit/container-toolkit!465
2023-08-10 14:31:06 +00:00
Evan Lezar
ee1b0c3e4f Use stable repo URL directly
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-10 16:30:25 +02:00
Evan Lezar
37ac294a11 Merge branch 'add-deb-and-rpm-repos' into 'main'
Publish generic deb and rpm repos.

See merge request nvidia/container-toolkit/container-toolkit!460
2023-08-10 13:35:12 +00:00
Evan Lezar
0d862efa9c Publish generic deb and rpm repos.
This change ensures that the centos7 and ubuntu18.04 packages are
published to the generic rpm and deb repos, respectively.

All other packages except the centos8-ppc64le packages are skipped
as these use cases are covered by the generic packages.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-09 17:54:41 +02:00
Evan Lezar
22d7b52a58 Merge branch 'set-libnvidia-container-version' into 'main'
Set libnvidia-container version to toolkit version

See merge request nvidia/container-toolkit/container-toolkit!459
2023-08-09 12:10:36 +00:00
Evan Lezar
9f1c9b2a31 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-09 13:24:48 +02:00
Evan Lezar
0483eebc7b Set libnvidia-container version to toolkit version
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-09 13:24:08 +02:00
Evan Lezar
54aacb4245 Merge branch 'list-shows-errors' into 'main'
Log registry refresh errors in cdi list

See merge request nvidia/container-toolkit/container-toolkit!458
2023-08-08 15:26:42 +00:00
Evan Lezar
5cb367e771 Merge branch 'sort-cdi-entities' into 'main'
Sort CDI entities in generated CDI specifications

See merge request nvidia/container-toolkit/container-toolkit!457
2023-08-08 14:11:17 +00:00
Evan Lezar
feb069a2e9 Log registry refresh errors in cdi list
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-08 16:00:36 +02:00
Evan Lezar
cbdbcd87ff Add sorter to simplifying transformer
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-08 15:27:04 +02:00
Evan Lezar
7a4d2cff67 Add merged CDI spec transformer
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-08 14:45:31 +02:00
Evan Lezar
5638f47cb0 Add sort CDI spec transoformer
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-08 14:45:31 +02:00
Evan Lezar
4c513d536b Merge branch 'improve-csv-cdi-spec-generation' into 'main'
Rework CSV file support to enable more robust CDI spec generation

See merge request nvidia/container-toolkit/container-toolkit!447
2023-08-04 16:40:15 +00:00
Evan Lezar
8553fce68a Specify library search paths for CSV CDI spec generation
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-04 16:49:30 +02:00
Evan Lezar
03a4e2f8a9 Skip symlinks to libraries
In order to properly handle systems with both iGPU and dGPU
drivers included, we skip "sym" mount specifications which
refer to .so or .so.[1-9] files.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-04 16:49:30 +02:00
Evan Lezar
918bd03488 Move tegra-specifics to new package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-04 16:49:30 +02:00
Evan Lezar
01a7f7bb8e Explicitly generate CDI spec for CSV mode
This change explicitly generates a CDI specification from
the supplied CSV files when cdi mode is detected. This
ensures consistency between the behaviour on Tegra-based
systems.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-04 16:49:30 +02:00
Evan Lezar
6b48cbd1dc Move CDI modifier to separate package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-04 16:49:30 +02:00
Evan Lezar
64a0a67eb4 Merge branch 'bump-version' into 'main'
Bump version to 1.14.0-rc.3

See merge request nvidia/container-toolkit/container-toolkit!456
2023-08-04 14:16:56 +00:00
Evan Lezar
93d9e18f04 Update libnvidia-container to 1.14.0~rc.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-04 15:17:00 +02:00
Evan Lezar
7c2c42b8da Bump version to 1.14.0-rc.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-08-04 15:12:29 +02:00
Evan Lezar
e4fee325cb Merge branch 'fix-hook' into 'main'
Handle empty root in config

See merge request nvidia/container-toolkit/container-toolkit!454
2023-07-19 12:45:49 +00:00
Evan Lezar
ec63533eb1 Ensure default config comments are consistent
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-19 14:37:49 +02:00
Evan Lezar
e51621aa7f Handle empty root in config
If the config.toml has an empty root specified, this could be
passed to the NVIDIA Container CLI through the --root flag
which causes argument parsing to fail. This change only
adds the --root flag if the config option is specified
and is non-empty.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-19 14:02:23 +02:00
Evan Lezar
80a78e60d1 Merge branch 'device-namer' into 'main'
Refactor device namer

See merge request nvidia/container-toolkit/container-toolkit!453
2023-07-18 14:16:01 +00:00
Evan Lezar
9f46c34587 Support device name strategies for Tegra devices
This change generates CDI specifications for Tegra devices
with the nvidia.com/gpu=0 name by default. The type-index
nameing strategy is also supported and will generate a device
with the name nvidia.com/gpu=gpu0.

The uuid naming strategy will raise an error if selected.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-18 16:13:38 +02:00
Evan Lezar
f07a0585fc Refactor device namer
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-18 16:13:37 +02:00
Evan Lezar
32ec10485e Merge branch 'lookup-functional-options' into 'main'
Use functional options when creating Symlink and Directory locators

See merge request nvidia/container-toolkit/container-toolkit!452
2023-07-18 13:39:23 +00:00
Evan Lezar
ce7d5f7a51 Use functional options when constructing direcory locator
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-18 15:36:03 +02:00
Evan Lezar
9b64d74f6a Use functional options when constructing Symlink locator
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-18 15:31:15 +02:00
Evan Lezar
99cc0aebd6 Merge branch 'pass-image-to-csv-constructor' into 'main'
Pass image when constructing CSV modifier

See merge request nvidia/container-toolkit/container-toolkit!451
2023-07-18 13:30:53 +00:00
Evan Lezar
cca343abb0 Pass image when constructing CSV modifier
Since the incoming OCI spec has already been parsed and used to
construct a CUDA image representation, pass this to the CSV
modifier constructor instead of re-creating an image representation.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-18 15:27:16 +02:00
Evan Lezar
f08e48e700 Merge branch 'set-cdi-spec-dirs-in-config' into 'main'
Set default spec dirs at config level

See merge request nvidia/container-toolkit/container-toolkit!450
2023-07-18 13:25:29 +00:00
Evan Lezar
e2f8d2a15f Set default spec dirs at config level
This change sets the default CDI spec dirs at a config level instead
of when a CDI runtime modifier is constructed. This makes this setting
consistent with other options such as the nvidia-ctk path.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-18 15:23:09 +02:00
Evan Lezar
2c5761d32e Merge branch 'bug-fixes' into 'main'
Minor fixes and cleanups

See merge request nvidia/container-toolkit/container-toolkit!449
2023-07-18 13:20:46 +00:00
Evan Lezar
3c9d95c62f Fix usage string in CLI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-18 15:20:24 +02:00
Evan Lezar
481000b4ce Remove unused argument
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-18 15:20:24 +02:00
Evan Lezar
b2126722e5 Update vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-18 15:16:25 +02:00
Evan Lezar
083b789102 Use cdi parser package for IsQualiedName
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-18 15:16:25 +02:00
Evan Lezar
a564b38b7e Merge branch 'remove-centos7-aarch64-scan' into 'main'
Remove centos7-arm64 scan

See merge request nvidia/container-toolkit/container-toolkit!445
2023-07-17 14:29:17 +00:00
Evan Lezar
5427249cfc Remove centos7-arm64 scan
Since we don't publish a centos7-arm64 image, the scan does not
make sense.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-17 16:28:31 +02:00
Evan Lezar
032982ab9c Merge branch 'bump-dependencies' into 'main'
Bump dependencies

See merge request nvidia/container-toolkit/container-toolkit!444
2023-07-17 14:13:12 +00:00
Evan Lezar
96aeb9bf64 Update container-device-interface to v0.6.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-17 14:12:06 +02:00
Evan Lezar
c98f6ea395 Update containerized docker files for golang 1.20.5
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-17 14:10:05 +02:00
Evan Lezar
073f9cf120 Bump golang version to 1.20.5
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-17 14:06:48 +02:00
Evan Lezar
1fdd0c1248 Merge branch 'bump-changelog' into 'main'
Fix changelog for 1.14.0-rc.2

See merge request nvidia/container-toolkit/container-toolkit!443
2023-07-17 12:04:39 +00:00
Evan Lezar
a883c65dd6 Fix changelog for 1.14.0-rc.2 2023-07-17 12:04:38 +00:00
Evan Lezar
aac39f89cc Merge branch 'update-libnvidia-container' into 'main'
Include Shared Compiler Library (libnvidia-gpucomp.so) in the list of compute libaries.

See merge request nvidia/container-toolkit/container-toolkit!442
2023-07-13 12:57:40 +00:00
Evan Lezar
e25576d26d Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-13 14:15:33 +02:00
Evan Lezar
3626a13273 Merge branch 'fix-disable-require' into 'main'
Return empty requirements if NVIDIA_DISABLE_REQUIRE is true

See merge request nvidia/container-toolkit/container-toolkit!438
2023-07-11 11:48:35 +00:00
Evan Lezar
6750ce1667 Print invalid version on parse error
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-11 13:47:39 +02:00
Evan Lezar
1081cecea9 Return empty requirements if NVIDIA_DISABLE_REQUIRE is true
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-11 13:47:37 +02:00
Evan Lezar
7451e6eb75 Merge branch 'custom-firmware-paths' into 'main'
Add firmware search paths when generating CDI specifications

See merge request nvidia/container-toolkit/container-toolkit!439
2023-07-11 09:16:33 +00:00
Evan Lezar
81908c8cc9 Search custom firmware paths first
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-11 10:34:14 +02:00
Evan Lezar
d3d41a3e1d Simplify handling of custom firmware path
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-11 10:31:50 +02:00
Evan Lezar
0a37f8798a Add firmware search paths when generating CDI specifications
Path to locate the GSP firmware is explicitly set to /lib/firmware/nvidia.
Users may chose to install the GSP firmware in alternate locations where
the kernel would look for firmware on the root filesystem.

Add locate functionality which looks for the GSP firmware files in the
same location as the kernel would
(https://docs.kernel.org/driver-api/firmware/fw_search_path.html).

The paths searched in order are:
- path described in /sys/module/firmware_class/parameters/path
- /lib/firmware/updates/UTS_RELEASE/
- /lib/firmware/updates/
- /lib/firmware/UTS_RELEASE/
- /lib/firmware/

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-11 10:31:50 +02:00
Evan Lezar
4f89b60ab9 Merge branch 'remove-experimental-runtime' into 'main'
Remove NVIDIA experimental runtime from toolkit container

See merge request nvidia/container-toolkit/container-toolkit!238
2023-07-10 10:25:55 +00:00
Evan Lezar
0938576618 Remove NVIDIA experimental runtime from toolkit container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-10 11:44:55 +02:00
Evan Lezar
4ca8d4173a Merge branch 'revert-d5cbe48d' into 'main'
Revert "Merge branch 'bump-golang-1.20.5' into 'main'"

See merge request nvidia/container-toolkit/container-toolkit!437
2023-07-05 15:12:01 +00:00
Evan Lezar
978549dc58 Revert "Merge branch 'bump-golang-1.20.5' into 'main'"
This reverts merge request !436
2023-07-05 15:11:41 +00:00
Evan Lezar
d5cbe48d59 Merge branch 'bump-golang-1.20.5' into 'main'
Bump golang version to 1.20.5

See merge request nvidia/container-toolkit/container-toolkit!436
2023-07-05 14:07:58 +00:00
Evan Lezar
e8ec795883 Bump golang version to 1.20.5
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-05 16:07:41 +02:00
Evan Lezar
62bc6b211f Merge branch 'bump-cuda-12.2.0' into 'main'
Bump cuda base image to 12.2.0

See merge request nvidia/container-toolkit/container-toolkit!435
2023-07-05 10:11:27 +00:00
Evan Lezar
6fac6c237b Bump cuda base image to 12.2.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-05 11:28:32 +02:00
Evan Lezar
20ff4e2fb9 Merge branch 'generate-default-config-post-install' into 'main'
Ensure that default config is created on the file system as a post-install step

See merge request nvidia/container-toolkit/container-toolkit!431
2023-07-05 09:27:29 +00:00
Evan Lezar
f78d3a858f Rework default config generation to not use toml
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-05 11:26:55 +02:00
Evan Lezar
bc6ca7ff88 Generate default config post-install
The debian and rpm packages are updated to trigger the generation of
of a default config if no config exists at the expected location.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-05 11:26:53 +02:00
Evan Lezar
65ae6f1dab Fix generation of default config
This change ensures that the nvidia-ctk config default command
generates a config file that is compatible with the official documentation
to, for example, disable cgroups in the NVIDIA Container CLI.

This requires that whitespace around comments is stripped before outputing the
contets.

This also adds an option to load a config and modify it in-place instead. This can
be triggered as a post-install step, for example.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-05 11:26:04 +02:00
Evan Lezar
ba24338122 Add quiet mode to nvidia-ctk cli
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-05 11:26:04 +02:00
Evan Lezar
2299c9588d Merge branch 'create-config-folders' into 'main'
Ensure that folders exist when creating config files

See merge request nvidia/container-toolkit/container-toolkit!433
2023-07-05 09:25:28 +00:00
Evan Lezar
ba80d0318f Merge branch 'rpm-fix-missing-coreutils-during-install' into 'main'
RPM spec: Avoid scriptlet failure during initial system installation

See merge request nvidia/container-toolkit/container-toolkit!432
2023-07-05 08:43:26 +00:00
Evan Lezar
6342dae0e9 Ensure that parent directories exist for config files
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-03 15:30:31 +02:00
Evan Lezar
baf94181aa Add engine.Config to encapsulate writing
This change adds an engine.Config type to encapsulate the writing
of config files for container engines.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-07-03 15:26:47 +02:00
Evan Lezar
bbe9742c46 Merge branch 'switch-to-latest-dind' into 'main'
Switch to latest dind image for tests

See merge request nvidia/container-toolkit/container-toolkit!430
2023-06-30 09:46:49 +00:00
Evan Lezar
1447ef3818 Switch to latest dind image for tests
The stable-dind image is out of date and has not been updated for 3 years.
This change updates to the latest dind image.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-30 11:03:07 +02:00
Claudius Volz
5598dbf9d7 RPM spec: Only run fixup code if the package is being upgraded, to avoid a scenario where the coreutils (mkdir, cp) are not available yet during a fresh system installation.
Signed-off-by: Claudius Volz <c.volz@gmx.de>
2023-06-29 00:23:24 +02:00
Evan Lezar
8967e851c4 Merge branch 'fix-multiple-driver-roots-wsl' into 'main'
Fix bug with multiple driver store paths

See merge request nvidia/container-toolkit/container-toolkit!425
2023-06-27 14:15:38 +00:00
Evan Lezar
15378f6ced Merge branch 'fix-ordering-of-envvars' into 'main'
Ensure common envvars have higher precedence

See merge request nvidia/container-toolkit/container-toolkit!426
2023-06-27 13:26:34 +00:00
Evan Lezar
4d2e8d1913 Ensure common envvars have higher precedence
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 14:45:15 +02:00
Evan Lezar
4feaee0fe6 Merge branch 'bump-version' into 'main'
Bump version to v1.14.0-rc.2

See merge request nvidia/container-toolkit/container-toolkit!427
2023-06-27 12:38:05 +00:00
Evan Lezar
51984d49cf Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 14:37:26 +02:00
Evan Lezar
a6a8bb940c Bump version to v1.14.0-rc.2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-27 14:02:59 +02:00
Evan Lezar
6265e34afb Fix bug with multiple driver store paths
This change uses the actual discovered path of nvidia-smi when
creating a symlink to the binary on WSL2 platforms.

This ensures that cases where multiple driver store paths are
detected are supported.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-26 21:37:14 +02:00
Evan Lezar
d08a2394b3 Merge branch 'fix-package-archive-script' into 'main'
Fix package archive script

See merge request nvidia/container-toolkit/container-toolkit!424
2023-06-26 11:46:26 +00:00
Evan Lezar
c0f1263d78 Fix package archive script
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-26 13:46:08 +02:00
Evan Lezar
a25b1c1048 Merge branch 'fix-load-kernel-modules' into 'main'
Split internal system package

See merge request nvidia/container-toolkit/container-toolkit!420
2023-06-26 08:30:51 +00:00
Evan Lezar
99859e461d Merge branch 'import-wrapper-and-runtime' into 'main'
Import NVIDIA Docker and NVIDIA Container Runtime to in-tree folders

See merge request nvidia/container-toolkit/container-toolkit!418
2023-06-21 12:33:05 +00:00
Evan Lezar
d52dbeaa7a Split internal system package
This changes splits the functionality in the internal system package
into two packages: one for dealing with devices and one for dealing
with kernel modules. This removes ambiguity around the meaning of
driver / device roots in each case.

In each case, a root can be specified where device nodes are created
or kernel modules loaded.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-15 09:01:13 +02:00
Evan Lezar
c11c7695cb Merge branch 'update-go-nvlib' into 'main'
Update go-nvlib with new constructor API

See merge request nvidia/container-toolkit/container-toolkit!422
2023-06-14 22:50:12 +00:00
Evan Lezar
c4d3b13ae2 Update go-nvlib with new constructor
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-14 17:55:33 +02:00
Evan Lezar
bcf3a70174 Update vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-14 17:55:18 +02:00
Evan Lezar
743d290577 Merge branch 'CNT-4301/resolve-auto-to-cdi' into 'main'
Resolve auto mode to cdi if all devices are cdi devices

See merge request nvidia/container-toolkit/container-toolkit!421
2023-06-13 14:48:32 +00:00
Evan Lezar
82347eb9bc Resolve auto mode as cdi for fully-qualified names
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-13 16:05:37 +02:00
Evan Lezar
84c7bf8b18 Minor refactor of mode resolver
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-13 16:04:03 +02:00
Evan Lezar
d92300506c Construct CUDA image object once
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-13 10:36:02 +02:00
Evan Lezar
2da32970b9 Merge branch 'refactor-logger' into 'main'
Refactor Logging

See merge request nvidia/container-toolkit/container-toolkit!416
2023-06-12 08:59:41 +00:00
Evan Lezar
1d0a733487 Replace logger.Warn(f) with logger.Warning(f)
This aligns better with klog used in other projects.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-12 10:48:04 +02:00
Evan Lezar
9464953924 Use logger.Interface when resolving auto mode
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-12 10:46:11 +02:00
Evan Lezar
c9b05d8fed Use logger Interface in runtime configuration
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-12 10:46:11 +02:00
Evan Lezar
a02bc27c3e Define a basic logger interface
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-12 10:46:10 +02:00
Carlos Eduardo Arango Gutierrez
6a04e97bca Merge branch 'use-same-envvars-for-runtime-config' into 'main'
Allow same envars for all runtime configs

See merge request nvidia/container-toolkit/container-toolkit!357
2023-06-12 08:32:34 +00:00
Evan Lezar
0780621024 Ensure runtime dir is set for crio cleanup
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-11 12:46:08 +02:00
Evan Lezar
2bc0f45a52 Remove unused constants and variables
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-11 11:38:22 +02:00
Evan Lezar
178eb5c5a8 Rework restart logic
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-10 12:41:53 +02:00
Evan Lezar
761fc29567 Add version info to config CLIs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-09 18:49:17 +02:00
Evan Lezar
9f5c82420a Refactor toolking to setup and cleanup configs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-09 18:49:15 +02:00
Evan Lezar
23041be511 Add runtimeDir as argument
Thsi change adds the --nvidia-runtime-dir as a command line
argument when configuring container runtimes in the toolkit container.
This removes the need to set it via the command line.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-09 18:48:34 +02:00
Evan Lezar
dcbf4b4f2a Allow same envars for all runtime configs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-09 18:46:34 +02:00
Evan Lezar
652345bc4d Add nvidia-container-runtime as folder
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-07 13:17:48 +02:00
Evan Lezar
69a1a9ef7a Add nvidia-docker as folder
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-07 13:15:23 +02:00
Evan Lezar
2464181d2b Remove runtime and wrapper submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-07 13:09:18 +02:00
Evan Lezar
c3c1d19a5c Merge branch 'bump-libnvidia-container' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!415
2023-06-06 19:22:43 +00:00
Evan Lezar
75f288a6e4 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-06 21:22:25 +02:00
Evan Lezar
94259baea1 Merge branch 'CNT-4302/cdi-only' into 'main'
Skip additional modifications in CDI mode

See merge request nvidia/container-toolkit/container-toolkit!413
2023-06-06 18:29:59 +00:00
Evan Lezar
9e8ff003b6 Merge branch 'bump-libnvidia-container' into 'main'
Bump libnvidia container

See merge request nvidia/container-toolkit/container-toolkit!414
2023-06-06 15:29:35 +00:00
Evan Lezar
3dee9d9a4c Support OpenSSL 3 with the Encrypt/Decrypt library
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-06 16:47:21 +02:00
Evan Lezar
3f03a71afd Skip additional modifications in CDI mode
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-05 15:01:58 +02:00
Evan Lezar
093e93cfbf Merge branch 'bump-cuda-base-image' into 'main'
Bump CUDA baseimage version to 12.1.1

See merge request nvidia/container-toolkit/container-toolkit!412
2023-06-01 12:42:48 +00:00
Evan Lezar
78f619b1e7 Bump CUDA baseimage version to 12.1.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-06-01 14:42:30 +02:00
Evan Lezar
43c44a0f48 Merge branch 'treat-log-errors-as-non-fatal' into 'main'
Ignore errors when creating debug log file

See merge request nvidia/container-toolkit/container-toolkit!404
2023-06-01 07:44:56 +00:00
Evan Lezar
6b1e8171c8 Merge branch 'add-mod-probe' into 'main'
Add option to load NVIDIA kernel modules

See merge request nvidia/container-toolkit/container-toolkit!409
2023-05-31 18:14:45 +00:00
Evan Lezar
2e50b3da7c Merge branch 'ldcache-resolve-circular' into 'main'
Fix infinite recursion when resolving libraries in LDCache

Closes #13

See merge request nvidia/container-toolkit/container-toolkit!406
2023-05-31 17:35:27 +00:00
Evan Lezar
eca13e72bf Update CHANGELOG
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-31 19:33:31 +02:00
Evan Lezar
b64ba6ac2d Add option to create device nodes
This change adds a --create-device-nodes option to the
nvidia-ctk system create-dev-char-symlinks command to create
device nodes. The currently only creates control device nodes.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-31 19:31:38 +02:00
Evan Lezar
7b801a0ce0 Add option to load NVIDIA kernel modules
These changes add a --load-kernel-modules option to the
nvidia-ctk system commands. If specified the NVIDIA kernel modules
(nvidia, nvidia-uvm, and nvidia-modeset) are loaded before any
operations on device nodes are performed.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-31 19:31:38 +02:00
Evan Lezar
528cbbb636 Merge branch 'fix-device-symlinks' into 'main'
Fix creation of device symlinks in /dev/char

See merge request nvidia/container-toolkit/container-toolkit!399
2023-05-31 17:31:04 +00:00
Evan Lezar
fd48233c13 Merge branch 'fix-ubi-pipeline-dependency' into 'main'
Fix ui8 image job dependencies

See merge request nvidia/container-toolkit/container-toolkit!411
2023-05-31 16:39:10 +00:00
Evan Lezar
b72764af5a Fix ui8 image job dependencies
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-31 17:54:41 +02:00
Evan Lezar
7e7c45fb0f Merge branch 'switch-to-centos7' into 'main'
Use centos7 packages instead of centos8 packages

See merge request nvidia/container-toolkit/container-toolkit!410
2023-05-31 15:17:08 +00:00
Evan Lezar
61f515b3dd Use centos7 packages in kitmaker archives
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-31 16:28:54 +02:00
Evan Lezar
e05686cbe8 Use centos7 packages for ubi8 image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-31 16:26:43 +02:00
Carlos Eduardo Arango Gutierrez
1fc8ae32bd Merge branch 'rorajani-rename-ci' into 'main'
Rename blossom ci file

See merge request nvidia/container-toolkit/container-toolkit!408
2023-05-30 11:41:33 +00:00
rorajani
e80d43f4c4 Rename blossom ci file
Signed-off-by: rorajani <rorajani@nvidia.com>
2023-05-30 16:56:32 +05:30
Evan Lezar
a6b0f45d2c Fix infinite recursion when resolving ldcache
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-30 11:03:36 +02:00
Evan Lezar
39263ea365 Add command to print ldcache
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-30 11:02:33 +02:00
Evan Lezar
9ea214d0b3 Correct typo in info command
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-30 10:58:30 +02:00
Evan Lezar
5371ff039b Merge branch 'CNT-4285/add-runtime-hook-path' into 'main'
Add nvidia-contianer-runtime-hook.path config option

See merge request nvidia/container-toolkit/container-toolkit!401
2023-05-26 08:29:52 +00:00
Evan Lezar
315f4adb8f Check for required device majors
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-26 10:24:36 +02:00
Evan Lezar
05632c0a40 Treat missing nvidia device majors as an error
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-26 10:24:36 +02:00
Evan Lezar
8df4a98d7b Merge branch 'pre-sanity-check' into 'main'
Add pre sanity check for gothub repo

See merge request nvidia/container-toolkit/container-toolkit!396
2023-05-25 14:35:43 +00:00
Evan Lezar
02656b624d Create log directory if required
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-25 15:17:00 +02:00
Evan Lezar
61af2aee8e Ignore errors when creating debug log file
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-25 14:44:00 +02:00
Evan Lezar
ddebd69128 Use installed hook path in toolkit container
This change uses the installed NVIDIA Container Runtime Hook wrapper
as the path in the applied config. This prevents conflicts with other
installations of the NVIDIA Container Toolkit.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-25 12:05:33 +02:00
Evan Lezar
ac11727ec5 Add nvidia-contianer-runtime-hook.path config option
This change adds an nvidia-container-runtime-hook.path config option
to allow the path used for the prestart hook to be overridden. This
is useful in cases where multiple NVIDIA Container Toolkit installations
are present.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-25 12:05:33 +02:00
Evan Lezar
5748d220ba Merge branch 'add-centos7-aarch64' into 'main'
Add centos7-aarch64 CI jobs

See merge request nvidia/container-toolkit/container-toolkit!403
2023-05-24 14:48:59 +00:00
Evan Lezar
3b86683843 Add centos7-aarch64 CI jobs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-24 16:48:14 +02:00
Evan Lezar
3bd5baa3c5 Merge branch 'add-centos7-aarch64' into 'main'
Add centos7-aarch64 targets

See merge request nvidia/container-toolkit/container-toolkit!402
2023-05-24 14:42:32 +00:00
Evan Lezar
330aa16687 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-24 15:32:34 +02:00
Evan Lezar
8a4d6b5bcf Add centos7-aarch64 targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-24 15:32:09 +02:00
Evan Lezar
40d0a88cf9 Merge branch 'update-go-nvlib' into 'main'
Update go-nvlib to skip non-MIG devices

See merge request nvidia/container-toolkit/container-toolkit!398
2023-05-24 08:38:21 +00:00
Evan Lezar
dc6a895db8 Merge branch 'pass-single-links-instead-of-csv' into 'main'
Pass individual links in create-symlinks hook instead of CSV filename

See merge request nvidia/container-toolkit/container-toolkit!394
2023-05-23 19:56:18 +00:00
Evan Lezar
3b1b89e6c0 Merge branch 'better-support-for-skipping-update' into 'main'
Skip update of components on SKIP_UPDATE_COMPONENTS=yes

See merge request nvidia/container-toolkit/container-toolkit!400
2023-05-23 19:17:29 +00:00
Evan Lezar
013a1b413b Fix ineffectual assignment
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-23 21:14:02 +02:00
Evan Lezar
3be16d8077 Create individual links instead of processing CSV
This change switches to generating a OCI runtime hook to create
individual symlinks instead of processing a CSV file in the hook.
This allows for better reuse of the logic generating CDI
specifications, for example.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-23 20:43:36 +02:00
Evan Lezar
927ec78b6e Add symlinks package with Resolve function
This change adds a symlinks.Resolve function for resolving symlinks and
updates usages across the code to make use of it.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-23 20:42:17 +02:00
Evan Lezar
8ca606f7ac Skip update of components on SKIP_UPDATE_COMPONENTS=yes
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-23 20:34:20 +02:00
Evan Lezar
e7d2a9c212 Merge branch 'CNT-1876/cdi-specs-from-csv' into 'main'
Add csv mode to CDI spec generation

See merge request nvidia/container-toolkit/container-toolkit!393
2023-05-23 14:47:19 +00:00
Evan Lezar
fcb4e379e3 Fix mode resolution tests
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-23 16:02:07 +02:00
rorajani
cda96f2f9e Add pre sanity
Signed-off-by: rorajani <rorajani@nvidia.com>
2023-05-22 20:39:50 +05:30
Evan Lezar
e11f65e51e Update go-nvlib to skip non-MIG devices
This change updates go-nvlib to ensure that non-migcapable GPUs
are skipped when generating CDI specifications for MIG devices.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-22 15:36:55 +02:00
Evan Lezar
3ea02d13fc Merge branch 'use-major-minor-for-cuda-version' into 'main'
Use *.* pattern when locating libcuda.so

See merge request nvidia/container-toolkit/container-toolkit!397
2023-05-22 13:02:33 +00:00
Evan Lezar
e30fd0f4ad Add csv mode to nvidia-ctk cdi generate command
This chagne allows the csv mode option to specified in the
nvidia-ctk cdi generate command and adds a --csv.file option
that can be repeated to specify the CSV files to be processed.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-22 13:56:45 +02:00
Evan Lezar
418e4014e6 Resolve to csv for CDI generation on tegra systems
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-22 13:56:00 +02:00
Evan Lezar
e78a4f5eac Add csv mode to nvcdi api
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-22 13:55:58 +02:00
Evan Lezar
540dbcbc03 Move tegra system mounts to tegra-specific discoverer
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-22 13:55:22 +02:00
Evan Lezar
a8265f8846 Add tegra discoverer
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-22 13:55:22 +02:00
Evan Lezar
c120c511d5 Merge branch 'CNT-3939/generate-all-device' into 'main'
Add options to generate all device to nvcdi API

See merge request nvidia/container-toolkit/container-toolkit!348
2023-05-22 11:53:39 +00:00
Evan Lezar
424b8c9d46 Use *.* pattern when locating libcuda.so
This change ensures that libcuda.so can be located on systems
where no patch version is specified in the driver version.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-22 13:53:19 +02:00
Evan Lezar
5bc72b70b8 Merge branch 'minor-refactor' into 'main'
Include xorg discoverer with graphics mounts

See merge request nvidia/container-toolkit/container-toolkit!392
2023-05-12 13:12:06 +00:00
Evan Lezar
fe37196788 Generate all device using merged transform
The nvcid api is extended to allow for merged device options to
be specified. If any options are specified, then a merged device
is generated.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-12 13:52:58 +02:00
Evan Lezar
ba44c50f4e Add MergedDevice transform to generate all device
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-12 13:52:58 +02:00
Evan Lezar
729ca941be Merge branch 'refactor-nvidia-ctk-path' into 'main'
Refactor discover.Config to prepare for CSV CDI spec generation.

See merge request nvidia/container-toolkit/container-toolkit!391
2023-05-12 10:45:36 +00:00
Evan Lezar
0ee947dba6 Merge branch 'CNT-4257/remove-redundant-packages' into 'main'
Remove config.toml from installation

See merge request nvidia/container-toolkit/container-toolkit!388
2023-05-12 10:41:47 +00:00
Evan Lezar
d1fd0a7384 Merge branch 'CNT-4270/centos7-packages' into 'main'
Publish centos7 packages as a kitmaker branch

See merge request nvidia/container-toolkit/container-toolkit!390
2023-05-11 10:10:18 +00:00
Evan Lezar
ae2c582138 Merge branch 'clean-scan-archives' into 'main'
Remove image archives after scan

See merge request nvidia/container-toolkit/container-toolkit!389
2023-05-11 10:10:05 +00:00
Evan Lezar
b7e5cef934 Include xorg discoverer with graphics mounts
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 17:07:55 +02:00
Evan Lezar
9378d0cd0f Move discover.FindNvidiaCTK to config.ResolveNVIDIACTKPath
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 15:12:44 +02:00
Evan Lezar
f9df36c473 Rename config struct to options
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 15:12:00 +02:00
Evan Lezar
8bb0235c92 Remove discover.Config
These changes remove the use of discover.Config which was used
to pass the driver root and the nvidiaCTK path in some cases.

Instead, the nvidiaCTKPath is resolved at the begining of runtime
invocation to ensure that this is valid at all points where it is
used.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 15:03:37 +02:00
Evan Lezar
fc310e429e Publish centos7 packages as a kitmaker branch
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 13:44:47 +02:00
Evan Lezar
8d0ffb2fa5 Remove unneeded targets from scripts
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 13:13:05 +02:00
Evan Lezar
9f07cc9ab2 Remove CVE_UPDATES from dockerfiles
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 13:10:10 +02:00
Evan Lezar
1fff80e10d Remove unused CI variables
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 13:10:10 +02:00
Evan Lezar
0a57cdc6e8 Remove redundant packaging targets from CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 13:10:10 +02:00
Evan Lezar
1a86b20f7c Remove config.toml from installation
Since the default configuration is now platform specific,
there is no need to install specific versions as part of
the package installation.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 13:10:10 +02:00
Evan Lezar
0068750a5c Remove image archives after scan
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-10 10:52:47 +02:00
Evan Lezar
ee47f26d1c Merge branch 'cdi-list' into 'main'
Add nvidia-ctk cdi list command

See merge request nvidia/container-toolkit/container-toolkit!387
2023-05-09 18:59:56 +00:00
Evan Lezar
3945abb2f2 Add nvidia-ctk cdi list command
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-09 19:59:00 +02:00
Evan Lezar
9de4f7f4b9 Merge branch 'CNT-4262/create-release-archives' into 'main'
Create and upload release archives to artifactory

See merge request nvidia/container-toolkit/container-toolkit!386
2023-05-09 14:38:10 +00:00
Evan Lezar
3610b5073b Add package release archive
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-09 16:01:53 +02:00
Evan Lezar
1991138185 Merge branch 'CNT-4259/check-for-images-before-release' into 'main'
Skip publishing of images if these already exist

See merge request nvidia/container-toolkit/container-toolkit!385
2023-05-09 09:03:52 +00:00
Evan Lezar
8ebc21cd1f Merge branch 'CNT-4260/add-packaging-scan-and-release' into 'main'
Add scan and release steps for packaging image

See merge request nvidia/container-toolkit/container-toolkit!384
2023-05-08 16:50:09 +00:00
Evan Lezar
1c1ce2c6f7 Use version from manifest to extract packages
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-08 16:32:35 +02:00
Evan Lezar
39b0830a66 Extract manifest from packaging image
Also include manifest.txt with, for example, version
info when extracting packages from the packagin image.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-08 15:55:27 +02:00
Evan Lezar
6b367445a3 Merge branch 'CNT-4016/add-nvidia-ctk-config-default' into 'main'
Add nvidia-ctk config generate-default command to generate default config file contents

See merge request nvidia/container-toolkit/container-toolkit!338
2023-05-08 10:40:42 +00:00
Evan Lezar
37c66fc33c Ensure that the nvidia-container-cli.user option is uncommented on suse
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-08 11:26:54 +02:00
Evan Lezar
1bd5798a99 Use toml representation to get defaults
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-08 11:26:53 +02:00
Evan Lezar
90c4c4811a Fallback to ldconfig if ldconfig.real does not exist
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-08 11:26:24 +02:00
Evan Lezar
49de170652 Generate default config.toml contents
This change adds a GetDefaultConfigToml function to the config package.

This function returns the default config in the form of raw TOML
including comments. This is useful for generating a default config at
installation time, with platform-specific differences codified.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-08 11:26:22 +02:00
Evan Lezar
07c89fa975 Always publish external images
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-08 10:27:48 +02:00
Evan Lezar
7a1f23e2e4 Skip publishing of images if these already exist
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-05 15:06:32 +02:00
Evan Lezar
25165b0771 Add scan and release steps for packaging image
This ensures that the artifacts associated with a particular
release version are preserved along with the container
images that are used as operands for this version.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-05 13:56:10 +02:00
Evan Lezar
3e7acec0b4 Add nvidia-ctk config generate-default command
This change adds a CLI command to generate a default config.
This config checks the host operating system to apply specific
modifications that were previously captured in static config
files.

These include:
* select /sbin/ldconfig or /sbin/ldconfig.real depending on which exists on the host
* set the user to allow device access on SUSE-based systems

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-03 16:11:05 +02:00
Evan Lezar
4165961d31 Rename config struct options to avoid conflict
This change renames the struct for storing CLI flag values options over
config to avoid a conflict with the config package.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-03 15:59:02 +02:00
Evan Lezar
2e3a12438a Fix toml definition in cli config struct
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-03 15:59:02 +02:00
Evan Lezar
731c99b52c Merge branch 'fix-cdi-permissions' into 'main'
Properly set spec permissions

See merge request nvidia/container-toolkit/container-toolkit!383
2023-05-03 08:47:58 +00:00
Evan Lezar
470b4eebd8 Properly set spec permissions
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-03 10:45:11 +02:00
Carlos Eduardo Arango Gutierrez
6750df8e01 Merge branch 'fix-cdi-spec-permissions' into 'main'
Generate CDI specifications with 644 permissions to allow non-root clients to consume them

See merge request nvidia/container-toolkit/container-toolkit!381
2023-05-02 19:36:40 +00:00
Evan Lezar
8736d1e78f Merge branch 'fix/minor-spelling' into 'main'
chore(cmd): Fixing minor spelling error.

See merge request nvidia/container-toolkit/container-toolkit!382
2023-05-02 17:58:13 +00:00
Elliot Courant
140b1e33ef chore(cmd): Fixing minor spelling error.
Fixed a minor spelling error inside `nvidia-ctk system create-device-nodes`.

Signed-off-by: Elliot Courant <me@elliotcourant.dev>
2023-05-02 12:53:45 -05:00
Evan Lezar
3056428eda Generate spec file with 644 permissions
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-02 16:47:44 +02:00
Evan Lezar
367a30827f Allow spec file permisions to be specified
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-02 16:27:50 +02:00
Evan Lezar
fe8ef9e0bd Merge branch 'fix-ld.so.conf-permissions' into 'main'
Create ld.so.conf file with permissions 644

See merge request nvidia/container-toolkit/container-toolkit!380
2023-05-02 10:51:40 +00:00
Evan Lezar
d77f46aa09 Create ld.so.conf file with permissions 644
By default, temporary files are created with permissions 600 and
this means that the files created when updating the ldcache are
not readable in non-root containers.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-05-02 12:51:27 +02:00
Evan Lezar
043e283db3 Merge branch 'nvidia-docker-as-meta-package' into 'main'
Support building nvidia-docker and nvidia-container-runtime as dist-independent packages

See merge request nvidia/container-toolkit/container-toolkit!379
2023-05-02 08:25:45 +00:00
Evan Lezar
2019f1e7ea Preserve timestamps when copying meta packages
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-26 17:03:36 +02:00
Evan Lezar
22c7178561 Build meta-packages before others
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-26 17:03:36 +02:00
Evan Lezar
525aeb102f Update third_party submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-26 13:52:56 +02:00
Evan Lezar
9fb5ac36ed Allow update of subcomponents to be skipped
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-26 13:14:19 +02:00
Evan Lezar
c30764b7cc Update build all components for meta packages
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-26 13:14:19 +02:00
Evan Lezar
8a2de90c28 Merge branch 'revert-kitmaker-workaround' into 'main'
Remove workaround to add libnvidia-container0 to kitmaker archive

See merge request nvidia/container-toolkit/container-toolkit!378
2023-04-26 10:10:23 +00:00
Evan Lezar
243c439bb8 Remove workaround to add libnvidia-container0 to kitmaker archive
In order to add the libnvidia-container0 packages to our ubuntu18.04
kitmaker archive, a workaround was added that downloaded these packages
before constructing the archive. Since the packages have now been
published -- and will not change -- this workaround is not longer needed.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-26 11:47:39 +02:00
Evan Lezar
060ac46bd8 Merge branch 'bump-runc' into 'main'
Bump golang version and update dependencies

See merge request nvidia/container-toolkit/container-toolkit!377
2023-04-25 10:28:07 +00:00
Evan Lezar
ae2a683929 Run go fmt
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-25 11:27:58 +02:00
Evan Lezar
2b5eeb8d24 Regenerate mocks for formatting
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-25 11:26:55 +02:00
Evan Lezar
bbb94be213 Bump golang version to 1.20.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-25 10:42:22 +02:00
Evan Lezar
e1c75aec6c Bump runc version and update vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-25 10:40:46 +02:00
Carlos Eduardo Arango Gutierrez
3030d281d9 Merge branch 'engine_export' into 'main'
Export pkg config/engine

See merge request nvidia/container-toolkit/container-toolkit!375
2023-04-25 05:17:20 +00:00
Carlos Eduardo Arango Gutierrez
81d8b94cdc Export pkg config/engine
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
2023-04-25 07:16:59 +02:00
Evan Lezar
276e1960b1 Merge branch 'CNT-2350/configure-containerd' into 'main'
Add support for containerd configs to nvidia-ctk runtime configure command

See merge request nvidia/container-toolkit/container-toolkit!355
2023-04-24 17:24:24 +00:00
Evan Lezar
70920d7a04 Add support for containerd to the runtime configure CLI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-24 18:32:28 +02:00
Evan Lezar
f1e201d368 Refactor runtime configure cli
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-24 18:32:04 +02:00
Evan Lezar
ef863f5fd4 Merge branch 'bump-version-1.14.0-rc.1' into 'main'
Bump version to 1.14.0-rc.1

See merge request nvidia/container-toolkit/container-toolkit!376
2023-04-24 16:27:05 +00:00
Evan Lezar
ce65df7d17 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-24 17:33:37 +02:00
Evan Lezar
fa9c6116a4 Bump version to 1.14.0-rc.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-24 17:33:04 +02:00
Evan Lezar
28b70663f1 Merge branch 'skip-for-point-release' into 'main'
Skip components for patch releases

See merge request nvidia/container-toolkit/container-toolkit!374
2023-04-24 12:12:36 +00:00
Evan Lezar
c0fe8f27eb Skip components for patch releases
This change ensures that the nvidia-docker2 and nvidia-container-runtime
components are not build and distributed for patch releases.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-24 14:00:10 +02:00
Evan Lezar
926ac77bc0 Merge branch 'fix-cdi-spec-generation-on-debian' into 'main'
Resolve all symlinks when finding libraries in LDCache

See merge request nvidia/container-toolkit/container-toolkit!370
2023-04-24 10:09:37 +00:00
Evan Lezar
fc7c8f7520 Resolve all symlinks in ldcache
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-21 17:28:49 +02:00
Evan Lezar
46c1c45d85 Add /usr/lib/current to search path
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-21 11:47:42 +02:00
Evan Lezar
f99e863649 Merge branch 'CNT-4142/xorg-missing-not-fatal' into 'main'
Make discovery of Xorg libraries optional

See merge request nvidia/container-toolkit/container-toolkit!368
2023-04-21 09:47:10 +00:00
Evan Lezar
dcc21ece97 Merge branch 'add-debug-output' into 'main'
Fix target folder for kitmaker

See merge request nvidia/container-toolkit/container-toolkit!373
2023-04-20 18:38:05 +00:00
Evan Lezar
a53e3604a6 Fix target folder for kitmaker
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-20 20:37:46 +02:00
Evan Lezar
cfea6c1179 Merge branch 'add-debug-output' into 'main'
Properly create target folder for kitmaker

See merge request nvidia/container-toolkit/container-toolkit!372
2023-04-20 14:53:57 +00:00
Evan Lezar
4d1daa0b6c Properly create target folder for kitmaker
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-20 16:53:32 +02:00
Evan Lezar
df925bc7fd Merge branch 'include-libnvidia-container-in-kitmaker' into 'main'
Add a workaround to publish libnvidia-container0

See merge request nvidia/container-toolkit/container-toolkit!371
2023-04-20 08:29:02 +00:00
Evan Lezar
df22e37dfd Add a workaround to publish libnvidia-container0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-18 23:23:36 +02:00
Evan Lezar
2136266d1d Make discovery of Xorg libraries optional
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-13 18:41:38 +02:00
Evan Lezar
a95232dd33 Merge branch 'CNT-4144/non-ldcache' into 'main'
Only update ldcache if it exists

See merge request nvidia/container-toolkit/container-toolkit!369
2023-04-13 16:12:22 +00:00
Evan Lezar
29c6288128 Only update ldcache if it exists
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-13 17:18:09 +02:00
Evan Lezar
cd6fcb5297 Merge branch 'bump-version-1.13.1' into 'main'
Bump version to 1.13.1

See merge request nvidia/container-toolkit/container-toolkit!367
2023-04-13 11:12:37 +00:00
Evan Lezar
36989deff7 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-13 13:11:39 +02:00
Evan Lezar
7f6c9851fe Bump version to 1.13.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-04-13 13:11:09 +02:00
Evan Lezar
b7079454b5 Merge branch 'bump-versions' into 'main'
Bump nvidia-docker and nvidia-container-runtime versions

See merge request nvidia/container-toolkit/container-toolkit!366
2023-03-31 13:00:58 +00:00
Evan Lezar
448bd45ab4 Bump nvidia-docker and nvidia-container-runtime versions
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-31 15:00:15 +02:00
Evan Lezar
dde6170df1 Merge branch 'bump-version-v1.13.0' into 'main'
Bump version to v1.13.0

See merge request nvidia/container-toolkit/container-toolkit!365
2023-03-31 10:58:18 +00:00
Evan Lezar
e4b9350e65 Update libnvidia-container to v1.13.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-31 11:38:01 +02:00
Evan Lezar
622a0649ce Bump version to v1.13.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-31 11:35:53 +02:00
Evan Lezar
f6983969ad Merge branch 'nvidia-ctk-cdi-transform' into 'main'
Add 'target-driver-root' option to 'nvidia-ctk cdi generate' to transform root...

See merge request nvidia/container-toolkit/container-toolkit!363
2023-03-28 20:05:12 +00:00
Evan Lezar
7f7fc35843 Move input and output to transform root subcommand
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 21:12:48 +02:00
Evan Lezar
8eef7e5406 Merge branch 'add-runtimes' into 'main'
Add nvidia-container-runtime.runtimes config option

See merge request nvidia/container-toolkit/container-toolkit!364
2023-03-28 18:58:46 +00:00
Evan Lezar
f27c33b45f Remove target-driver-root from generate
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 11:49:45 -07:00
Evan Lezar
6a83e2ebe5 Add nvidia-ctk cdi transform root command
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 11:45:58 -07:00
Christopher Desiniotis
ee5be5e3f2 Merge branch 'CNT-4056/add-cdi-annotations' into 'main'
Add nvidia-container-runtime.modes.cdi.annotation-prefixes config option.

See merge request nvidia/container-toolkit/container-toolkit!356
2023-03-28 16:47:51 +00:00
Evan Lezar
be0cc9dc6e Add nvidia-container-runtime.runtimes config option
This change adds an nvidia-container-runtime.runtimes config option.

If this is unset no changes are made to the config and the default values are used. This
allows this setting to be overridden in cases where this is required. One such example is
crio where crun is set as the default runtime.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 17:39:17 +02:00
Evan Lezar
7c5283bb97 Merge branch 'create-device-nodes' into 'main'
Add nvidia-ctk system create-device-nodes command

See merge request nvidia/container-toolkit/container-toolkit!362
2023-03-28 15:07:04 +00:00
Evan Lezar
4d5ba09d88 Add --ignore-errors option for testing
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 16:24:17 +02:00
Evan Lezar
149236b002 Configure containerd config based on specified annotation prefixes
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 16:22:48 +02:00
Evan Lezar
ee141f97dc Reorganise setting toolkit config options
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 16:22:48 +02:00
Evan Lezar
646503ff31 Set nvidia-container-runtime.modes.cdi.annotation-prefixes in toolkit-contianer
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 16:22:48 +02:00
Evan Lezar
cdaaf5e46f Generate device nodes when creating management spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 11:29:45 +02:00
Evan Lezar
e774c51c97 Add nvidia-ctk system create-device-nodes command
This change adds an nvidia-ctk system create-device-nodes command for
creating NVIDIA device nodes. Currently this is limited to control devices
(nvidia-uvm, nvidia-uvm-tools, nvidia-modeset, nvidiactl).

A --dry-run mode is included for outputing commands that would be executed and
the driver root can be specified.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-28 11:29:45 +02:00
Christopher Desiniotis
7f5c9abc1e Add ability to configure CDI kind with 'nvidia-ctk cdi generate'
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-03-27 23:12:00 -07:00
Christopher Desiniotis
92d82ceaee Add 'target-driver-root' option to 'nvidia-ctk cdi generate' to transform root paths in generated spec
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-03-27 22:22:36 -07:00
Evan Lezar
c46b118f37 Add nvidia-container-runtime.modes.cdi.annotation-prefixes config option.
This change adds an nvidia-container-runtime.modes.cdi.annotation-prefixes config
option that defaults to cdi.k8s.io/. This allows the annotation prefixes parsed
for CDI devices to be overridden in cases where CDI support in container engines such
as containerd or crio need to be overridden.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-27 16:36:54 +02:00
Evan Lezar
1722b07615 Merge branch 'CNT-2264/xorg-libs' into 'main'
Inject xorg libs and config in container

See merge request nvidia/container-toolkit/container-toolkit!328
2023-03-27 14:19:52 +00:00
Evan Lezar
c13c6ebadb Inject xorg libs and config in container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-26 17:04:06 +02:00
Evan Lezar
2abe679dd1 Move libcuda locator to internal/lookup package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-26 17:04:06 +02:00
Evan Lezar
9571513601 Merge branch 'update-changelog' into 'main'
Update changelog

See merge request nvidia/container-toolkit/container-toolkit!361
2023-03-26 15:03:28 +00:00
Evan Lezar
ff2767ee7b Reorder changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-26 17:03:05 +02:00
Evan Lezar
56319475a6 Merge branch 'fix-changelog' into 'main'
Reorder changelog

See merge request nvidia/container-toolkit/container-toolkit!360
2023-03-26 14:52:27 +00:00
Evan Lezar
a3ee58a294 Reorder changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-26 16:51:59 +02:00
Evan Lezar
7a533aeff3 Merge branch 'update-nvcdi-new-with-error' into 'main'
Allow nvcdi.Option to return an error

See merge request nvidia/container-toolkit/container-toolkit!352
2023-03-26 14:13:41 +00:00
Evan Lezar
226c54613e Also return an error from nvcdi.New
This change allows nvcdi.New to return an error in addition to the
constructed library instead of panicing.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-26 16:13:12 +02:00
Evan Lezar
1ebbebf5de Merge branch 'CNT-3932/deduplicate-entries-in-cdi-spec' into 'main'
Add transform to deduplicate entities in CDI spec

See merge request nvidia/container-toolkit/container-toolkit!345
2023-03-24 19:04:43 +00:00
Evan Lezar
33f6fe0217 Generate a simplified CDI spec by default
As simplified CDI spec has no duplicate entities in any single set of container edits.
Furthermore, contianer edits defined at a spec-level are not included in the container
edits for a device.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-24 11:01:46 +02:00
Evan Lezar
5ff206e1a9 Add transform to deduplicate entities in CDI spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-24 11:01:23 +02:00
Evan Lezar
df618d3cba Merge branch 'CNT-4052/fix-arm-management-containers' into 'main'
Fix generation of management CDI spec in containers

See merge request nvidia/container-toolkit/container-toolkit!354
2023-03-23 16:39:10 +00:00
Evan Lezar
9506bd9da0 Fix generation of management CDI spec in containers
Since we relied on finding libcuda.so in the LDCache to determine both the CUDA
version and the expected directory for the driver libraries, the generation of the
management CDI specifications fails in containers where the LDCache has not been updated.

This change falls back to searching a set of predefined paths instead when the lookup of
libcuda.so in the cache fails.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-23 15:59:01 +02:00
Evan Lezar
5e0684e99d Merge branch 'update-libnvidia-container' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!353
2023-03-23 08:50:18 +00:00
Evan Lezar
09a0cb24cc Remove fedora make targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-23 10:35:57 +02:00
Evan Lezar
ff92f1d799 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-23 10:33:26 +02:00
Christopher Desiniotis
b87703c503 Merge branch 'fix-nil-logger-in-library-locator' into 'main'
Instantiate a logger when constructing a library Locator

See merge request nvidia/container-toolkit/container-toolkit!351
2023-03-21 21:54:14 +00:00
Christopher Desiniotis
b2aaa21b0a Instantiate a logger when constructing a library Locator
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-03-21 13:38:36 -07:00
Evan Lezar
310c15b046 Merge branch 'CNT-4026/only-init-nvml-when-required' into 'main'
Only init nvml as required when generating CDI specs

See merge request nvidia/container-toolkit/container-toolkit!344
2023-03-20 13:26:07 +00:00
Evan Lezar
685802b1ce Only init nvml as required when generating CDI specs
CDI generation modes such as management and wsl don't require
NVML. This change removes the top-level instantiation of nvmllib
and replaces it with an instanitation in the nvml CDI spec generation
code.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-20 14:24:08 +02:00
Evan Lezar
380eb8340a Merge branch 'blossom-ci' into 'main'
Add blossom-ci github action

See merge request nvidia/container-toolkit/container-toolkit!349
2023-03-20 09:56:23 +00:00
Evan Lezar
f98e1160f5 Update components with blossim-ci
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-20 11:06:44 +02:00
Evan Lezar
1962fd68df Merge branch 'locate-ipc-sockets-at-run' into 'main'
Locate persistenced and fabricmanager sockets at /run instead of /var/run

See merge request nvidia/container-toolkit/container-toolkit!347
2023-03-20 08:08:59 +00:00
Carlos Eduardo Arango Gutierrez
29813c1e14 Add blossom-ci github action
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
2023-03-17 16:16:27 +01:00
Evan Lezar
df40fbe03e Locate persistenced and fabricmanager sockets at /run instead of /var/run
This chagne prefers (non-symlink) sockets at /run over /var/run for
nvidia-persistenced and nvidia-fabricmanager sockets.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-17 09:23:48 +02:00
Carlos Eduardo Arango Gutierrez
7000c6074e Merge branch 'ci_rules' into 'main'
Rework pipeline triggers for MRs

See merge request nvidia/container-toolkit/container-toolkit!346
2023-03-15 13:15:23 +00:00
Evan Lezar
ef1fe3ab41 Rework pipeline triggers for MRs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-15 14:15:20 +02:00
Evan Lezar
fdd198b0e8 Merge branch 'bump-v1.13.0-rc.3' into 'main'
Bump version to v1.13.0-rc.3

See merge request nvidia/container-toolkit/container-toolkit!343
2023-03-15 07:50:50 +00:00
Evan Lezar
e37f77e02d Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-15 09:49:49 +02:00
Evan Lezar
3fcfee88be Bump version to v1.13.0-rc.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-15 09:26:19 +02:00
Evan Lezar
a082413d09 Merge branch 'trigger-ci-on-mrs-only' into 'main'
Add workflow rule to only trigger on MRs

See merge request nvidia/container-toolkit/container-toolkit!342
2023-03-15 07:10:30 +00:00
Evan Lezar
280f40508e Make pipeline manual on MRs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-15 08:51:18 +02:00
Evan Lezar
e2be0e2ff0 Add workflow rule to only trigger on MRs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-15 08:45:26 +02:00
Evan Lezar
dcff3118d9 Merge branch 'update-libnvidia-container' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!340
2023-03-14 13:54:11 +00:00
Evan Lezar
731168ec8d Update changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-14 15:05:36 +02:00
Evan Lezar
7b4435a0f8 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-14 15:05:29 +02:00
Evan Lezar
738af29724 Merge branch 'explicit-cdi-enabled-flag' into 'main'
Add --cdi-enabled option to control generating CDI spec

See merge request nvidia/container-toolkit/container-toolkit!339
2023-03-14 07:00:30 +00:00
Evan Lezar
08ef242afb Add --cdi-enabled option to control generating CDI spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-13 18:19:00 +02:00
Evan Lezar
92ea8be309 Merge branch 'fix-privileged-check-cdi-mode' into 'main'
Return empty list of devices for unprivileged containers when...

See merge request nvidia/container-toolkit/container-toolkit!337
2023-03-13 07:36:25 +00:00
Christopher Desiniotis
48414e97bb Return empty list of devices for unprivileged containers when 'accept-nvidia-visible-devices-envvar-unprivileged=false'
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-03-10 13:11:29 -08:00
Evan Lezar
77a2975524 Merge branch 'fix-kitmaker' into 'main'
Use component name as folder name

See merge request nvidia/container-toolkit/container-toolkit!336
2023-03-10 13:57:24 +00:00
Evan Lezar
ce9477966d Use component name as folder name
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-10 15:51:36 +02:00
Evan Lezar
fe02351c3a Merge branch 'bump-cuda-version' into 'main'
Bump CUDA base image version to 12.1.0

See merge request nvidia/container-toolkit/container-toolkit!335
2023-03-10 10:23:30 +00:00
Evan Lezar
9c2018a0dc Bump CUDA base image version to 12.1.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-10 11:31:23 +02:00
Evan Lezar
33e5b34fa1 Merge branch 'CNT-3999/legacy-cli-doesnt-work-in-cdi-mode' into 'main'
Add nvidia-container-runtime-hook.skip-mode-detection option to config

See merge request nvidia/container-toolkit/container-toolkit!330
2023-03-09 19:18:16 +00:00
Evan Lezar
ccf73f2505 Set skip-mode-detection in the toolkit-container by default
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 20:16:10 +02:00
Evan Lezar
3a11f6ee0a Add nvidia-container-runtime-hook.skip-mode-detection option to config
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 20:15:40 +02:00
Evan Lezar
8f694bbfb7 Merge branch 'set-nvidia-ctk-path' into 'main'
Set nvidia-ctk.path config option based on installed path

See merge request nvidia/container-toolkit/container-toolkit!334
2023-03-09 16:44:13 +00:00
Evan Lezar
4c2eff4865 Merge branch 'CNT-3998/cdi-accept-visible-devices-when-privileged' into 'main'
Honor accept-nvidia-visible-devices-envvar-when-unprivileged setting in CDI mode

See merge request nvidia/container-toolkit/container-toolkit!331
2023-03-09 15:59:08 +00:00
Evan Lezar
1fbdc17c40 Set nvidia-ctk.path config option based on installed path
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 17:53:08 +02:00
Evan Lezar
965d62f326 Merge branch 'fix-containerd-integration-tests' into 'main'
Fix integration tests failing due to CDI spec generation

See merge request nvidia/container-toolkit/container-toolkit!333
2023-03-09 14:41:52 +00:00
Evan Lezar
25ea7fa98e Remove whitespace in Makefile
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 15:32:07 +02:00
Evan Lezar
5ee040ba95 Disable CDI spec generation for integration tests 2023-03-09 15:32:07 +02:00
Evan Lezar
eb2aec9da8 Allow CDI options to be set by envvars
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 12:25:05 +02:00
Evan Lezar
973e7bda5e Check accept-nvidia-visible-devices-envvar-when-unprivileged option for CDI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 11:15:53 +02:00
Evan Lezar
154cd4ecf3 Add to config struct
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 11:15:53 +02:00
Evan Lezar
936fad1d04 Move check for privileged images to config/image/ package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-09 11:15:53 +02:00
Evan Lezar
86dd046c7c Merge branch 'CNT-3928/allow-cdi-container-annotations' into 'main'
Add cdi.k8s.io annotations to runtimes configured in containerd

See merge request nvidia/container-toolkit/container-toolkit!315
2023-03-09 07:52:37 +00:00
Evan Lezar
510fb248fe Add cdi.k8s.io annotations to containerd config
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-08 07:23:27 +02:00
Evan Lezar
c7384c6aee Merge branch 'fix-comment' into 'main'
Fix comment

See merge request nvidia/container-toolkit/container-toolkit!329
2023-03-08 05:15:38 +00:00
Evan Lezar
1c3c9143f8 Fix comment
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-08 07:15:05 +02:00
Evan Lezar
1c696b1e39 Merge branch 'CNT-3894/configure-mode-specific-runtimes' into 'main'
Configure .cdi and .legacy executables in Toolkit Container

See merge request nvidia/container-toolkit/container-toolkit!308
2023-03-08 05:12:50 +00:00
Evan Lezar
a2adbc1133 Merge branch 'CNT-3898/improve-cdi-annotations' into 'main'
Improve handling of environment variable devices in CDI mode

See merge request nvidia/container-toolkit/container-toolkit!321
2023-03-08 04:37:41 +00:00
Evan Lezar
36576708f0 Merge branch 'CNT-3896/gds-mofed-devices' into 'main'
Add GDS and MOFED support to the NVCDI API

See merge request nvidia/container-toolkit/container-toolkit!323
2023-03-08 04:36:55 +00:00
Evan Lezar
cc7a6f166b Handle case were runtime name is set to predefined name
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:56 +02:00
Evan Lezar
62d88e7c95 Add cdi and legacy mode runtimes
This change adds .cdi and .legacy mode-specific runtimes the list of
runtimes supported by the operator. These are also installed as
part of the NVIDIA Container Toolkit.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:55 +02:00
Evan Lezar
dca8e3123f Migrate containerd config to engine.Interface
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:55 +02:00
Evan Lezar
3bac4fad09 Migrate cri-o config update to engine.Interface
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:54 +02:00
Evan Lezar
9fff19da23 Migrate docker config to engine.Interface
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:54 +02:00
Evan Lezar
e5bb4d2718 Move runtime config code from config to config/engine
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:54 +02:00
Evan Lezar
5bfb51f801 Add API for interacting with runtime engine configs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:53 +02:00
Evan Lezar
ece5b29d97 Add tools/container/operator package to handle runtime naming
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:53 +02:00
Evan Lezar
ec8a92c17f Use nvidia-container-runtime.experimental as wrapper
This change switches to using nvidia-container-runtime.experimental as the
wrapper name over nvidia-container-runtime-experimental. This is consistent
with upcoming mode-specific binaries.

The wrapper is created at nvidia-container-runtime.experimental.real.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 20:59:53 +02:00
Evan Lezar
868393b7ed Add mofed mode to nvcdi API
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 18:47:52 +02:00
Evan Lezar
ebe18fbb7f Add gds mode to nvcdi API
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 18:47:52 +02:00
Evan Lezar
9435343541 Merge branch 'fix-kitmaker' into 'main'
Include = when extracting manifest information

See merge request nvidia/container-toolkit/container-toolkit!327
2023-03-07 14:44:27 +00:00
Evan Lezar
1cd20afe4f Include = when extracting manifest information
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 16:43:49 +02:00
Evan Lezar
1e6fe40c76 Allow nvidia-container-runtime.modes.cdi.default-kind to be set
This change allows the nvidia-container-runtime.modes.cdi.default-kind
to be set in the toolkit-container.

The NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND envvar is used.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 16:19:38 +02:00
Evan Lezar
6d220ed9a2 Rework selection of devices in CDI mode
The following changes are made:
* The default-cdi-kind config option is used to convert an envvar entry to a fully-qualified device name
* If annotation devices exist, these are used instead of the envvar devices.
* The `all` device is no longer treated as a special case and MUST exist in the CDI spec.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 16:18:53 +02:00
Evan Lezar
f00439c93e Add nvidia-container-runtime.modes.csv.default-kind config option
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-07 16:18:53 +02:00
Evan Lezar
c59696e30e Merge branch 'fix-kitmaker' into 'main'
Log source file

See merge request nvidia/container-toolkit/container-toolkit!326
2023-03-06 16:26:43 +00:00
Evan Lezar
89c18c73cd Add source and log curl command
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 18:26:05 +02:00
Evan Lezar
cb5006c73f Merge branch 'CNT-3897/generate-management-container-spec' into 'main'
Generate CDI specs for management containers

See merge request nvidia/container-toolkit/container-toolkit!314
2023-03-06 16:23:13 +00:00
Evan Lezar
547b71f222 Merge branch 'change-discovery-mode' into 'main'
Rename --discovery-mode to --mode

See merge request nvidia/container-toolkit/container-toolkit!318
2023-03-06 16:21:22 +00:00
Evan Lezar
ae84bfb055 Log source file
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 18:11:12 +02:00
Evan Lezar
9b303d5b89 Merge branch 'fix-changelist' into 'main'
Strip on tilde for kitmaker version

See merge request nvidia/container-toolkit/container-toolkit!325
2023-03-06 14:10:54 +00:00
Evan Lezar
d944f934d7 Strip on tilde for kitmaker version
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 16:10:25 +02:00
Evan Lezar
c37209cd09 Merge branch 'fix-changelist' into 'main'
Fix blank changelist in kitmaker properties

See merge request nvidia/container-toolkit/container-toolkit!324
2023-03-06 13:51:19 +00:00
Evan Lezar
863b569a61 Fix blank changelist in kitmaker properties
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 15:50:38 +02:00
Evan Lezar
f36c514f1f Merge branch 'update-kitmaker-folders' into 'main'
Update kitmaker target folder

See merge request nvidia/container-toolkit/container-toolkit!313
2023-03-06 11:16:49 +00:00
Evan Lezar
3ab28c7fa4 Merge branch 'fix-rule-for-release' into 'main'
Run full build on release- branches

See merge request nvidia/container-toolkit/container-toolkit!320
2023-03-06 10:56:58 +00:00
Evan Lezar
c03258325b Run full build on release- branches
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 12:54:27 +02:00
Evan Lezar
20d3bb189b Rename --discovery-mode to --mode
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 11:00:22 +02:00
Evan Lezar
90acec60bb Skip CDI spec generation in integration tests
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 10:57:40 +02:00
Evan Lezar
0565888c03 Generate CDI spec in toolkit container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 10:57:40 +02:00
Evan Lezar
f7e817cff6 Support management mode in nvidia-ctk cdi generate
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 10:53:43 +02:00
Evan Lezar
29cbbe83f9 Add management mode to CDI spec generation API
These changes add support for generating a management spec to the nvcdi API.
A management spec consists of a single CDI device (`all`) which includes all expected
NVIDIA device nodes, driver libraries, binaries, and IPC sockets.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 10:53:43 +02:00
Evan Lezar
64b16acb1f Also install nvidia-ctk in toolkit-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-06 10:53:43 +02:00
Evan Lezar
19c20bb422 Merge branch 'CNT-3931/add-spec-validation' into 'main'
Add nvcdi.spec for writing and validating CDI specifications

See merge request nvidia/container-toolkit/container-toolkit!306
2023-03-06 08:52:56 +00:00
Evan Lezar
28b10d2ee0 Merge branch 'fix-toolkit-ctr-envvars' into 'main'
Fix handling of envvars in toolkit container which modify the NVIDIA Container Runtime config

See merge request nvidia/container-toolkit/container-toolkit!317
2023-03-06 07:36:03 +00:00
Christopher Desiniotis
1f5123f72a Fix handling of envvars in toolkit container which modify the NVIDIA Container Runtime config
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-03-05 20:14:04 -08:00
Evan Lezar
ac5b6d097b Use kitmaker folder for releases
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 17:27:07 +02:00
Evan Lezar
a7bf9ddf28 Update kitmaker folder structure
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 17:27:07 +02:00
Evan Lezar
e27479e170 Add GIT_COMMIT_SHORT to packaging image manifest
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 17:27:07 +02:00
Evan Lezar
fa28e738c6 Merge branch 'fix-internal-scans' into 'main'
Fix internal scans

See merge request nvidia/container-toolkit/container-toolkit!316
2023-03-01 15:26:27 +00:00
Evan Lezar
898c5555f6 Fix internal scans
This fixes the internal scans due to the removed ubuntu18.04 images.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 17:25:28 +02:00
Evan Lezar
314059fcf0 Move path manipulation to spec.Save
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:49:04 +02:00
Evan Lezar
221781bd0b Use full path for output spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:48:28 +02:00
Evan Lezar
9f5e141437 Expose vendor and class as options
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:48:28 +02:00
Evan Lezar
8be6de177f Move formatJSON and formatYAML to nvcdi/spec package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:48:28 +02:00
Evan Lezar
890a519121 Use nvcdi.spec package to write and validate spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:48:28 +02:00
Evan Lezar
89321edae6 Add top-level GetSpec function to nvcdi API
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 13:48:28 +02:00
Evan Lezar
6d6cd56196 Return nvcdi.spec.Interface from GetSpec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 12:45:30 +02:00
Evan Lezar
2e95e04359 Add nvcdi.spec package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-03-01 12:45:30 +02:00
Evan Lezar
accba4ead5 Merge branch 'CNT-3965/clean-up-by-path-symlinks' into 'main'
Improve handling of /dev/dri devices and nested device paths

See merge request nvidia/container-toolkit/container-toolkit!307
2023-03-01 10:25:48 +00:00
Christopher Desiniotis
1e9b7883cf Merge branch 'CNT-3937/add-target-driver-root' into 'main'
Add a driver root transformer to nvcdi

See merge request nvidia/container-toolkit/container-toolkit!300
2023-02-28 18:04:29 +00:00
Christopher Desiniotis
87e406eee6 Update root transformer tests to ensure container path is not modified
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-02-28 09:00:05 -08:00
Christopher Desiniotis
45ed3b0412 Handle hook arguments for creation of symlinks
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-02-28 09:00:02 -08:00
Christopher Desiniotis
0516fc96ca Add Transform interface and initial implemention for a root transform
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-02-28 08:56:13 -08:00
Evan Lezar
e7a435fd5b Merge branch 'update-libnvidia-container' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!312
2023-02-27 13:41:26 +00:00
Evan Lezar
7a249d7771 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 15:41:02 +02:00
Evan Lezar
7986ff9cee Merge branch 'CNT-3963/deduplicate-wsl-driverstore-paths' into 'main'
Deduplicate WSL driverstore paths

See merge request nvidia/container-toolkit/container-toolkit!304
2023-02-27 13:27:31 +00:00
Evan Lezar
b74c13d75f Merge branch 'fix-rpm-postun-scriptlet' into 'main'
nvidia-container-toolkit.spec: fix syntax error in postun scriptlet

See merge request nvidia/container-toolkit/container-toolkit!309
2023-02-27 12:36:49 +00:00
Evan Lezar
de8eeb87f4 Merge branch 'remove-outdated-platforms' into 'main'
Remove outdated platforms from CI

See merge request nvidia/container-toolkit/container-toolkit!310
2023-02-27 11:48:33 +00:00
Evan Lezar
36c4174de3 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 13:45:44 +02:00
Evan Lezar
3497936cdf Remove ubuntu18.04 toolkit-container image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 12:55:17 +02:00
Evan Lezar
81abc92743 Remove fedora35 from 'all' targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 12:31:38 +02:00
Evan Lezar
1ef8dc3137 Remove centos7-ppc64le from CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 12:30:29 +02:00
Evan Lezar
9a5c1bbe48 Remove ubuntu16.04 packages from CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 12:29:35 +02:00
Evan Lezar
30dff61376 Remove debian9 packages from CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-27 12:28:46 +02:00
Claudius Volz
de1bb68d19 nvidia-container-toolkit.spec: fix syntax error in postun scriptlet
Signed-off-by: Claudius Volz <c.volz@gmx.de>
2023-02-27 00:45:21 +01:00
Evan Lezar
06d8bb5019 Merge branch 'CNT-3965/dont-fail-chmod-hook' into 'main'
Skip paths with errors in chmod hook

See merge request nvidia/container-toolkit/container-toolkit!303
2023-02-22 15:20:26 +00:00
Evan Lezar
b4dc1f338d Generate nested device folder permission hooks per device
This change generates device folder permission hooks per device instead of
at a spec level. This ensures that the hook is not injected for a device that
does not have any nested device nodes.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-22 17:16:23 +02:00
Evan Lezar
181128fe73 Only include by-path-symlinks for injected device nodes
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-22 16:53:04 +02:00
Evan Lezar
252838e696 Merge branch 'bump-version-v1.13.0-rc.2' into 'main'
Bump version to v1.13.0-rc.2

See merge request nvidia/container-toolkit/container-toolkit!305
2023-02-21 13:11:00 +00:00
Evan Lezar
49f171a8b1 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-21 14:27:02 +02:00
Evan Lezar
3d12803ab3 Bump version to v1.13.0-rc.2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-21 14:24:37 +02:00
Evan Lezar
a168091bfb Add v1.13.0-rc.1 Changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-21 14:23:52 +02:00
Evan Lezar
35fc57291f Deduplicate WSL driverstore paths
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-21 11:48:56 +02:00
Evan Lezar
2542224d7b Skip paths with errors in chmod hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-21 11:47:11 +02:00
Evan Lezar
882fbb3209 Merge branch 'add-cdi-auto-mode' into 'main'
Add constants for CDI mode to nvcdi API

See merge request nvidia/container-toolkit/container-toolkit!302
2023-02-20 14:41:07 +00:00
Evan Lezar
2680c45811 Add mode constants to nvcdi
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 16:33:51 +02:00
Evan Lezar
b76808dbd5 Add tests for CDI mode resolution
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 16:33:33 +02:00
Evan Lezar
ba50b50a15 Merge branch 'add-cdi-auto-mode' into 'main'
Add auto mode to CDI spec generation

See merge request nvidia/container-toolkit/container-toolkit!292
2023-02-20 14:30:33 +00:00
Evan Lezar
f6d3f8d471 Merge branch 'CNT-3895/add-runtime-mode-config' into 'main'
Add nvidia-container-runtime.mode config option

See merge request nvidia/container-toolkit/container-toolkit!299
2023-02-20 12:51:18 +00:00
Evan Lezar
d9859d66bf Update go vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 14:49:58 +02:00
Evan Lezar
4ccb0b9a53 Add and resolve auto discovery mode for cdi generation
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 14:49:58 +02:00
Evan Lezar
f36c775d50 Merge branch 'wsl2-wip' into 'main'
Add CDI Spec generation on WSL2

See merge request nvidia/container-toolkit/container-toolkit!289
2023-02-20 09:36:41 +00:00
Evan Lezar
b21dc929ef Add WSL2 discovery and spec generation
These changes add a wsl discovery mode to the nvidia-ctk cdi generate command.

If wsl mode is enabled, the driver store for the available devices is used as
the source for discovered entities.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
d226925fe7 Construct nvml-based CDI lib based on mode
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
20d6e9af04 Add --discovery-mode to nvidia-ctk cdi generate command
This change adds --discovery-mode flag to the nvidia-ctk cdi generate
command and plumbs this through to the CDI API.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
5103adab89 Add mode option to nvcdi API
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
7eb435eb73 Add basic dxcore bindings
This change copies dxcore.h and dxcore.c from libnvidia-container to
allow for the driver store path to be queried. Modifications are made
to dxcore to remove the code associated with checking the components
in the driver store path.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
5d011c1333 Add Discoverer to create a single symlink
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:30:13 +02:00
Evan Lezar
6adb792d57 Merge branch 'fix-nvidia-ctk-path' into 'main'
Ensure that generate uses a consistent nvidia-ctk path

See merge request nvidia/container-toolkit/container-toolkit!301
2023-02-20 08:29:44 +00:00
Evan Lezar
a844749791 Ensure that generate uses a consistent nvidia-ctk path
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-20 10:28:45 +02:00
Evan Lezar
dd0d43e726 Add nvidia-container-runtime.mode config option
This change allows the nvidia-container-runtime.mode option to be set
by the toolkit container.

This is controlled by the --nvidia-container-runtime-mode command line
argument and the NVIDIA_CONTAINER_RUNTIME_MODE envvar.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-17 18:04:49 +02:00
Evan Lezar
25811471fa Merge branch 'update-libnvidia-container' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!298
2023-02-17 08:46:56 +00:00
Evan Lezar
569bc1a889 Update Changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-17 10:46:21 +02:00
Evan Lezar
b1756b410a Merge branch 'fix-logging' into 'main'
Fix nvidia-container-runtime logging

See merge request nvidia/container-toolkit/container-toolkit!296
2023-02-16 15:17:24 +00:00
Evan Lezar
7789ac6331 Fix logger.Update and Reset
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-16 15:22:56 +01:00
Evan Lezar
7a3aabbbda Add logger test
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-16 15:22:56 +01:00
Evan Lezar
e486095603 Merge branch 'fix-nvidia-ctk-path' into 'main'
Fix issue with blank nvidia-ctk path

See merge request nvidia/container-toolkit/container-toolkit!297
2023-02-16 13:58:43 +00:00
Evan Lezar
bf6babe07e Fix issue with blank nvidia-ctk path
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-16 14:18:07 +01:00
Kevin Klues
d5a4d89682 Merge branch 'support-multimple-firmware-files' into 'main'
Add globbing for mounting multiple GSP firmware files

See merge request nvidia/container-toolkit/container-toolkit!295
2023-02-16 13:09:47 +00:00
Kevin Klues
5710b9e7e8 Add globbing for mounting multiple GSP firmware files
Newer drivers have split the GSP firmware into multiple files so a simple match
against gsp.bin in the firmware directory is no longer possible. This patch
adds globbing capabilitis to match any GSP firmware files of the form gsp*.bin
and mount them all into the container.

Signed-off-by: Kevin Klues <kklues@nvidia.com>
2023-02-16 11:53:36 +00:00
Evan Lezar
b4ab95f00c Merge branch 'fix-nvcdi-constructor' into 'main'
fix: apply options when constructing an instance of the nvcdi library

See merge request nvidia/container-toolkit/container-toolkit!294
2023-02-15 08:13:19 +00:00
Christopher Desiniotis
a52c9f0ac6 fix: apply options when constructing an instance of the nvcdi library
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2023-02-14 16:32:40 -08:00
Evan Lezar
b6bab4d3fd Merge branch 'expose-generate-spec' into 'main'
Implement basic CDI spec generation API

See merge request nvidia/container-toolkit/container-toolkit!257
2023-02-14 19:36:31 +00:00
Evan Lezar
5b110fba2d Add nvcdi package with basic CDI generation API
This change adds an nvcdi package that exposes a basic API for
CDI spec generation. This is used from the nvidia-ctk cdi generate
command and can be consumed by DRA implementations and the device plugin.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-14 19:52:31 +01:00
Evan Lezar
179133c8ad Merge branch 'fix-ubi8' into 'main'
Fix package version in ubi8 container builds

See merge request nvidia/container-toolkit/container-toolkit!293
2023-02-14 10:31:21 +00:00
Evan Lezar
365b6c7bc2 Fix package version in ubi8 container builds
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-14 10:50:38 +01:00
Evan Lezar
dc4887cd44 Merge branch 'cdi-executable' into 'main'
Add nvidia-container-runtime.{{MODE}} executable that overrides runtime mode

See merge request nvidia/container-toolkit/container-toolkit!288
2023-02-14 08:01:41 +00:00
Evan Lezar
c4836a576f Also skip nvidia-container-toolit-operator-extensions in release scripts
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:10:01 +01:00
Evan Lezar
98afe0d27a Generate nvidia-container-toolkit-operator-extensions package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
fdc759f7c2 Add nvidia-container-runtime.legacy executable
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
43448bac11 Add nvidia-container-runtime.cdi executable
This change adds an nvidia-container-runtime.cdi executable that
overrides the runtime mode from the config to "cdi".

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
456d2864a6 Log config in JSON if possible
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
406a5ec76f Implement runtime package for creating runtime CLI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
f71c419cfb Move modifying OCI runtime wrapper to oci package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:46 +01:00
Evan Lezar
babb73295f Update gitignore
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 16:09:45 +01:00
Evan Lezar
f3ec5fd329 Merge branch 'packaging-verisons' into 'main'
Align release candidate RPM version with Debian version

See merge request nvidia/container-toolkit/container-toolkit!291
2023-02-13 14:53:01 +00:00
Evan Lezar
5aca0d147d Use - as version-tag separator for libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 15:10:08 +01:00
Evan Lezar
f2b19b6ae9 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 14:39:14 +01:00
Evan Lezar
7cb9ed66be Align release candidate RPM version with Debian version
The version for RPM release candidates has the form `1.13.0-0.1.rc.1-1` whereas debian packages have the form `1.13.0~rc.1-1`.

Note that since the `~` is handled in [the same way](https://docs.fedoraproject.org/en-US/packaging-guidelines/Versioning/#_handling_non_sorting_versions_with_tilde_dot_and_caret) as for Debian packages, there does not seem to be a specific reason for this and dealing with multiple version strings in our entire pipeline adds complexity.

This change aligns the package versioning for rpm packages with Debian packages.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 14:31:23 +01:00
Evan Lezar
d578f4598a Remove fedora35 pipeline targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-13 14:31:23 +01:00
Evan Lezar
d30e6c23ab Merge branch 'update-ldflags' into 'main'
Update ldflags for cgo

See merge request nvidia/container-toolkit/container-toolkit!290
2023-02-10 14:17:53 +00:00
Evan Lezar
1c05f2fb9a Merge branch 'add-options-to-mounts' into 'main'
Add Options to mounts to refactor IPC CDI spec generation

See merge request nvidia/container-toolkit/container-toolkit!287
2023-02-10 08:04:24 +00:00
Evan Lezar
1407ace94a Update ldflags for cgo
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 21:54:49 +01:00
Evan Lezar
97008f2db6 Move IPC discoverer into DriverDiscoverer
This simplifies the construction of the required common edits
when constructing a CDI specification.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 09:06:07 +01:00
Evan Lezar
076eed7eb4 Update ipcMount to add noexec option
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 09:06:07 +01:00
Evan Lezar
33c7b056ea Add ipcMounts type
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 09:06:07 +01:00
Evan Lezar
3b8c40c3e6 Move IPC discoverer to internal/discover package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 09:06:07 +01:00
Evan Lezar
3f70521a63 Add Options to discover.Mount
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-08 09:06:07 +01:00
Evan Lezar
21f5895b5a Merge branch 'bump-version-1.13.0-rc.1' into 'main'
Bump version to 1.13.0-rc.1

See merge request nvidia/container-toolkit/container-toolkit!286
2023-02-07 11:38:18 +00:00
Evan Lezar
738a2e7343 Bump version to 1.13.0-rc.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-07 11:55:47 +01:00
Evan Lezar
62bd015475 Merge branch 'bump-version-v1.12.0' into 'main'
Bump version to v1.12.0

See merge request nvidia/container-toolkit/container-toolkit!285
2023-02-03 14:07:30 +00:00
Evan Lezar
ac5c62c116 Bump CUDA base images to 12.0.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-03 14:25:42 +01:00
Evan Lezar
80fe1065ad Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-03 14:23:49 +01:00
Evan Lezar
fea195cc8d Bump version to v1.12.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-03 13:58:26 +01:00
Evan Lezar
9ef314e1e3 Merge branch 'rename-root-flag' into 'main'
Rename root to driverRoot for CDI generation

See merge request nvidia/container-toolkit/container-toolkit!284
2023-02-02 16:33:24 +00:00
Evan Lezar
95f859118b Update changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 15:58:00 +01:00
Evan Lezar
daceac9117 Rename discover.Config.Root to discover.Config.DriverRoot
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 15:57:15 +01:00
Evan Lezar
cfa2647260 Rename root to driverRoot for CDI generation
This makes the intent of the command line argument clearer since this
relates specifically to the root where the NVIDIA driver is installed.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 15:42:04 +01:00
Evan Lezar
03cdf3b5d7 Merge branch 'bump-version-v1.12.0-rc.6' into 'main'
Bump version to v1.12.0-rc.6

See merge request nvidia/container-toolkit/container-toolkit!283
2023-02-02 13:50:25 +00:00
Evan Lezar
f8f415a605 Ensure container-archive name is unique
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 12:03:34 +01:00
Evan Lezar
fe117d3916 Udpate libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 12:02:57 +01:00
Evan Lezar
069536d598 Bump version to v1.12.0-rc.6
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 12:00:08 +01:00
Evan Lezar
5f53ca0af5 Add missing v1.12.0-rc.5 changelog entry
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-02 11:58:44 +01:00
Evan Lezar
9a06768863 Merge branch 'fix-nvidia-ctk-path' into 'main'
Only use configured nvidia-ctk path if it is a full path

See merge request nvidia/container-toolkit/container-toolkit!281
2023-02-01 11:42:09 +00:00
Evan Lezar
0c8379f681 Fix nvidia-ctk path for update ldcache hook
This change ensures that the update-ldcache hook is created in a manner
consistent with other nvidia-ctk hooks ensuring that a full path is
used.

Without this change the update-ldcache hook on Tegra-based sytems had an
invalid path.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-01 12:00:23 +01:00
Evan Lezar
92dc0506fe Add hook path to logger output
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-01 12:00:23 +01:00
Evan Lezar
7045a223d2 Only use configured nvidia-ctk path if it is a full path
If this is not done, the default config which sets the nvidia-ctk.path
option as "nvidia-ctk" will result in an invalid OCI spec if a hook is
injected. This change ensures that the path used is always an absolute
path as required by the hook spec.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-01 12:00:23 +01:00
Evan Lezar
763e4936cd Merge branch 'fix-kitmaker' into 'main'
Add additional build args to manifest

See merge request nvidia/container-toolkit/container-toolkit!279
2023-02-01 09:48:08 +00:00
Evan Lezar
f0c7491029 Use 'main' as branch component in kitmaker archive
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-01 04:54:59 +01:00
Evan Lezar
ba5c4b2831 Use package version as version
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-01 04:54:59 +01:00
Evan Lezar
9c73438682 Add additional build args to manifest
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-01 04:54:59 +01:00
Evan Lezar
37f7337d2b Merge branch 'bump-version-1.12.0-rc.5' into 'main'
Bump version to 1.12.0-rc.5

See merge request nvidia/container-toolkit/container-toolkit!280
2023-02-01 03:18:26 +00:00
Evan Lezar
98285c27ab Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-01 04:17:42 +01:00
Evan Lezar
5750881cea Bump version to 1.12.0-rc.5
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2023-02-01 04:08:57 +01:00
560 changed files with 39384 additions and 12188 deletions

View File

@@ -23,6 +23,7 @@ variables:
BUILD_MULTI_ARCH_IMAGES: "true"
stages:
- trigger
- image
- lint
- go-checks
@@ -34,91 +35,68 @@ stages:
- scan
- release
.pipeline-trigger-rules:
rules:
# We trigger the pipeline if started manually
- if: $CI_PIPELINE_SOURCE == "web"
# We trigger the pipeline on the main branch
- if: $CI_COMMIT_BRANCH == "main"
# We trigger the pipeline on the release- branches
- if: $CI_COMMIT_BRANCH =~ /^release-.*$/
# We trigger the pipeline on tags
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
workflow:
rules:
# We trigger the pipeline on a merge request
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
# We then add all the regular triggers
- !reference [.pipeline-trigger-rules, rules]
# The main or manual job is used to filter out distributions or architectures that are not required on
# every build.
.main-or-manual:
rules:
- if: $CI_COMMIT_BRANCH == "main"
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
- !reference [.pipeline-trigger-rules, rules]
- if: $CI_PIPELINE_SOURCE == "schedule"
when: manual
# Define the distribution targets
.dist-amazonlinux2:
# The trigger-pipeline job adds a manualy triggered job to the pipeline on merge requests.
trigger-pipeline:
stage: trigger
script:
- echo "starting pipeline"
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: amazonlinux2
PACKAGE_REPO_TYPE: rpm
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
when: manual
allow_failure: false
- when: always
# Define the distribution targets
.dist-centos7:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: centos7
CVE_UPDATES: "cyrus-sasl-lib"
PACKAGE_REPO_TYPE: rpm
.dist-centos8:
variables:
DIST: centos8
CVE_UPDATES: "cyrus-sasl-lib"
PACKAGE_REPO_TYPE: rpm
.dist-debian10:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: debian10
PACKAGE_REPO_TYPE: debian
.dist-debian9:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: debian9
PACKAGE_REPO_TYPE: debian
.dist-fedora35:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: fedora35
PACKAGE_REPO_TYPE: rpm
.dist-opensuse-leap15.1:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: opensuse-leap15.1
PACKAGE_REPO_TYPE: rpm
.dist-ubi8:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: ubi8
CVE_UPDATES: "cyrus-sasl-lib"
PACKAGE_REPO_TYPE: rpm
.dist-ubuntu16.04:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: ubuntu16.04
PACKAGE_REPO_TYPE: debian
.dist-ubuntu18.04:
variables:
DIST: ubuntu18.04
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
PACKAGE_REPO_TYPE: debian
.dist-ubuntu20.04:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: ubuntu20.04
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
PACKAGE_REPO_TYPE: debian
.dist-packaging:
variables:
@@ -236,6 +214,8 @@ test-packaging:
.release:external:
extends:
- .release
variables:
FORCE_PUBLISH_IMAGES: "yes"
rules:
- if: $CI_COMMIT_TAG
variables:
@@ -259,22 +239,15 @@ release:staging-ubi8:
needs:
- image-ubi8
release:staging-ubuntu18.04:
extends:
- .release:staging
- .dist-ubuntu18.04
needs:
- test-toolkit-ubuntu18.04
- test-containerd-ubuntu18.04
- test-crio-ubuntu18.04
- test-docker-ubuntu18.04
release:staging-ubuntu20.04:
extends:
- .release:staging
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
- test-toolkit-ubuntu20.04
- test-containerd-ubuntu20.04
- test-crio-ubuntu20.04
- test-docker-ubuntu20.04
release:staging-packaging:
extends:

113
.github/workflows/blossom-ci.yml vendored Normal file
View File

@@ -0,0 +1,113 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A workflow to trigger ci on hybrid infra (github + self hosted runner)
name: Blossom-CI
on:
issue_comment:
types: [created]
workflow_dispatch:
inputs:
platform:
description: 'runs-on argument'
required: false
args:
description: 'argument'
required: false
jobs:
Authorization:
name: Authorization
runs-on: blossom
outputs:
args: ${{ env.args }}
# This job only runs for pull request comments
if: |
contains( '\
anstockatnv,\
rorajani,\
cdesiniotis,\
shivamerla,\
ArangoGutierrez,\
elezar,\
klueska,\
zvonkok,\
', format('{0},', github.actor)) &&
github.event.comment.body == '/blossom-ci'
steps:
- name: Check if comment is issued by authorized person
run: blossom-ci
env:
OPERATION: 'AUTH'
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
Vulnerability-scan:
name: Vulnerability scan
needs: [Authorization]
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
repository: ${{ fromJson(needs.Authorization.outputs.args).repo }}
ref: ${{ fromJson(needs.Authorization.outputs.args).ref }}
lfs: 'true'
# repo specific steps
#- name: Setup java
# uses: actions/setup-java@v1
# with:
# java-version: 1.8
# add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file
#- name: Setup blackduck properties
# run: |
# PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g')
# echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties
# echo detect.maven.included.scopes=compile >> application.properties
- name: Run blossom action
uses: NVIDIA/blossom-action@main
env:
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
with:
args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }}
args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }}
args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }}
Job-trigger:
name: Start ci job
needs: [Vulnerability-scan]
runs-on: blossom
steps:
- name: Start ci job
run: blossom-ci
env:
OPERATION: 'START-CI-JOB'
CI_SERVER: ${{ secrets.CI_SERVER }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Upload-Log:
name: Upload log
runs-on: blossom
if : github.event_name == 'workflow_dispatch'
steps:
- name: Jenkins log for pull request ${{ fromJson(github.event.inputs.args).pr }} (click here)
run: blossom-ci
env:
OPERATION: 'POST-PROCESSING'
CI_SERVER: ${{ secrets.CI_SERVER }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}

22
.github/workflows/pre-sanity.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: Run pre sanity
# run this workflow for each commit
on: [pull_request]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Build dev image
run: make .build-image
- name: Build
run: make docker-build
- name: Tests
run: make docker-coverage
- name: Checks
run: make docker-check

2
.gitignore vendored
View File

@@ -1,9 +1,11 @@
dist
artifacts
*.swp
*.swo
/coverage.out*
/test/output/
/nvidia-container-runtime
/nvidia-container-runtime.*
/nvidia-container-runtime-hook
/nvidia-container-toolkit
/nvidia-ctk

View File

@@ -102,25 +102,35 @@ unit-tests:
name: ${ARTIFACTS_NAME}
paths:
- ${ARTIFACTS_ROOT}
needs:
- job: package-meta-packages
artifacts: true
# Define the package build targets
package-amazonlinux2-aarch64:
package-meta-packages:
extends:
- .package-build
- .dist-amazonlinux2
- .arch-aarch64
- .package-artifacts
stage: package-build
variables:
SKIP_LIBNVIDIA_CONTAINER: "yes"
SKIP_NVIDIA_CONTAINER_TOOLKIT: "yes"
parallel:
matrix:
- PACKAGING: [deb, rpm]
before_script:
- apk add --no-cache coreutils build-base sed git bash make
script:
- ./scripts/build-packages.sh ${PACKAGING}
artifacts:
name: ${ARTIFACTS_NAME}
paths:
- ${ARTIFACTS_ROOT}
package-amazonlinux2-x86_64:
extends:
- .package-build
- .dist-amazonlinux2
- .arch-x86_64
package-centos7-ppc64le:
package-centos7-aarch64:
extends:
- .package-build
- .dist-centos7
- .arch-ppc64le
- .arch-aarch64
package-centos7-x86_64:
extends:
@@ -146,48 +156,6 @@ package-centos8-x86_64:
- .dist-centos8
- .arch-x86_64
package-debian10-amd64:
extends:
- .package-build
- .dist-debian10
- .arch-amd64
package-debian9-amd64:
extends:
- .package-build
- .dist-debian9
- .arch-amd64
package-fedora35-aarch64:
extends:
- .package-build
- .dist-fedora35
- .arch-aarch64
package-fedora35-x86_64:
extends:
- .package-build
- .dist-fedora35
- .arch-x86_64
package-opensuse-leap15.1-x86_64:
extends:
- .package-build
- .dist-opensuse-leap15.1
- .arch-x86_64
package-ubuntu16.04-amd64:
extends:
- .package-build
- .dist-ubuntu16.04
- .arch-amd64
package-ubuntu16.04-ppc64le:
extends:
- .package-build
- .dist-ubuntu16.04
- .arch-ppc64le
package-ubuntu18.04-amd64:
extends:
- .package-build
@@ -240,7 +208,6 @@ image-centos7:
- .package-artifacts
- .dist-centos7
needs:
- package-centos7-ppc64le
- package-centos7-x86_64
image-ubi8:
@@ -249,21 +216,9 @@ image-ubi8:
- .package-artifacts
- .dist-ubi8
needs:
# Note: The ubi8 image uses the centos8 packages
- package-centos8-aarch64
- package-centos8-x86_64
- package-centos8-ppc64le
image-ubuntu18.04:
extends:
- .image-build
- .package-artifacts
- .dist-ubuntu18.04
needs:
- package-ubuntu18.04-amd64
- package-ubuntu18.04-arm64
- job: package-ubuntu18.04-ppc64le
optional: true
# Note: The ubi8 image uses the centos7 packages
- package-centos7-aarch64
- package-centos7-x86_64
image-ubuntu20.04:
extends:
@@ -273,7 +228,8 @@ image-ubuntu20.04:
needs:
- package-ubuntu18.04-amd64
- package-ubuntu18.04-arm64
- package-ubuntu18.04-ppc64le
- job: package-ubuntu18.04-ppc64le
optional: true
# The DIST=packaging target creates an image containing all built packages
image-packaging:
@@ -290,7 +246,7 @@ image-packaging:
optional: true
- job: package-amazonlinux2-x86_64
optional: true
- job: package-centos7-ppc64le
- job: package-centos7-aarch64
optional: true
- job: package-centos7-x86_64
optional: true
@@ -298,18 +254,8 @@ image-packaging:
optional: true
- job: package-debian10-amd64
optional: true
- job: package-debian9-amd64
optional: true
- job: package-fedora35-aarch64
optional: true
- job: package-fedora35-x86_64
optional: true
- job: package-opensuse-leap15.1-x86_64
optional: true
- job: package-ubuntu16.04-amd64
optional: true
- job: package-ubuntu16.04-ppc64le
optional: true
- job: package-ubuntu18.04-ppc64le
optional: true
@@ -343,31 +289,31 @@ image-packaging:
TEST_CASES: "crio"
# Define the test targets
test-toolkit-ubuntu18.04:
test-toolkit-ubuntu20.04:
extends:
- .test:toolkit
- .dist-ubuntu18.04
- .dist-ubuntu20.04
needs:
- image-ubuntu18.04
- image-ubuntu20.04
test-containerd-ubuntu18.04:
test-containerd-ubuntu20.04:
extends:
- .test:containerd
- .dist-ubuntu18.04
- .dist-ubuntu20.04
needs:
- image-ubuntu18.04
- image-ubuntu20.04
test-crio-ubuntu18.04:
test-crio-ubuntu20.04:
extends:
- .test:crio
- .dist-ubuntu18.04
- .dist-ubuntu20.04
needs:
- image-ubuntu18.04
- image-ubuntu20.04
test-docker-ubuntu18.04:
test-docker-ubuntu20.04:
extends:
- .test:docker
- .dist-ubuntu18.04
- .dist-ubuntu20.04
needs:
- image-ubuntu18.04
- image-ubuntu20.04

8
.gitmodules vendored
View File

@@ -2,11 +2,3 @@
path = third_party/libnvidia-container
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
branch = main
[submodule "third_party/nvidia-container-runtime"]
path = third_party/nvidia-container-runtime
url = https://gitlab.com/nvidia/container-toolkit/container-runtime.git
branch = main
[submodule "third_party/nvidia-docker"]
path = third_party/nvidia-docker
url = https://gitlab.com/nvidia/container-toolkit/nvidia-docker.git
branch = main

View File

@@ -36,8 +36,8 @@ variables:
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
ARTIFACTORY_REPO_BASE: "https://urm.nvidia.com/artifactory/sw-gpu-cloudnative"
# TODO: We should set the kitmaker release folder here once we have the end-to-end workflow set up
KITMAKER_RELEASE_FOLDER: "testing"
KITMAKER_RELEASE_FOLDER: "kitmaker"
PACKAGE_ARCHIVE_RELEASE_FOLDER: "releases"
.image-pull:
stage: image-build
@@ -79,11 +79,6 @@ image-ubi8:
- .dist-ubi8
- .image-pull
image-ubuntu18.04:
extends:
- .dist-ubuntu18.04
- .image-pull
image-ubuntu20.04:
extends:
- .dist-ubuntu20.04
@@ -110,7 +105,7 @@ image-packaging:
image: "${PULSE_IMAGE}"
variables:
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}-${DIST}"
IMAGE_ARCHIVE: "container-toolkit.tar"
IMAGE_ARCHIVE: "container-toolkit-${DIST}-${ARCH}-${CI_JOB_ID}.tar"
rules:
- if: $SKIP_SCANS != "yes"
- when: manual
@@ -125,6 +120,7 @@ image-packaging:
- if [ -z "$SSA_TOKEN" ]; then exit 1; else echo "SSA_TOKEN set!"; fi
script:
- pulse-cli -n $NSPECT_ID --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
- rm -f "${IMAGE_ARCHIVE}"
artifacts:
when: always
expire_in: 1 week
@@ -144,23 +140,6 @@ scan-centos7-amd64:
needs:
- image-centos7
scan-centos7-arm64:
extends:
- .dist-centos7
- .platform-arm64
- .scan
needs:
- image-centos7
- scan-centos7-amd64
scan-ubuntu18.04-amd64:
extends:
- .dist-ubuntu18.04
- .platform-amd64
- .scan
needs:
- image-ubuntu18.04
scan-ubuntu20.04-amd64:
extends:
- .dist-ubuntu20.04
@@ -195,6 +174,13 @@ scan-ubi8-arm64:
- image-ubi8
- scan-ubi8-amd64
scan-packaging:
extends:
- .dist-packaging
- .scan
needs:
- image-packaging
# Define external release helpers
.release:ngc:
extends:
@@ -217,25 +203,43 @@ scan-ubi8-arm64:
PACKAGE_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
PACKAGE_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}-packaging"
KITMAKER_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-generic-local/${KITMAKER_RELEASE_FOLDER}"
ARTIFACTS_DIR: "${CI_PROJECT_DIR}/artifacts"
script:
- !reference [.regctl-setup, before_script]
- apk add --no-cache bash git
- regctl registry login "${PACKAGE_REGISTRY}" -u "${PACKAGE_REGISTRY_USER}" -p "${PACKAGE_REGISTRY_TOKEN}"
- ./scripts/extract-packages.sh "${PACKAGE_IMAGE_NAME}:${PACKAGE_IMAGE_TAG}"
# TODO: ./scripts/release-packages-artifactory.sh "${DIST}-${ARCH}" "${PACKAGE_ARTIFACTORY_REPO}"
- ./scripts/release-kitmaker-artifactory.sh "${KITMAKER_ARTIFACTORY_REPO}"
- rm -rf ${ARTIFACTS_DIR}
# Define the package release targets
release:packages:kitmaker:
extends:
- .release:packages
release:staging-ubuntu18.04:
release:archive:
extends:
- .release:external
needs:
- image-packaging
variables:
VERSION: "${CI_COMMIT_SHORT_SHA}"
PACKAGE_REGISTRY: "${CI_REGISTRY}"
PACKAGE_REGISTRY_USER: "${CI_REGISTRY_USER}"
PACKAGE_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
PACKAGE_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
PACKAGE_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}-packaging"
PACKAGE_ARCHIVE_ARTIFACTORY_REPO: "${ARTIFACTORY_REPO_BASE}-generic-local/${PACKAGE_ARCHIVE_RELEASE_FOLDER}"
script:
- apk add --no-cache bash git
- ./scripts/archive-packages.sh "${PACKAGE_ARCHIVE_ARTIFACTORY_REPO}"
release:staging-ubuntu20.04:
extends:
- .release:staging
- .dist-ubuntu18.04
- .dist-ubuntu20.04
needs:
- image-ubuntu18.04
- image-ubuntu20.04
# Define the external release targets
# Release to NGC
@@ -244,11 +248,6 @@ release:ngc-centos7:
- .dist-centos7
- .release:ngc
release:ngc-ubuntu18.04:
extends:
- .dist-ubuntu18.04
- .release:ngc
release:ngc-ubuntu20.04:
extends:
- .dist-ubuntu20.04
@@ -258,3 +257,8 @@ release:ngc-ubi8:
extends:
- .dist-ubi8
- .release:ngc
release:ngc-packaging:
extends:
- .dist-packaging
- .release:ngc

View File

@@ -1,10 +1,146 @@
# NVIDIA Container Toolkit Changelog
## v1.14.1
* Fixed bug where contents of `/etc/nvidia-container-runtime/config.toml` is ignored by the NVIDIA Container Runtime Hook.
* [libnvidia-container] Use libelf.so on RPM-based systems due to removed mageia repositories hosting pmake and bmake.
## v1.14.0
* Promote v1.14.0-rc.3 to v1.14.0
## v1.14.0-rc.3
* Added support for generating OCI hook JSON file to `nvidia-ctk runtime configure` command.
* Remove installation of OCI hook JSON from RPM package.
* Refactored config for `nvidia-container-runtime-hook`.
* Added a `nvidia-ctk config` command which supports setting config options using a `--set` flag.
* Added `--library-search-path` option to `nvidia-ctk cdi generate` command in `csv` mode. This allows folders where
libraries are located to be specified explicitly.
* Updated go-nvlib to support devices which are not present in the PCI device database. This allows the creation of dev/char symlinks on systems with such devices installed.
* Added `UsesNVGPUModule` info function for more robust platform detection. This is required on Tegra-based systems where libnvidia-ml.so is also supported.
* [toolkit-container] Set `NVIDIA_VISIBLE_DEVICES=void` to prevent injection of NVIDIA devices and drivers into the NVIDIA Container Toolkit container.
## v1.14.0-rc.2
* Fix bug causing incorrect nvidia-smi symlink to be created on WSL2 systems with multiple driver roots.
* Remove dependency on coreutils when installing package on RPM-based systems.
* Create ouput folders if required when running `nvidia-ctk runtime configure`
* Generate default config as post-install step.
* Added support for detecting GSP firmware at custom paths when generating CDI specifications.
* Added logic to skip the extraction of image requirements if `NVIDIA_DISABLE_REQUIRES` is set to `true`.
* [libnvidia-container] Include Shared Compiler Library (libnvidia-gpucomp.so) in the list of compute libaries.
* [toolkit-container] Ensure that common envvars have higher priority when configuring the container engines.
* [toolkit-container] Bump CUDA base image version to 12.2.0.
* [toolkit-container] Remove installation of nvidia-experimental runtime. This is superceded by the NVIDIA Container Runtime in CDI mode.
## v1.14.0-rc.1
* Add support for updating containerd configs to the `nvidia-ctk runtime configure` command.
* Create file in `etc/ld.so.conf.d` with permissions `644` to support non-root containers.
* Generate CDI specification files with `644` permissions to allow rootless applications (e.g. podman)
* Add `nvidia-ctk cdi list` command to show the known CDI devices.
* Add support for generating merged devices (e.g. `all` device) to the nvcdi API.
* Use *.* pattern to locate libcuda.so when generating a CDI specification to support platforms where a patch version is not specified.
* Update go-nvlib to skip devices that are not MIG capable when generating CDI specifications.
* Add `nvidia-container-runtime-hook.path` config option to specify NVIDIA Container Runtime Hook path explicitly.
* Fix bug in creation of `/dev/char` symlinks by failing operation if kernel modules are not loaded.
* Add option to load kernel modules when creating device nodes
* Add option to create device nodes when creating `/dev/char` symlinks
* [libnvidia-container] Support OpenSSL 3 with the Encrypt/Decrypt library
* [toolkit-container] Allow same envars for all runtime configs
## v1.13.1
* Update `update-ldcache` hook to only update ldcache if it exists.
* Update `update-ldcache` hook to create `/etc/ld.so.conf.d` folder if it doesn't exist.
* Fix failure when libcuda cannot be located during XOrg library discovery.
* Fix CDI spec generation on systems that use `/etc/alternatives` (e.g. Debian)
## v1.13.0
* Promote 1.13.0-rc.3 to 1.13.0
## v1.13.0-rc.3
* Only initialize NVML for modes that require it when runing `nvidia-ctk cdi generate`.
* Prefer /run over /var/run when locating nvidia-persistenced and nvidia-fabricmanager sockets.
* Fix the generation of CDI specifications for management containers when the driver libraries are not in the LDCache.
* Add transformers to deduplicate and simplify CDI specifications.
* Generate a simplified CDI specification by default. This means that entities in the common edits in a spec are not included in device definitions.
* Also return an error from the nvcdi.New constructor instead of panicing.
* Detect XOrg libraries for injection and CDI spec generation.
* Add `nvidia-ctk system create-device-nodes` command to create control devices.
* Add `nvidia-ctk cdi transform` command to apply transforms to CDI specifications.
* Add `--vendor` and `--class` options to `nvidia-ctk cdi generate`
* [libnvidia-container] Fix segmentation fault when RPC initialization fails.
* [libnvidia-container] Build centos variants of the NVIDIA Container Library with static libtirpc v1.3.2.
* [libnvidia-container] Remove make targets for fedora35 as the centos8 packages are compatible.
* [toolkit-container] Add `nvidia-container-runtime.modes.cdi.annotation-prefixes` config option that allows the CDI annotation prefixes that are read to be overridden.
* [toolkit-container] Create device nodes when generating CDI specification for management containers.
* [toolkit-container] Add `nvidia-container-runtime.runtimes` config option to set the low-level runtime for the NVIDIA Container Runtime
## v1.13.0-rc.2
* Don't fail chmod hook if paths are not injected
* Only create `by-path` symlinks if CDI devices are actually requested.
* Fix possible blank `nvidia-ctk` path in generated CDI specifications
* Fix error in postun scriplet on RPM-based systems
* Only check `NVIDIA_VISIBLE_DEVICES` for environment variables if no annotations are specified.
* Add `cdi.default-kind` config option for constructing fully-qualified CDI device names in CDI mode
* Add support for `accept-nvidia-visible-devices-envvar-unprivileged` config setting in CDI mode
* Add `nvidia-container-runtime-hook.skip-mode-detection` config option to bypass mode detection. This allows `legacy` and `cdi` mode, for example, to be used at the same time.
* Add support for generating CDI specifications for GDS and MOFED devices
* Ensure CDI specification is validated on save when generating a spec
* Rename `--discovery-mode` argument to `--mode` for `nvidia-ctk cdi generate`
* [libnvidia-container] Fix segfault on WSL2 systems
* [toolkit-container] Add `--cdi-enabled` flag to toolkit config
* [toolkit-container] Install `nvidia-ctk` from toolkit container
* [toolkit-container] Use installed `nvidia-ctk` path in NVIDIA Container Toolkit config
* [toolkit-container] Bump CUDA base images to 12.1.0
* [toolkit-container] Set `nvidia-ctk` path in the
* [toolkit-container] Add `cdi.k8s.io/*` to set of allowed annotations in containerd config
* [toolkit-container] Generate CDI specification for use in management containers
* [toolkit-container] Install experimental runtime as `nvidia-container-runtime.experimental` instead of `nvidia-container-runtime-experimental`
* [toolkit-container] Install and configure mode-specific runtimes for `cdi` and `legacy` modes
## v1.13.0-rc.1
* Include MIG-enabled devices as GPUs when generating CDI specification
* Fix missing NVML symbols when running `nvidia-ctk` on some platforms [#49]
* Add CDI spec generation for WSL2-based systems to `nvidia-ctk cdi generate` command
* Add `auto` mode to `nvidia-ctk cdi generate` command to automatically detect a WSL2-based system over a standard NVML-based system.
* Add mode-specific (`.cdi` and `.legacy`) NVIDIA Container Runtime binaries for use in the GPU Operator
* Discover all `gsb*.bin` GSP firmware files when generating CDI specification.
* Align `.deb` and `.rpm` release candidate package versions
* Remove `fedora35` packaging targets
* [libnvidia-container] Include all `gsp*.bin` firmware files if present
* [libnvidia-container] Align `.deb` and `.rpm` release candidate package versions
* [libnvidia-container] Remove `fedora35` packaging targets
* [toolkit-container] Install `nvidia-container-toolkit-operator-extensions` package for mode-specific executables.
* [toolkit-container] Allow `nvidia-container-runtime.mode` to be set when configuring the NVIDIA Container Toolkit
## v1.12.0
* Promote `v1.12.0-rc.5` to `v1.12.0`
* Rename `nvidia cdi generate` `--root` flag to `--driver-root` to better indicate intent
* [libnvidia-container] Add nvcubins.bin to DriverStore components under WSL2
* [toolkit-container] Bump CUDA base images to 12.0.1
## v1.12.0-rc.5
* Fix bug here the `nvidia-ctk` path was not properly resolved. This causes failures to run containers when the runtime is configured in `csv` mode or if the `NVIDIA_DRIVER_CAPABILITIES` includes `graphics` or `display` (e.g. `all`).
## v1.12.0-rc.4
* Generate a minimum CDI spec version for improved compatibility.
* Add `--device-name-strategy` options to the `nvidia-ctk cdi generate` command that can be used to control how device names are constructed.
* Set default for CDI device name generation to `index` to generate device names such as `nvidia.com/gpu=0` or `nvidia.com/gpu=1:0` by default.
## v1.12.0-rc.3
* Don't fail if by-path symlinks for DRM devices do not exist

View File

@@ -61,7 +61,7 @@ cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
endif
cmds: $(CMD_TARGETS)
$(CMD_TARGETS): cmd-%:
GOOS=$(GOOS) go build -ldflags "-s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
build:
GOOS=$(GOOS) go build ./...

View File

@@ -17,19 +17,29 @@ ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
# NOTE: In cases where the libc version is a concern, we would have to use an
# image based on the target OS to build the golang executables here -- especially
# if cgo code is included.
FROM golang:${GOLANG_VERSION} as build
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
# We override the GOPATH to ensure that the binaries are installed to
# /artifacts/bin
ARG GOPATH=/artifacts
RUN yum install -y \
wget make git gcc \
&& \
rm -rf /var/cache/yum/*
# Install the experiemental nvidia-container-runtime
# NOTE: This will be integrated into the nvidia-container-toolkit package / repo
ARG NVIDIA_CONTAINER_RUNTIME_EXPERIMENTAL_VERSION=experimental
RUN GOPATH=/artifacts go install github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental@${NVIDIA_CONTAINER_RUNTIME_EXPERIMENTAL_VERSION}
ARG GOLANG_VERSION=x.x.x
RUN set -eux; \
\
arch="$(uname -m)"; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz
ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
WORKDIR /build
COPY . .
@@ -53,7 +63,7 @@ RUN [[ "${BASE_DIST}" != "centos8" ]] || \
)
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_VISIBLE_DEVICES=void
ENV NVIDIA_DRIVER_CAPABILITIES=utility
ARG ARTIFACTS_ROOT
@@ -87,11 +97,4 @@ LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
yum update -y ${CVE_UPDATES} && \
rm -rf /var/cache/yum/*; \
fi
ENTRYPOINT ["/work/nvidia-toolkit"]

View File

@@ -18,19 +18,23 @@ ARG GOLANG_VERSION=x.x.x
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG VERSION="N/A"
ARG ARTIFACTS_ROOT
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
WORKDIR /artifacts/packages
# build-args are added to the manifest.txt file below.
ARG BASE_DIST
ARG PACKAGE_DIST
ARG PACKAGE_VERSION
ARG GIT_BRANCH
ARG GIT_COMMIT
ARG GIT_COMMIT_SHORT
ARG SOURCE_DATE_EPOCH
ARG VERSION
# Create a manifest.txt file with the absolute paths of all deb and rpm packages in the container
RUN echo "IMAGE_EPOCH=$(date '+%s')" | sed 's/^/#/g' > /artifacts/manifest.txt && \
RUN echo "#IMAGE_EPOCH=$(date '+%s')" > /artifacts/manifest.txt && \
env | sed 's/^/#/g' >> /artifacts/manifest.txt && \
find /artifacts/packages -iname '*.deb' -o -iname '*.rpm' >> /artifacts/manifest.txt

View File

@@ -17,19 +17,28 @@ ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
# NOTE: In cases where the libc version is a concern, we would have to use an
# image based on the target OS to build the golang executables here -- especially
# if cgo code is included.
FROM golang:${GOLANG_VERSION} as build
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} as build
# We override the GOPATH to ensure that the binaries are installed to
# /artifacts/bin
ARG GOPATH=/artifacts
RUN apt-get update && \
apt-get install -y wget make git gcc \
&& \
rm -rf /var/lib/apt/lists/*
# Install the experiemental nvidia-container-runtime
# NOTE: This will be integrated into the nvidia-container-toolkit package / repo
ARG NVIDIA_CONTAINER_RUNTIME_EXPERIMENTAL_VERSION=experimental
RUN GOPATH=/artifacts go install github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental@${NVIDIA_CONTAINER_RUNTIME_EXPERIMENTAL_VERSION}
ARG GOLANG_VERSION=x.x.x
RUN set -eux; \
\
arch="$(uname -m)"; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz
ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
WORKDIR /build
COPY . .
@@ -53,7 +62,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
rm -rf /var/lib/apt/lists/*
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_VISIBLE_DEVICES=void
ENV NVIDIA_DRIVER_CAPABILITIES=utility
ARG ARTIFACTS_ROOT
@@ -66,7 +75,7 @@ ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH ${TARGETARCH}
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container"
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container/stable"
ARG LIBNVIDIA_CONTAINER0_VERSION
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
@@ -95,11 +104,4 @@ LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
apt-get update && apt-get upgrade -y ${CVE_UPDATES} && \
rm -rf /var/lib/apt/lists/*; \
fi
ENTRYPOINT ["/work/nvidia-toolkit"]

View File

@@ -14,6 +14,7 @@
BUILD_MULTI_ARCH_IMAGES ?= false
DOCKER ?= docker
REGCTL ?= regctl
BUILDX =
ifeq ($(BUILD_MULTI_ARCH_IMAGES),true)
@@ -44,13 +45,13 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules #####
DEFAULT_PUSH_TARGET := ubuntu20.04
DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 ubi8 centos7
DISTRIBUTIONS := ubuntu20.04 ubi8 centos7
META_TARGETS := packaging
BUILD_TARGETS := $(patsubst %,build-%,$(DISTRIBUTIONS) $(META_TARGETS))
PUSH_TARGETS := $(patsubst %,push-%,$(DISTRIBUTIONS) $(META_TARGETS))
TEST_TARGETS := $(patsubst %,test-%, $(DISTRIBUTIONS))
TEST_TARGETS := $(patsubst %,test-%,$(DISTRIBUTIONS))
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
@@ -74,6 +75,14 @@ endif
push-%: DIST = $(*)
push-short: DIST = $(DEFAULT_PUSH_TARGET)
# Define the push targets
$(PUSH_TARGETS): push-%:
$(CURDIR)/scripts/publish-image.sh $(IMAGE) $(OUT_IMAGE)
push-short:
$(CURDIR)/scripts/publish-image.sh $(IMAGE) $(OUT_IMAGE)
build-%: DIST = $(*)
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
@@ -95,9 +104,9 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
--build-arg VERSION="$(VERSION)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
--build-arg GIT_COMMIT_SHORT="$(GIT_COMMIT_SHORT)" \
--build-arg GIT_BRANCH="$(GIT_BRANCH)" \
--build-arg SOURCE_DATE_EPOCH="$(SOURCE_DATE_EPOCH)" \
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
-f $(DOCKERFILE) \
$(CURDIR)
@@ -105,24 +114,20 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
build-ubuntu%: BASE_DIST = $(*)
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
build-ubuntu%: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
build-ubi8: BASE_DIST := ubi8
build-ubi8: DOCKERFILE_SUFFIX := centos
build-ubi8: PACKAGE_DIST = centos8
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-ubi8: PACKAGE_DIST = centos7
build-centos7: BASE_DIST = $(*)
build-centos7: DOCKERFILE_SUFFIX := centos
build-centos7: PACKAGE_DIST = $(BASE_DIST)
build-centos7: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-packaging: BASE_DIST := ubuntu20.04
build-packaging: DOCKERFILE_SUFFIX := packaging
build-packaging: PACKAGE_ARCH := amd64
build-packaging: PACKAGE_DIST = all
build-packaging: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
# Test targets
test-%: DIST = $(*)
@@ -138,18 +143,11 @@ $(TEST_TARGETS): test-%:
test-packaging: DIST = packaging
test-packaging:
@echo "Testing package image contents"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/amazonlinux2/aarch64" || echo "Missing amazonlinux2/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/amazonlinux2/x86_64" || echo "Missing amazonlinux2/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/ppc64le" || echo "Missing centos7/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/aarch64" || echo "Missing centos7/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/x86_64" || echo "Missing centos7/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/aarch64" || echo "Missing centos8/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/ppc64le" || echo "Missing centos8/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/x86_64" || echo "Missing centos8/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/debian10/amd64" || echo "Missing debian10/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/debian9/amd64" || echo "Missing debian9/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/opensuse-leap15.1/x86_64" || echo "Missing opensuse-leap15.1/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu16.04/amd64" || echo "Missing ubuntu16.04/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu16.04/ppc64le" || echo "Missing ubuntu16.04/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/amd64" || echo "Missing ubuntu18.04/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/arm64" || echo "Missing ubuntu18.04/arm64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/ppc64le" || echo "Missing ubuntu18.04/ppc64le"

View File

@@ -16,17 +16,6 @@ PUSH_ON_BUILD ?= false
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
REGCTL ?= regctl
$(PUSH_TARGETS): push-%:
$(REGCTL) \
image copy \
$(IMAGE) $(OUT_IMAGE)
push-short:
$(REGCTL) \
image copy \
$(IMAGE) $(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)
# We only have x86_64 packages for centos7
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64

View File

@@ -13,11 +13,3 @@
# limitations under the License.
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
$(PUSH_TARGETS): push-%:
$(DOCKER) tag "$(IMAGE)" "$(OUT_IMAGE)"
$(DOCKER) push "$(OUT_IMAGE)"
push-short:
$(DOCKER) tag "$(IMAGE_NAME):$(VERSION)-$(DEFAULT_PUSH_TARGET)" "$(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)"
$(DOCKER) push "$(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)"

View File

@@ -2,15 +2,6 @@ package main
import (
"log"
"strings"
)
const (
allDriverCapabilities = DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx")
defaultDriverCapabilities = DriverCapabilities("utility,compute")
none = DriverCapabilities("")
all = DriverCapabilities("all")
)
func capabilityToCLI(cap string) string {
@@ -34,50 +25,3 @@ func capabilityToCLI(cap string) string {
}
return ""
}
// DriverCapabilities is used to process the NVIDIA_DRIVER_CAPABILITIES environment
// variable. Operations include default values, filtering, and handling meta values such as "all"
type DriverCapabilities string
// Intersection returns intersection between two sets of capabilities.
func (d DriverCapabilities) Intersection(capabilities DriverCapabilities) DriverCapabilities {
if capabilities == all {
return d
}
if d == all {
return capabilities
}
lookup := make(map[string]bool)
for _, c := range d.list() {
lookup[c] = true
}
var found []string
for _, c := range capabilities.list() {
if lookup[c] {
found = append(found, c)
}
}
intersection := DriverCapabilities(strings.Join(found, ","))
return intersection
}
// String returns the string representation of the driver capabilities
func (d DriverCapabilities) String() string {
return string(d)
}
// list returns the driver capabilities as a list
func (d DriverCapabilities) list() []string {
var caps []string
for _, c := range strings.Split(string(d), ",") {
trimmed := strings.TrimSpace(c)
if len(trimmed) == 0 {
continue
}
caps = append(caps, trimmed)
}
return caps
}

View File

@@ -1,134 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
func TestDriverCapabilitiesIntersection(t *testing.T) {
testCases := []struct {
capabilities DriverCapabilities
supportedCapabilities DriverCapabilities
expectedIntersection DriverCapabilities
}{
{
capabilities: none,
supportedCapabilities: none,
expectedIntersection: none,
},
{
capabilities: all,
supportedCapabilities: none,
expectedIntersection: none,
},
{
capabilities: all,
supportedCapabilities: allDriverCapabilities,
expectedIntersection: allDriverCapabilities,
},
{
capabilities: allDriverCapabilities,
supportedCapabilities: all,
expectedIntersection: allDriverCapabilities,
},
{
capabilities: none,
supportedCapabilities: all,
expectedIntersection: none,
},
{
capabilities: none,
supportedCapabilities: DriverCapabilities("cap1"),
expectedIntersection: none,
},
{
capabilities: DriverCapabilities("cap0,cap1"),
supportedCapabilities: DriverCapabilities("cap1,cap0"),
expectedIntersection: DriverCapabilities("cap0,cap1"),
},
{
capabilities: defaultDriverCapabilities,
supportedCapabilities: allDriverCapabilities,
expectedIntersection: defaultDriverCapabilities,
},
{
capabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
supportedCapabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
expectedIntersection: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
},
{
capabilities: DriverCapabilities("cap1"),
supportedCapabilities: none,
expectedIntersection: none,
},
{
capabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
supportedCapabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
expectedIntersection: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
intersection := tc.supportedCapabilities.Intersection(tc.capabilities)
require.EqualValues(t, tc.expectedIntersection, intersection)
})
}
}
func TestDriverCapabilitiesList(t *testing.T) {
testCases := []struct {
capabilities DriverCapabilities
expected []string
}{
{
capabilities: DriverCapabilities(""),
},
{
capabilities: DriverCapabilities(" "),
},
{
capabilities: DriverCapabilities(","),
},
{
capabilities: DriverCapabilities(",cap"),
expected: []string{"cap"},
},
{
capabilities: DriverCapabilities("cap,"),
expected: []string{"cap"},
},
{
capabilities: DriverCapabilities("cap0,,cap1"),
expected: []string{"cap0", "cap1"},
},
{
capabilities: DriverCapabilities("cap1,cap0,cap3"),
expected: []string{"cap1", "cap0", "cap3"},
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
require.EqualValues(t, tc.expected, tc.capabilities.list())
})
}
}

View File

@@ -10,6 +10,7 @@ import (
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
)
@@ -37,14 +38,16 @@ type nvidiaConfig struct {
MigConfigDevices string
MigMonitorDevices string
DriverCapabilities string
Requirements []string
DisableRequire bool
// Requirements defines the requirements DSL for the container to run.
// This is empty if no specific requirements are needed, or if requirements are
// explicitly disabled.
Requirements []string
}
type containerConfig struct {
Pid int
Rootfs string
Env map[string]string
Image image.CUDA
Nvidia *nvidiaConfig
}
@@ -130,7 +133,7 @@ func isPrivileged(s *Spec) bool {
}
var caps []string
// If v1.1.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
// If v1.0.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0-rc1/specs-go/config.go#L30-L54
rc1cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc1")
rc5cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc5")
@@ -139,28 +142,31 @@ func isPrivileged(s *Spec) bool {
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
// Otherwise, parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
} else {
var lc LinuxCapabilities
err := json.Unmarshal(*s.Process.Capabilities, &lc)
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
for _, c := range caps {
if c == capSysAdmin {
return true
}
}
// We only make sure that the bounding capabibility set has
// CAP_SYS_ADMIN. This allows us to make sure that the container was
// actually started as '--privileged', but also allow non-root users to
// access the privileged NVIDIA capabilities.
caps = lc.Bounding
return false
}
for _, c := range caps {
if c == capSysAdmin {
return true
}
// Otherwise, parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
process := specs.Process{
Env: s.Process.Env,
}
return false
err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
fullSpec := specs.Spec{
Version: *s.Version,
Process: &process,
}
return image.IsPrivileged(&fullSpec)
}
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
@@ -265,10 +271,12 @@ func getMigMonitorDevices(env map[string]string) *string {
return nil
}
func getDriverCapabilities(env map[string]string, supportedDriverCapabilities DriverCapabilities, legacyImage bool) DriverCapabilities {
func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities {
// We use the default driver capabilities by default. This is filtered to only include the
// supported capabilities
capabilities := supportedDriverCapabilities.Intersection(defaultDriverCapabilities)
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
@@ -279,9 +287,9 @@ func getDriverCapabilities(env map[string]string, supportedDriverCapabilities Dr
if capsEnvSpecified && len(capsEnv) > 0 {
// If the envvironment variable is specified and is non-empty, use the capabilities value
envCapabilities := DriverCapabilities(capsEnv)
envCapabilities := image.NewDriverCapabilities(capsEnv)
capabilities = supportedDriverCapabilities.Intersection(envCapabilities)
if envCapabilities != all && capabilities != envCapabilities {
if !envCapabilities.IsAll() && len(capabilities) != len(envCapabilities) {
log.Panicln(fmt.Errorf("unsupported capabilities found in '%v' (allowed '%v')", envCapabilities, capabilities))
}
}
@@ -316,22 +324,19 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
driverCapabilities := getDriverCapabilities(image, hookConfig.SupportedDriverCapabilities, legacyImage).String()
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
requirements, err := image.GetRequirements()
if err != nil {
log.Panicln("failed to get requirements", err)
}
disableRequire := image.HasDisableRequire()
return &nvidiaConfig{
Devices: devices,
MigConfigDevices: migConfigDevices,
MigMonitorDevices: migMonitorDevices,
DriverCapabilities: driverCapabilities,
Requirements: requirements,
DisableRequire: disableRequire,
}
}
@@ -349,7 +354,10 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
s := loadSpec(path.Join(b, "config.json"))
image, err := image.NewCUDAImageFromEnv(s.Process.Env)
image, err := image.New(
image.WithEnv(s.Process.Env),
image.WithDisableRequire(hook.DisableRequire),
)
if err != nil {
log.Panicln(err)
}
@@ -358,7 +366,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
return containerConfig{
Pid: h.Pid,
Rootfs: s.Root.Path,
Env: image,
Image: image,
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
}
}

View File

@@ -38,9 +38,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: allDriverCapabilities.String(),
DriverCapabilities: image.SupportedDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -52,9 +51,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: allDriverCapabilities.String(),
DriverCapabilities: image.SupportedDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -84,9 +82,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "",
DriverCapabilities: allDriverCapabilities.String(),
DriverCapabilities: image.SupportedDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -98,9 +95,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: allDriverCapabilities.String(),
DriverCapabilities: image.SupportedDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -113,9 +109,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -128,9 +123,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: allDriverCapabilities.String(),
DriverCapabilities: image.SupportedDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -143,9 +137,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "video,display",
DriverCapabilities: "display,video",
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -160,9 +153,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "video,display",
DriverCapabilities: "display,video",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: false,
},
},
{
@@ -178,9 +170,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: true,
DriverCapabilities: "display,video",
Requirements: []string{},
},
},
{
@@ -209,9 +200,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -241,9 +231,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -255,9 +244,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -270,9 +258,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -285,9 +272,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: allDriverCapabilities.String(),
DriverCapabilities: image.SupportedDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -300,9 +286,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "video,display",
DriverCapabilities: "display,video",
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -317,9 +302,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "video,display",
DriverCapabilities: "display,video",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: false,
},
},
{
@@ -335,9 +319,8 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: true,
DriverCapabilities: "display,video",
Requirements: []string{},
},
},
{
@@ -349,9 +332,8 @@ func TestGetNvidiaConfig(t *testing.T) {
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
Requirements: []string{},
DisableRequire: false,
},
},
{
@@ -365,9 +347,8 @@ func TestGetNvidiaConfig(t *testing.T) {
expectedConfig: &nvidiaConfig{
Devices: "all",
MigConfigDevices: "mig0,mig1",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -391,9 +372,8 @@ func TestGetNvidiaConfig(t *testing.T) {
expectedConfig: &nvidiaConfig{
Devices: "all",
MigMonitorDevices: "mig0,mig1",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
},
{
@@ -418,7 +398,7 @@ func TestGetNvidiaConfig(t *testing.T) {
},
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: "video,display",
DriverCapabilities: "display,video",
},
},
{
@@ -433,7 +413,7 @@ func TestGetNvidiaConfig(t *testing.T) {
},
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: "video,display",
DriverCapabilities: "display,video",
},
},
{
@@ -447,7 +427,7 @@ func TestGetNvidiaConfig(t *testing.T) {
},
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
},
},
{
@@ -458,15 +438,12 @@ func TestGetNvidiaConfig(t *testing.T) {
},
privileged: true,
hookConfig: &HookConfig{
SwarmResource: func() *string {
s := "DOCKER_SWARM_RESOURCE"
return &s
}(),
SwarmResource: "DOCKER_SWARM_RESOURCE",
SupportedDriverCapabilities: "video,display,utility,compute",
},
expectedConfig: &nvidiaConfig{
Devices: "GPU1,GPU2",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
},
},
{
@@ -477,15 +454,12 @@ func TestGetNvidiaConfig(t *testing.T) {
},
privileged: true,
hookConfig: &HookConfig{
SwarmResource: func() *string {
s := "NOT_DOCKER_SWARM_RESOURCE,DOCKER_SWARM_RESOURCE"
return &s
}(),
SwarmResource: "NOT_DOCKER_SWARM_RESOURCE,DOCKER_SWARM_RESOURCE",
SupportedDriverCapabilities: "video,display,utility,compute",
},
expectedConfig: &nvidiaConfig{
Devices: "GPU1,GPU2",
DriverCapabilities: defaultDriverCapabilities.String(),
DriverCapabilities: image.DefaultDriverCapabilities.String(),
},
},
}
@@ -496,7 +470,7 @@ func TestGetNvidiaConfig(t *testing.T) {
getConfig := func() {
hookConfig := tc.hookConfig
if hookConfig == nil {
defaultConfig := getDefaultHookConfig()
defaultConfig, _ := getDefaultHookConfig()
hookConfig = &defaultConfig
}
config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged)
@@ -525,7 +499,6 @@ func TestGetNvidiaConfig(t *testing.T) {
require.Equal(t, tc.expectedConfig.DriverCapabilities, config.DriverCapabilities)
require.ElementsMatch(t, tc.expectedConfig.Requirements, config.Requirements)
require.Equal(t, tc.expectedConfig.DisableRequire, config.DisableRequire)
})
}
}
@@ -708,7 +681,7 @@ func TestDeviceListSourcePriority(t *testing.T) {
env := map[string]string{
envNVVisibleDevices: tc.envvarDevices,
}
hookConfig := getDefaultHookConfig()
hookConfig, _ := getDefaultHookConfig()
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
@@ -946,7 +919,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
func TestGetDriverCapabilities(t *testing.T) {
supportedCapabilities := "compute,utility,display,video"
supportedCapabilities := "compute,display,utility,video"
testCases := []struct {
description string
@@ -981,7 +954,7 @@ func TestGetDriverCapabilities(t *testing.T) {
},
legacyImage: true,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: defaultDriverCapabilities.String(),
expectedCapabilities: image.DefaultDriverCapabilities.String(),
},
{
description: "Env unset for legacy image is 'all'",
@@ -1004,7 +977,7 @@ func TestGetDriverCapabilities(t *testing.T) {
env: map[string]string{},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: defaultDriverCapabilities.String(),
expectedCapabilities: image.DefaultDriverCapabilities.String(),
},
{
description: "Env is all for modern image",
@@ -1022,7 +995,7 @@ func TestGetDriverCapabilities(t *testing.T) {
},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: defaultDriverCapabilities.String(),
expectedCapabilities: image.DefaultDriverCapabilities.String(),
},
{
description: "Invalid capabilities panic",
@@ -1042,11 +1015,14 @@ func TestGetDriverCapabilities(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
var capabilites DriverCapabilities
var capabilites string
c := HookConfig{
SupportedDriverCapabilities: tc.supportedCapabilities,
}
getDriverCapabilities := func() {
supportedCapabilities := DriverCapabilities(tc.supportedCapabilities)
capabilites = getDriverCapabilities(tc.env, supportedCapabilities, tc.legacyImage)
capabilites = c.getDriverCapabilities(tc.env, tc.legacyImage).String()
}
if tc.expectedPanic {

View File

@@ -1,14 +1,15 @@
package main
import (
"fmt"
"log"
"os"
"path"
"reflect"
"strings"
"github.com/BurntSushi/toml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
)
const (
@@ -16,91 +17,66 @@ const (
driverPath = "/run/nvidia/driver"
)
var defaultPaths = [...]string{
path.Join(driverPath, configPath),
configPath,
}
// CLIConfig : options for nvidia-container-cli.
type CLIConfig struct {
Root *string `toml:"root"`
Path *string `toml:"path"`
Environment []string `toml:"environment"`
Debug *string `toml:"debug"`
Ldcache *string `toml:"ldcache"`
LoadKmods bool `toml:"load-kmods"`
NoPivot bool `toml:"no-pivot"`
NoCgroups bool `toml:"no-cgroups"`
User *string `toml:"user"`
Ldconfig *string `toml:"ldconfig"`
}
var defaultPaths = [...]string{}
// HookConfig : options for the nvidia-container-runtime-hook.
type HookConfig struct {
DisableRequire bool `toml:"disable-require"`
SwarmResource *string `toml:"swarm-resource"`
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
SupportedDriverCapabilities DriverCapabilities `toml:"supported-driver-capabilities"`
type HookConfig config.Config
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
NVIDIAContainerRuntime config.RuntimeConfig `toml:"nvidia-container-runtime"`
}
func getDefaultHookConfig() HookConfig {
return HookConfig{
DisableRequire: false,
SwarmResource: nil,
AcceptEnvvarUnprivileged: true,
AcceptDeviceListAsVolumeMounts: false,
SupportedDriverCapabilities: allDriverCapabilities,
NvidiaContainerCLI: CLIConfig{
Root: nil,
Path: nil,
Environment: []string{},
Debug: nil,
Ldcache: nil,
LoadKmods: true,
NoPivot: false,
NoCgroups: false,
User: nil,
Ldconfig: nil,
},
NVIDIAContainerRuntime: *config.GetDefaultRuntimeConfig(),
func getDefaultHookConfig() (HookConfig, error) {
defaultCfg, err := config.GetDefault()
if err != nil {
return HookConfig{}, err
}
return *(*HookConfig)(defaultCfg), nil
}
func getHookConfig() (config HookConfig) {
var err error
if len(*configflag) > 0 {
config = getDefaultHookConfig()
_, err = toml.DecodeFile(*configflag, &config)
if err != nil {
log.Panicln("couldn't open configuration file:", err)
}
// loadConfig loads the required paths for the hook config.
func loadConfig() (*config.Config, error) {
var configPaths []string
var required bool
if len(*configflag) != 0 {
configPaths = append(configPaths, *configflag)
required = true
} else {
for _, p := range defaultPaths {
config = getDefaultHookConfig()
_, err = toml.DecodeFile(p, &config)
if err == nil {
break
} else if !os.IsNotExist(err) {
log.Panicln("couldn't open default configuration file:", err)
}
configPaths = append(configPaths, path.Join(driverPath, configPath), configPath)
}
for _, p := range configPaths {
cfg, err := config.New(
config.WithConfigFile(p),
config.WithRequired(true),
)
if err == nil {
return cfg.Config()
} else if os.IsNotExist(err) && !required {
continue
}
return nil, fmt.Errorf("couldn't open required configuration file: %v", err)
}
if config.SupportedDriverCapabilities == all {
config.SupportedDriverCapabilities = allDriverCapabilities
return config.GetDefault()
}
func getHookConfig() (*HookConfig, error) {
cfg, err := loadConfig()
if err != nil {
return nil, fmt.Errorf("failed to load config: %v", err)
}
// We ensure that the supported-driver-capabilites option is a subset of allDriverCapabilities
if intersection := allDriverCapabilities.Intersection(config.SupportedDriverCapabilities); intersection != config.SupportedDriverCapabilities {
config := (*HookConfig)(cfg)
allSupportedDriverCapabilities := image.SupportedDriverCapabilities
if config.SupportedDriverCapabilities == "all" {
config.SupportedDriverCapabilities = allSupportedDriverCapabilities.String()
}
configuredCapabilities := image.NewDriverCapabilities(config.SupportedDriverCapabilities)
// We ensure that the configured value is a subset of all supported capabilities
if !allSupportedDriverCapabilities.IsSuperset(configuredCapabilities) {
configName := config.getConfigOption("SupportedDriverCapabilities")
log.Panicf("Invalid value for config option '%v'; %v (supported: %v)\n", configName, config.SupportedDriverCapabilities, allDriverCapabilities)
log.Panicf("Invalid value for config option '%v'; %v (supported: %v)\n", configName, config.SupportedDriverCapabilities, allSupportedDriverCapabilities.String())
}
return config
return config, nil
}
// getConfigOption returns the toml config option associated with the
@@ -120,11 +96,11 @@ func (c HookConfig) getConfigOption(fieldName string) string {
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
func (c *HookConfig) getSwarmResourceEnvvars() []string {
if c.SwarmResource == nil {
if c.SwarmResource == "" {
return nil
}
candidates := strings.Split(*c.SwarmResource, ",")
candidates := strings.Split(c.SwarmResource, ",")
var envvars []string
for _, c := range candidates {

View File

@@ -21,6 +21,7 @@ import (
"os"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/stretchr/testify/require"
)
@@ -28,16 +29,16 @@ func TestGetHookConfig(t *testing.T) {
testCases := []struct {
lines []string
expectedPanic bool
expectedDriverCapabilities DriverCapabilities
expectedDriverCapabilities string
}{
{
expectedDriverCapabilities: allDriverCapabilities,
expectedDriverCapabilities: image.SupportedDriverCapabilities.String(),
},
{
lines: []string{
"supported-driver-capabilities = \"all\"",
},
expectedDriverCapabilities: allDriverCapabilities,
expectedDriverCapabilities: image.SupportedDriverCapabilities.String(),
},
{
lines: []string{
@@ -47,19 +48,19 @@ func TestGetHookConfig(t *testing.T) {
},
{
lines: []string{},
expectedDriverCapabilities: allDriverCapabilities,
expectedDriverCapabilities: image.SupportedDriverCapabilities.String(),
},
{
lines: []string{
"supported-driver-capabilities = \"\"",
},
expectedDriverCapabilities: none,
expectedDriverCapabilities: "",
},
{
lines: []string{
"supported-driver-capabilities = \"utility,compute\"",
"supported-driver-capabilities = \"compute,utility\"",
},
expectedDriverCapabilities: DriverCapabilities("utility,compute"),
expectedDriverCapabilities: "compute,utility",
},
}
@@ -89,7 +90,8 @@ func TestGetHookConfig(t *testing.T) {
var config HookConfig
getHookConfig := func() {
config = getHookConfig()
c, _ := getHookConfig()
config = *c
}
if tc.expectedPanic {
@@ -109,10 +111,6 @@ func TestGetSwarmResourceEnvvars(t *testing.T) {
value string
expected []string
}{
{
value: "nil",
expected: nil,
},
{
value: "",
expected: nil,
@@ -146,12 +144,7 @@ func TestGetSwarmResourceEnvvars(t *testing.T) {
for i, tc := range testCases {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
c := &HookConfig{
SwarmResource: func() *string {
if tc.value == "nil" {
return nil
}
return &tc.value
}(),
SwarmResource: tc.value,
}
envvars := c.getSwarmResourceEnvvars()

View File

@@ -13,7 +13,9 @@ import (
"strings"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
@@ -36,16 +38,12 @@ func exit() {
os.Exit(0)
}
func getCLIPath(config CLIConfig) string {
if config.Path != nil {
return *config.Path
func getCLIPath(config config.ContainerCLIConfig) string {
if config.Path != "" {
return config.Path
}
var root string
if config.Root != nil {
root = *config.Root
}
if err := os.Setenv("PATH", lookup.GetPath(root)); err != nil {
if err := os.Setenv("PATH", lookup.GetPath(config.Root)); err != nil {
log.Panicln("couldn't set PATH variable:", err)
}
@@ -71,25 +69,28 @@ func doPrestart() {
defer exit()
log.SetFlags(0)
hook := getHookConfig()
cli := hook.NvidiaContainerCLI
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
hook, err := getHookConfig()
if err != nil || hook == nil {
log.Panicln("error getting hook config:", err)
}
cli := hook.NVIDIAContainerCLIConfig
container := getContainerConfig(hook)
container := getContainerConfig(*hook)
nvidia := container.Nvidia
if nvidia == nil {
// Not a GPU container, nothing to do.
return
}
if !hook.NVIDIAContainerRuntimeHookConfig.SkipModeDetection && info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntimeConfig.Mode, container.Image) != "legacy" {
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
}
rootfs := getRootfsPath(container)
args := []string{getCLIPath(cli)}
if cli.Root != nil {
args = append(args, fmt.Sprintf("--root=%s", *cli.Root))
if cli.Root != "" {
args = append(args, fmt.Sprintf("--root=%s", cli.Root))
}
if cli.LoadKmods {
args = append(args, "--load-kmods")
@@ -99,19 +100,19 @@ func doPrestart() {
}
if *debugflag {
args = append(args, "--debug=/dev/stderr")
} else if cli.Debug != nil {
args = append(args, fmt.Sprintf("--debug=%s", *cli.Debug))
} else if cli.Debug != "" {
args = append(args, fmt.Sprintf("--debug=%s", cli.Debug))
}
if cli.Ldcache != nil {
args = append(args, fmt.Sprintf("--ldcache=%s", *cli.Ldcache))
if cli.Ldcache != "" {
args = append(args, fmt.Sprintf("--ldcache=%s", cli.Ldcache))
}
if cli.User != nil {
args = append(args, fmt.Sprintf("--user=%s", *cli.User))
if cli.User != "" {
args = append(args, fmt.Sprintf("--user=%s", cli.User))
}
args = append(args, "configure")
if cli.Ldconfig != nil {
args = append(args, fmt.Sprintf("--ldconfig=%s", *cli.Ldconfig))
if cli.Ldconfig != "" {
args = append(args, fmt.Sprintf("--ldconfig=%s", cli.Ldconfig))
}
if cli.NoCgroups {
args = append(args, "--no-cgroups")
@@ -133,10 +134,8 @@ func doPrestart() {
args = append(args, capabilityToCLI(cap))
}
if !hook.DisableRequire && !nvidia.DisableRequire {
for _, req := range nvidia.Requirements {
args = append(args, fmt.Sprintf("--require=%s", req))
}
for _, req := range nvidia.Requirements {
args = append(args, fmt.Sprintf("--require=%s", req))
}
args = append(args, fmt.Sprintf("--pid=%s", strconv.FormatUint(uint64(container.Pid), 10)))
@@ -185,11 +184,11 @@ func main() {
}
}
// logInterceptor implements the info.Logger interface to allow for logging from this function.
type logInterceptor struct{}
// logInterceptor implements the logger.Interface to allow for logging from executable.
type logInterceptor struct {
logger.NullLogger
}
func (l *logInterceptor) Infof(format string, args ...interface{}) {
log.Printf(format, args...)
}
func (l *logInterceptor) Debugf(format string, args ...interface{}) {}

View File

@@ -0,0 +1,34 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
)
func main() {
rt := runtime.New(
runtime.WithModeOverride("cdi"),
)
err := rt.Run(os.Args)
if err != nil {
os.Exit(1)
}
}

View File

@@ -0,0 +1,34 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
)
func main() {
rt := runtime.New(
runtime.WithModeOverride("legacy"),
)
err := rt.Run(os.Args)
if err != nil {
os.Exit(1)
}
}

View File

@@ -1,89 +1,15 @@
package main
import (
"fmt"
"os"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
)
// version must be set by go build's -X main.version= option in the Makefile.
var version = "unknown"
// gitCommit will be the hash that the binary was built from
// and will be populated by the Makefile
var gitCommit = ""
var logger = NewLogger()
func main() {
err := run(os.Args)
r := runtime.New()
err := r.Run(os.Args)
if err != nil {
logger.Errorf("%v", err)
os.Exit(1)
}
}
// run is an entry point that allows for idiomatic handling of errors
// when calling from the main function.
func run(argv []string) (rerr error) {
printVersion := hasVersionFlag(argv)
if printVersion {
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version)))
}
cfg, err := config.GetConfig()
if err != nil {
return fmt.Errorf("error loading config: %v", err)
}
logger, err = UpdateLogger(
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath,
cfg.NVIDIAContainerRuntimeConfig.LogLevel,
argv,
)
if err != nil {
return fmt.Errorf("failed to set up logger: %v", err)
}
defer func() {
if rerr != nil {
logger.Errorf("%v", rerr)
}
logger.Reset()
}()
logger.Debugf("Command line arguments: %v", argv)
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
if err != nil {
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
}
if printVersion {
fmt.Print("\n")
}
return runtime.Exec(argv)
}
// TODO: This should be refactored / combined with parseArgs in logger.
func hasVersionFlag(args []string) bool {
for i := 0; i < len(args); i++ {
param := args[i]
parts := strings.SplitN(param, "=", 2)
trimmed := strings.TrimLeft(parts[0], "-")
// If this is not a flag we continue
if parts[0] == trimmed {
continue
}
// Check the version flag
if trimmed == "version" {
return true
}
}
return false
}

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"encoding/json"
"io/ioutil"
"log"
"os"
"os/exec"
"path/filepath"
@@ -13,6 +14,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
@@ -41,7 +43,7 @@ func TestMain(m *testing.M) {
var err error
moduleRoot, err := test.GetModuleRoot()
if err != nil {
logger.Fatalf("error in test setup: could not get module root: %v", err)
log.Fatalf("error in test setup: could not get module root: %v", err)
}
testBinPath := filepath.Join(moduleRoot, "test", "bin")
testInputPath := filepath.Join(moduleRoot, "test", "input")
@@ -53,11 +55,11 @@ func TestMain(m *testing.M) {
// Confirm that the environment is configured correctly
runcPath, err := exec.LookPath(runcExecutableName)
if err != nil || filepath.Join(testBinPath, runcExecutableName) != runcPath {
logger.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
log.Fatalf("error in test setup: mock runc path set incorrectly in TestMain(): %v", err)
}
hookPath, err := exec.LookPath(nvidiaHook)
if err != nil || filepath.Join(testBinPath, nvidiaHook) != hookPath {
logger.Fatalf("error in test setup: mock hook path set incorrectly in TestMain(): %v", err)
log.Fatalf("error in test setup: mock hook path set incorrectly in TestMain(): %v", err)
}
// Store the root and binary paths in the test Config
@@ -77,7 +79,7 @@ func TestMain(m *testing.M) {
// case 1) nvidia-container-runtime run --bundle
// case 2) nvidia-container-runtime create --bundle
// - Confirm the runtime handles bad input correctly
// - Confirm the runtime handles bad input correctly
func TestBadInput(t *testing.T) {
err := cfg.generateNewRuntimeSpec()
if err != nil {
@@ -91,9 +93,10 @@ func TestBadInput(t *testing.T) {
}
// case 1) nvidia-container-runtime run --bundle <bundle-name> <ctr-name>
// - Confirm the runtime runs with no errors
// - Confirm the runtime runs with no errors
//
// case 2) nvidia-container-runtime create --bundle <bundle-name> <ctr-name>
// - Confirm the runtime inserts the NVIDIA prestart hook correctly
// - Confirm the runtime inserts the NVIDIA prestart hook correctly
func TestGoodInput(t *testing.T) {
err := cfg.generateNewRuntimeSpec()
if err != nil {
@@ -170,7 +173,8 @@ func TestDuplicateHook(t *testing.T) {
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
// testing.
func addNVIDIAHook(spec *specs.Spec) error {
m := modifier.NewStableRuntimeModifier(logger.Logger)
logger, _ := testlog.NewNullLogger()
m := modifier.NewStableRuntimeModifier(logger, nvidiaHook)
return m.Modify(spec)
}

View File

@@ -16,6 +16,31 @@ nvidia-ctk runtime configure --set-as-default
will ensure that the NVIDIA Container Runtime is added as the default runtime to the default container
engine.
## Configure the NVIDIA Container Toolkit
The `config` command of the `nvidia-ctk` CLI allows a user to display and manipulate the NVIDIA Container Toolkit
configuration.
For example, running the following command:
```bash
nvidia-ctk config default
```
will display the default config for the detected platform.
Whereas
```bash
nvidia-ctk config
```
will display the effective NVIDIA Container Toolkit config using the configured config file, and running:
Individual config options can be set by specifying these are key-value pairs to the `--set` argument:
```bash
nvidia-ctk config --set nvidia-container-cli.no-cgroups=true
```
By default, all commands output to `STDOUT`, but specifying the `--output` flag writes the config to the specified file.
### Generate CDI specifications
The [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface) provides

View File

@@ -18,16 +18,18 @@ package cdi
import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/generate"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/list"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
logger logger.Interface
}
// NewCommand constructs an info command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
@@ -44,6 +46,8 @@ func (m command) build() *cli.Command {
hook.Subcommands = []*cli.Command{
generate.NewCommand(m.logger),
transform.NewCommand(m.logger),
list.NewCommand(m.logger),
}
return &hook

View File

@@ -1,156 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
// The supplied NVML Library is used to query the expected driver version.
func NewDriverDiscoverer(logger *logrus.Logger, root string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
version, r := nvmllib.SystemGetDriverVersion()
if r != nvml.SUCCESS {
return nil, fmt.Errorf("failed to determine driver version: %v", r)
}
libraries, err := NewDriverLibraryDiscoverer(logger, root, nvidiaCTKPath, version)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
}
firmwares := NewDriverFirmwareDiscoverer(logger, root, version)
binaries := NewDriverBinariesDiscoverer(logger, root)
d := discover.Merge(
libraries,
firmwares,
binaries,
)
return d, nil
}
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
func NewDriverLibraryDiscoverer(logger *logrus.Logger, root string, nvidiaCTKPath string, version string) (discover.Discover, error) {
libraryPaths, err := getVersionLibs(logger, root, version)
if err != nil {
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
}
libraries := discover.NewMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(root),
),
root,
libraryPaths,
)
cfg := &discover.Config{
Root: root,
NvidiaCTKPath: nvidiaCTKPath,
}
hooks, _ := discover.NewLDCacheUpdateHook(logger, libraries, cfg)
d := discover.Merge(
libraries,
hooks,
)
return d, nil
}
// NewDriverFirmwareDiscoverer creates a discoverer for GSP firmware associated with the specified driver version.
func NewDriverFirmwareDiscoverer(logger *logrus.Logger, root string, version string) discover.Discover {
gspFirmwarePath := filepath.Join("/lib/firmware/nvidia", version, "gsp.bin")
return discover.NewMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(root),
),
root,
[]string{gspFirmwarePath},
)
}
// NewDriverBinariesDiscoverer creates a discoverer for GSP firmware associated with the GPU driver.
func NewDriverBinariesDiscoverer(logger *logrus.Logger, root string) discover.Discover {
return discover.NewMounts(
logger,
lookup.NewExecutableLocator(logger, root),
root,
[]string{
"nvidia-smi", /* System management interface */
"nvidia-debugdump", /* GPU coredump utility */
"nvidia-persistenced", /* Persistence mode utility */
"nvidia-cuda-mps-control", /* Multi process service CLI */
"nvidia-cuda-mps-server", /* Multi process service server */
},
)
}
// getVersionLibs checks the LDCache for libraries ending in the specified driver version.
// Although the ldcache at the specified root is queried, the paths are returned relative to this root.
// This allows the standard mount location logic to be used for resolving the mounts.
func getVersionLibs(logger *logrus.Logger, root string, version string) ([]string, error) {
logger.Infof("Using driver version %v", version)
cache, err := ldcache.New(logger, root)
if err != nil {
return nil, fmt.Errorf("failed to load ldcache: %v", err)
}
libs32, libs64 := cache.List()
var libs []string
for _, l := range libs64 {
if strings.HasSuffix(l, version) {
logger.Infof("found 64-bit driver lib: %v", l)
libs = append(libs, l)
}
}
for _, l := range libs32 {
if strings.HasSuffix(l, version) {
logger.Infof("found 32-bit driver lib: %v", l)
libs = append(libs, l)
}
}
if root == "/" || root == "" {
return libs, nil
}
var relative []string
for _, l := range libs {
relative = append(relative, strings.TrimPrefix(l, root))
}
return relative, nil
}

View File

@@ -18,41 +18,47 @@ package generate
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"sigs.k8s.io/yaml"
)
const (
formatJSON = "json"
formatYAML = "yaml"
allDeviceName = "all"
)
type command struct {
logger *logrus.Logger
logger logger.Interface
}
type config struct {
type options struct {
output string
format string
deviceNameStrategy string
root string
driverRoot string
nvidiaCTKPath string
mode string
vendor string
class string
librarySearchPaths cli.StringSlice
csv struct {
files cli.StringSlice
}
}
// NewCommand constructs a generate-cdi command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
@@ -61,17 +67,17 @@ func NewCommand(logger *logrus.Logger) *cli.Command {
// build creates the CLI command
func (m command) build() *cli.Command {
cfg := config{}
opts := options{}
// Create the 'generate-cdi' command
c := cli.Command{
Name: "generate",
Usage: "Generate CDI specifications for use with CDI-enabled runtimes",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &cfg)
return m.validateFlags(c, &opts)
},
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
return m.run(c, &opts)
},
}
@@ -79,305 +85,181 @@ func (m command) build() *cli.Command {
&cli.StringFlag{
Name: "output",
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
Destination: &cfg.output,
Destination: &opts.output,
},
&cli.StringFlag{
Name: "format",
Usage: "The output format for the generated spec [json | yaml]. This overrides the format defined by the output file extension (if specified).",
Value: formatYAML,
Destination: &cfg.format,
Value: spec.FormatYAML,
Destination: &opts.format,
},
&cli.StringFlag{
Name: "mode",
Aliases: []string{"discovery-mode"},
Usage: "The mode to use when discovering the available entities. One of [auto | nvml | wsl]. If mode is set to 'auto' the mode will be determined based on the system configuration.",
Value: nvcdi.ModeAuto,
Destination: &opts.mode,
},
&cli.StringFlag{
Name: "device-name-strategy",
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
Value: deviceNameStrategyIndex,
Destination: &cfg.deviceNameStrategy,
Value: nvcdi.DeviceNameStrategyIndex,
Destination: &opts.deviceNameStrategy,
},
&cli.StringFlag{
Name: "root",
Usage: "Specify the root to use when discovering the entities that should be included in the CDI specification.",
Destination: &cfg.root,
Name: "driver-root",
Usage: "Specify the NVIDIA GPU driver root to use when discovering the entities that should be included in the CDI specification.",
Destination: &opts.driverRoot,
},
&cli.StringSliceFlag{
Name: "library-search-path",
Usage: "Specify the path to search for libraries when discovering the entities that should be included in the CDI specification.\n\tNote: This option only applies to CSV mode.",
Destination: &opts.librarySearchPaths,
},
&cli.StringFlag{
Name: "nvidia-ctk-path",
Usage: "Specify the path to use for the nvidia-ctk in the generated CDI specification. If this is left empty, the path will be searched.",
Destination: &cfg.nvidiaCTKPath,
Destination: &opts.nvidiaCTKPath,
},
&cli.StringFlag{
Name: "vendor",
Aliases: []string{"cdi-vendor"},
Usage: "the vendor string to use for the generated CDI specification.",
Value: "nvidia.com",
Destination: &opts.vendor,
},
&cli.StringFlag{
Name: "class",
Aliases: []string{"cdi-class"},
Usage: "the class string to use for the generated CDI specification.",
Value: "gpu",
Destination: &opts.class,
},
&cli.StringSliceFlag{
Name: "csv.file",
Usage: "The path to the list of CSV files to use when generating the CDI specification in CSV mode.",
Value: cli.NewStringSlice(csv.DefaultFileList()...),
Destination: &opts.csv.files,
},
}
return &c
}
func (m command) validateFlags(r *cli.Context, cfg *config) error {
cfg.format = strings.ToLower(cfg.format)
switch cfg.format {
case formatJSON:
case formatYAML:
func (m command) validateFlags(c *cli.Context, opts *options) error {
opts.format = strings.ToLower(opts.format)
switch opts.format {
case spec.FormatJSON:
case spec.FormatYAML:
default:
return fmt.Errorf("invalid output format: %v", cfg.format)
return fmt.Errorf("invalid output format: %v", opts.format)
}
_, err := newDeviceNamer(cfg.deviceNameStrategy)
opts.mode = strings.ToLower(opts.mode)
switch opts.mode {
case nvcdi.ModeAuto:
case nvcdi.ModeCSV:
case nvcdi.ModeNvml:
case nvcdi.ModeWsl:
case nvcdi.ModeManagement:
default:
return fmt.Errorf("invalid discovery mode: %v", opts.mode)
}
_, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
if err != nil {
return err
}
opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath)
if outputFileFormat := formatFromFilename(opts.output); outputFileFormat != "" {
m.logger.Debugf("Inferred output format as %q from output file name", outputFileFormat)
if !c.IsSet("format") {
opts.format = outputFileFormat
} else if outputFileFormat != opts.format {
m.logger.Warningf("Requested output format %q does not match format implied by output file name: %q", opts.format, outputFileFormat)
}
}
if err := cdi.ValidateVendorName(opts.vendor); err != nil {
return fmt.Errorf("invalid CDI vendor name: %v", err)
}
if err := cdi.ValidateClassName(opts.class); err != nil {
return fmt.Errorf("invalid CDI class name: %v", err)
}
return nil
}
func (m command) run(c *cli.Context, cfg *config) error {
deviceNamer, err := newDeviceNamer(cfg.deviceNameStrategy)
if err != nil {
return fmt.Errorf("failed to create device namer: %v", err)
}
spec, err := m.generateSpec(
cfg.root,
discover.FindNvidiaCTK(m.logger, cfg.nvidiaCTKPath),
deviceNamer,
)
func (m command) run(c *cli.Context, opts *options) error {
spec, err := m.generateSpec(opts)
if err != nil {
return fmt.Errorf("failed to generate CDI spec: %v", err)
}
m.logger.Infof("Generated CDI spec with version %v", spec.Raw().Version)
var outputTo io.Writer
if cfg.output == "" {
outputTo = os.Stdout
} else {
err := createParentDirsIfRequired(cfg.output)
if opts.output == "" {
_, err := spec.WriteTo(os.Stdout)
if err != nil {
return fmt.Errorf("failed to create parent folders for output file: %v", err)
return fmt.Errorf("failed to write CDI spec to STDOUT: %v", err)
}
outputFile, err := os.Create(cfg.output)
if err != nil {
return fmt.Errorf("failed to create output file: %v", err)
}
defer outputFile.Close()
outputTo = outputFile
return nil
}
if outputFileFormat := formatFromFilename(cfg.output); outputFileFormat != "" {
m.logger.Debugf("Inferred output format as %q from output file name", outputFileFormat)
if !c.IsSet("format") {
cfg.format = outputFileFormat
} else if outputFileFormat != cfg.format {
m.logger.Warningf("Requested output format %q does not match format implied by output file name: %q", cfg.format, outputFileFormat)
}
}
data, err := yaml.Marshal(spec)
if err != nil {
return fmt.Errorf("failed to marshal CDI spec: %v", err)
}
if strings.ToLower(cfg.format) == formatJSON {
data, err = yaml.YAMLToJSONStrict(data)
if err != nil {
return fmt.Errorf("failed to convert CDI spec from YAML to JSON: %v", err)
}
}
err = writeToOutput(cfg.format, data, outputTo)
if err != nil {
return fmt.Errorf("failed to write output: %v", err)
}
return nil
return spec.Save(opts.output)
}
func formatFromFilename(filename string) string {
ext := filepath.Ext(filename)
switch strings.ToLower(ext) {
case ".json":
return formatJSON
case ".yaml":
return formatYAML
case ".yml":
return formatYAML
return spec.FormatJSON
case ".yaml", ".yml":
return spec.FormatYAML
}
return ""
}
func writeToOutput(format string, data []byte, output io.Writer) error {
if format == formatYAML {
_, err := output.Write([]byte("---\n"))
if err != nil {
return fmt.Errorf("failed to write YAML separator: %v", err)
}
}
_, err := output.Write(data)
func (m command) generateSpec(opts *options) (spec.Interface, error) {
deviceNamer, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
if err != nil {
return fmt.Errorf("failed to write data: %v", err)
return nil, fmt.Errorf("failed to create device namer: %v", err)
}
return nil
}
func (m command) generateSpec(root string, nvidiaCTKPath string, namer deviceNamer) (*specs.Spec, error) {
nvmllib := nvml.New()
if r := nvmllib.Init(); r != nvml.SUCCESS {
return nil, r
cdilib, err := nvcdi.New(
nvcdi.WithLogger(m.logger),
nvcdi.WithDriverRoot(opts.driverRoot),
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
nvcdi.WithDeviceNamer(deviceNamer),
nvcdi.WithMode(string(opts.mode)),
nvcdi.WithCSVFiles(opts.csv.files.Value()),
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
)
if err != nil {
return nil, fmt.Errorf("failed to create CDI library: %v", err)
}
defer nvmllib.Shutdown()
devicelib := device.New(device.WithNvml(nvmllib))
deviceSpecs, err := m.generateDeviceSpecs(devicelib, root, nvidiaCTKPath, namer)
deviceSpecs, err := cdilib.GetAllDeviceSpecs()
if err != nil {
return nil, fmt.Errorf("failed to create device CDI specs: %v", err)
}
allDevice := createAllDevice(deviceSpecs)
deviceSpecs = append(deviceSpecs, allDevice)
allEdits := edits.NewContainerEdits()
ipcs, err := NewIPCDiscoverer(m.logger, root)
commonEdits, err := cdilib.GetCommonEdits()
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
return nil, fmt.Errorf("failed to create edits common for entities: %v", err)
}
ipcEdits, err := edits.FromDiscoverer(ipcs)
if err != nil {
return nil, fmt.Errorf("failed to create container edits for IPC sockets: %v", err)
}
// TODO: We should not have to update this after the fact
for _, s := range ipcEdits.Mounts {
s.Options = append(s.Options, "noexec")
}
allEdits.Append(ipcEdits)
common, err := NewCommonDiscoverer(m.logger, root, nvidiaCTKPath, nvmllib)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
}
deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(m.logger, root, nvidiaCTKPath, deviceSpecs)
if err != nil {
return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err)
}
commonEdits, err := edits.FromDiscoverer(discover.Merge(common, deviceFolderPermissionHooks))
if err != nil {
return nil, fmt.Errorf("failed to create container edits for common entities: %v", err)
}
allEdits.Append(commonEdits)
// We construct the spec and determine the minimum required version based on the specification.
spec := specs.Spec{
Version: "NOT_SET",
Kind: "nvidia.com/gpu",
Devices: deviceSpecs,
ContainerEdits: *allEdits.ContainerEdits,
}
minVersion, err := cdi.MinimumRequiredVersion(&spec)
if err != nil {
return nil, fmt.Errorf("failed to get minumum required CDI spec version: %v", err)
}
m.logger.Infof("Using minimum required CDI spec version: %s", minVersion)
spec.Version = minVersion
return &spec, nil
}
func (m command) generateDeviceSpecs(devicelib device.Interface, root string, nvidiaCTKPath string, namer deviceNamer) ([]specs.Device, error) {
var deviceSpecs []specs.Device
err := devicelib.VisitDevices(func(i int, d device.Device) error {
isMigEnabled, err := d.IsMigEnabled()
if err != nil {
return fmt.Errorf("failed to check whether device is MIG device: %v", err)
}
if isMigEnabled {
return nil
}
device, err := NewFullGPUDiscoverer(m.logger, root, nvidiaCTKPath, d)
if err != nil {
return fmt.Errorf("failed to create device: %v", err)
}
deviceEdits, err := edits.FromDiscoverer(device)
if err != nil {
return fmt.Errorf("failed to create container edits for device: %v", err)
}
deviceName, err := namer.GetDeviceName(i, d)
if err != nil {
return fmt.Errorf("failed to get device name: %v", err)
}
deviceSpec := specs.Device{
Name: deviceName,
ContainerEdits: *deviceEdits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, deviceSpec)
return nil
})
if err != nil {
return nil, fmt.Errorf("failed to generate CDI spec for GPU devices: %v", err)
}
err = devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
device, err := NewMigDeviceDiscoverer(m.logger, "", d, mig)
if err != nil {
return fmt.Errorf("failed to create MIG device: %v", err)
}
deviceEdits, err := edits.FromDiscoverer(device)
if err != nil {
return fmt.Errorf("failed to create container edits for MIG device: %v", err)
}
deviceName, err := namer.GetMigDeviceName(i, j, mig)
if err != nil {
return fmt.Errorf("failed to get device name: %v", err)
}
deviceSpec := specs.Device{
Name: deviceName,
ContainerEdits: *deviceEdits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, deviceSpec)
return nil
})
if err != nil {
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
}
return deviceSpecs, nil
}
// createAllDevice creates an 'all' device which combines the edits from the previous devices
func createAllDevice(deviceSpecs []specs.Device) specs.Device {
edits := edits.NewContainerEdits()
for _, d := range deviceSpecs {
edit := cdi.ContainerEdits{
ContainerEdits: &d.ContainerEdits,
}
edits.Append(&edit)
}
all := specs.Device{
Name: "all",
ContainerEdits: *edits.ContainerEdits,
}
return all
}
// createParentDirsIfRequired creates the parent folders of the specified path if requried.
// Note that MkdirAll does not specifically check whether the specified path is non-empty and raises an error if it is.
// The path will be empty if filename in the current folder is specified, for example
func createParentDirsIfRequired(filename string) error {
dir := filepath.Dir(filename)
if dir == "" {
return nil
}
return os.MkdirAll(dir, 0755)
return spec.New(
spec.WithVendor(opts.vendor),
spec.WithClass(opts.class),
spec.WithDeviceSpecs(deviceSpecs),
spec.WithEdits(*commonEdits.ContainerEdits),
spec.WithFormat(opts.format),
spec.WithMergedDeviceOptions(
transform.WithName(allDeviceName),
transform.WithSkipIfExists(true),
),
spec.WithPermissions(0644),
)
}

View File

@@ -1,75 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// NewMigDeviceDiscoverer creates a discoverer for the specified mig device and its parent.
func NewMigDeviceDiscoverer(logger *logrus.Logger, root string, parent device.Device, d device.MigDevice) (discover.Discover, error) {
minor, ret := parent.GetMinorNumber()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
}
parentPath := fmt.Sprintf("/dev/nvidia%d", minor)
migCaps, err := nvcaps.NewMigCaps()
if err != nil {
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
}
gi, ret := d.GetGpuInstanceId()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
}
ci, ret := d.GetComputeInstanceId()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
}
giCap := nvcaps.NewGPUInstanceCap(minor, gi)
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
if err != nil {
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
}
ciCap := nvcaps.NewComputeInstanceCap(minor, gi, ci)
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
if err != nil {
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
}
deviceNodes := discover.NewCharDeviceDiscoverer(
logger,
[]string{
parentPath,
giCapDevicePath,
ciCapDevicePath,
},
root,
)
return deviceNodes, nil
}

View File

@@ -1,84 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
type deviceNamer interface {
GetDeviceName(int, device.Device) (string, error)
GetMigDeviceName(int, int, device.MigDevice) (string, error)
}
const (
deviceNameStrategyIndex = "index"
deviceNameStrategyTypeIndex = "type-index"
deviceNameStrategyUUID = "uuid"
)
type deviceNameIndex struct {
gpuPrefix string
migPrefix string
}
type deviceNameUUID struct{}
// newDeviceNamer creates a Device Namer based on the supplied strategy.
// This namer can be used to construct the names for MIG and GPU devices when generating the CDI spec.
func newDeviceNamer(strategy string) (deviceNamer, error) {
switch strategy {
case deviceNameStrategyIndex:
return deviceNameIndex{}, nil
case deviceNameStrategyTypeIndex:
return deviceNameIndex{gpuPrefix: "gpu", migPrefix: "mig"}, nil
case deviceNameStrategyUUID:
return deviceNameUUID{}, nil
}
return nil, fmt.Errorf("invalid device name strategy: %v", strategy)
}
// GetDeviceName returns the name for the specified device based on the naming strategy
func (s deviceNameIndex) GetDeviceName(i int, d device.Device) (string, error) {
return fmt.Sprintf("%s%d", s.gpuPrefix, i), nil
}
// GetMigDeviceName returns the name for the specified device based on the naming strategy
func (s deviceNameIndex) GetMigDeviceName(i int, j int, d device.MigDevice) (string, error) {
return fmt.Sprintf("%s%d:%d", s.migPrefix, i, j), nil
}
// GetDeviceName returns the name for the specified device based on the naming strategy
func (s deviceNameUUID) GetDeviceName(i int, d device.Device) (string, error) {
uuid, ret := d.GetUUID()
if ret != nvml.SUCCESS {
return "", fmt.Errorf("failed to get device UUID: %v", ret)
}
return uuid, nil
}
// GetMigDeviceName returns the name for the specified device based on the naming strategy
func (s deviceNameUUID) GetMigDeviceName(i int, j int, d device.MigDevice) (string, error) {
uuid, ret := d.GetUUID()
if ret != nvml.SUCCESS {
return "", fmt.Errorf("failed to get device UUID: %v", ret)
}
return uuid, nil
}

View File

@@ -0,0 +1,86 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package list
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
type config struct{}
// NewCommand constructs a cdi list command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build creates the CLI command
func (m command) build() *cli.Command {
cfg := config{}
// Create the command
c := cli.Command{
Name: "list",
Usage: "List the available CDI devices",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &cfg)
},
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{}
return &c
}
func (m command) validateFlags(c *cli.Context, cfg *config) error {
return nil
}
func (m command) run(c *cli.Context, cfg *config) error {
registry, err := cdi.NewCache(
cdi.WithAutoRefresh(false),
cdi.WithSpecDirs(cdi.DefaultSpecDirs...),
)
if err != nil {
return fmt.Errorf("failed to create CDI cache: %v", err)
}
refreshErr := registry.Refresh()
devices := registry.ListDevices()
m.logger.Infof("Found %d CDI devices", len(devices))
if refreshErr != nil {
m.logger.Warningf("Refreshing the CDI registry returned the following error(s): %v", refreshErr)
}
for _, device := range devices {
fmt.Printf("%s\n", device)
}
return nil
}

View File

@@ -0,0 +1,159 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package root
import (
"fmt"
"io"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/urfave/cli/v2"
)
type loadSaver interface {
Load() (spec.Interface, error)
Save(spec.Interface) error
}
type command struct {
logger logger.Interface
}
type transformOptions struct {
input string
output string
}
type options struct {
transformOptions
from string
to string
}
// NewCommand constructs a generate-cdi command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build creates the CLI command
func (m command) build() *cli.Command {
opts := options{}
c := cli.Command{
Name: "root",
Usage: "Apply a root transform to a CDI specification",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &opts)
},
Action: func(c *cli.Context) error {
return m.run(c, &opts)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "input",
Usage: "Specify the file to read the CDI specification from. If this is '-' the specification is read from STDIN",
Value: "-",
Destination: &opts.input,
},
&cli.StringFlag{
Name: "output",
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
Destination: &opts.output,
},
&cli.StringFlag{
Name: "from",
Usage: "specify the root to be transformed",
Destination: &opts.from,
},
&cli.StringFlag{
Name: "to",
Usage: "specify the replacement root. If this is the same as the from root, the transform is a no-op.",
Value: "",
Destination: &opts.to,
},
}
return &c
}
func (m command) validateFlags(c *cli.Context, opts *options) error {
return nil
}
func (m command) run(c *cli.Context, opts *options) error {
spec, err := opts.Load()
if err != nil {
return fmt.Errorf("failed to load CDI specification: %w", err)
}
err = transform.NewRootTransformer(
opts.from,
opts.to,
).Transform(spec.Raw())
if err != nil {
return fmt.Errorf("failed to transform CDI specification: %w", err)
}
return opts.Save(spec)
}
// Load lodas the input CDI specification
func (o transformOptions) Load() (spec.Interface, error) {
contents, err := o.getContents()
if err != nil {
return nil, fmt.Errorf("failed to read spec contents: %v", err)
}
raw, err := cdi.ParseSpec(contents)
if err != nil {
return nil, fmt.Errorf("failed to parse CDI spec: %v", err)
}
return spec.New(
spec.WithRawSpec(raw),
)
}
func (o transformOptions) getContents() ([]byte, error) {
if o.input == "-" {
return io.ReadAll(os.Stdin)
}
return os.ReadFile(o.input)
}
// Save saves the CDI specification to the output file
func (o transformOptions) Save(s spec.Interface) error {
if o.output == "" {
_, err := s.WriteTo(os.Stdout)
if err != nil {
return fmt.Errorf("failed to write CDI spec to STDOUT: %v", err)
}
return nil
}
return s.Save(o.output)
}

View File

@@ -0,0 +1,51 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package transform
import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi/transform/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
// NewCommand constructs a command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build creates the CLI command
func (m command) build() *cli.Command {
c := cli.Command{
Name: "transform",
Usage: "Apply a transform to a CDI specification",
}
c.Flags = []cli.Flag{}
c.Subcommands = []*cli.Command{
root.NewCommand(m.logger),
}
return &c
}

View File

@@ -0,0 +1,175 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"errors"
"fmt"
"strconv"
"strings"
createdefault "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/create-default"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
// options stores the subcommand options
type options struct {
flags.Options
sets cli.StringSlice
}
// NewCommand constructs an config command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build
func (m command) build() *cli.Command {
opts := options{}
// Create the 'config' command
c := cli.Command{
Name: "config",
Usage: "Interact with the NVIDIA Container Toolkit configuration",
Action: func(ctx *cli.Context) error {
return run(ctx, &opts)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "config-file",
Aliases: []string{"config", "c"},
Usage: "Specify the config file to modify.",
Value: config.GetConfigFilePath(),
Destination: &opts.Config,
},
&cli.StringSliceFlag{
Name: "set",
Usage: "Set a config value using the pattern key=value. If value is empty, this is equivalent to specifying the same key in unset. This flag can be specified multiple times",
Destination: &opts.sets,
},
&cli.BoolFlag{
Name: "in-place",
Aliases: []string{"i"},
Usage: "Modify the config file in-place",
Destination: &opts.InPlace,
},
&cli.StringFlag{
Name: "output",
Aliases: []string{"o"},
Usage: "Specify the output file to write to; If not specified, the output is written to stdout",
Destination: &opts.Output,
},
}
c.Subcommands = []*cli.Command{
createdefault.NewCommand(m.logger),
}
return &c
}
func run(c *cli.Context, opts *options) error {
cfgToml, err := config.New(
config.WithConfigFile(opts.Config),
)
if err != nil {
return fmt.Errorf("unable to create config: %v", err)
}
for _, set := range opts.sets.Value() {
key, value, err := (*configToml)(cfgToml).setFlagToKeyValue(set)
if err != nil {
return fmt.Errorf("invalid --set option %v: %w", set, err)
}
cfgToml.Set(key, value)
}
if err := opts.EnsureOutputFolder(); err != nil {
return fmt.Errorf("failed to create output directory: %v", err)
}
output, err := opts.CreateOutput()
if err != nil {
return fmt.Errorf("failed to open output file: %v", err)
}
defer output.Close()
if err != nil {
return err
}
cfgToml.Save(output)
return nil
}
type configToml config.Toml
var errInvalidConfigOption = errors.New("invalid config option")
var errInvalidFormat = errors.New("invalid format")
// setFlagToKeyValue converts a --set flag to a key-value pair.
// The set flag is of the form key[=value], with the value being optional if key refers to a
// boolean config option.
func (c *configToml) setFlagToKeyValue(setFlag string) (string, interface{}, error) {
if c == nil {
return "", nil, errInvalidConfigOption
}
setParts := strings.SplitN(setFlag, "=", 2)
key := setParts[0]
v := (*config.Toml)(c).Get(key)
if v == nil {
return key, nil, errInvalidConfigOption
}
switch v.(type) {
case bool:
if len(setParts) == 1 {
return key, true, nil
}
}
if len(setParts) != 2 {
return key, nil, fmt.Errorf("%w: expected key=value; got %v", errInvalidFormat, setFlag)
}
value := setParts[1]
switch vt := v.(type) {
case bool:
b, err := strconv.ParseBool(value)
if err != nil {
return key, value, fmt.Errorf("%w: %w", errInvalidFormat, err)
}
return key, b, err
case string:
return key, value, nil
case []string:
return key, strings.Split(value, ","), nil
default:
return key, nil, fmt.Errorf("unsupported type for %v (%v)", setParts, vt)
}
}

View File

@@ -0,0 +1,173 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/pelletier/go-toml"
"github.com/stretchr/testify/require"
)
func TestSetFlagToKeyValue(t *testing.T) {
testCases := []struct {
description string
config map[string]interface{}
setFlag string
expectedKey string
expectedValue interface{}
expectedError error
}{
{
description: "empty config returns an error",
setFlag: "anykey=value",
expectedKey: "anykey",
expectedError: errInvalidConfigOption,
},
{
description: "option not present returns an error",
config: map[string]interface{}{
"defined": "defined-value",
},
setFlag: "undefined=new-value",
expectedKey: "undefined",
expectedError: errInvalidConfigOption,
},
{
description: "boolean option assumes true",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean",
expectedKey: "boolean",
expectedValue: true,
},
{
description: "boolean option returns true",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean=true",
expectedKey: "boolean",
expectedValue: true,
},
{
description: "boolean option returns false",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean=false",
expectedKey: "boolean",
expectedValue: false,
},
{
description: "invalid boolean option returns error",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean=something",
expectedKey: "boolean",
expectedValue: "something",
expectedError: errInvalidFormat,
},
{
description: "string option requires value",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string",
expectedKey: "string",
expectedValue: nil,
expectedError: errInvalidFormat,
},
{
description: "string option returns value",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=string-value",
expectedKey: "string",
expectedValue: "string-value",
},
{
description: "string option returns value with equals",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=string-value=more",
expectedKey: "string",
expectedValue: "string-value=more",
},
{
description: "string option treats bool value as string",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=true",
expectedKey: "string",
expectedValue: "true",
},
{
description: "string option treats int value as string",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=5",
expectedKey: "string",
expectedValue: "5",
},
{
description: "[]string option returns single value",
config: map[string]interface{}{
"string": []string{"value"},
},
setFlag: "string=string-value",
expectedKey: "string",
expectedValue: []string{"string-value"},
},
{
description: "[]string option returns multiple values",
config: map[string]interface{}{
"string": []string{"value"},
},
setFlag: "string=first,second",
expectedKey: "string",
expectedValue: []string{"first", "second"},
},
{
description: "[]string option returns values with equals",
config: map[string]interface{}{
"string": []string{"value"},
},
setFlag: "string=first=1,second=2",
expectedKey: "string",
expectedValue: []string{"first=1", "second=2"},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
tree, _ := toml.TreeFromMap(tc.config)
cfgToml := (*config.Toml)(tree)
k, v, err := (*configToml)(cfgToml).setFlagToKeyValue(tc.setFlag)
require.ErrorIs(t, err, tc.expectedError)
require.EqualValues(t, tc.expectedKey, k)
require.EqualValues(t, tc.expectedValue, v)
})
}
}

View File

@@ -0,0 +1,94 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package defaultsubcommand
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
// NewCommand constructs a default command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build creates the CLI command
func (m command) build() *cli.Command {
opts := flags.Options{}
// Create the 'default' command
c := cli.Command{
Name: "default",
Aliases: []string{"create-default", "generate-default"},
Usage: "Generate the default NVIDIA Container Toolkit configuration file",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &opts)
},
Action: func(c *cli.Context) error {
return m.run(c, &opts)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "output",
Aliases: []string{"o"},
Usage: "Specify the output file to write to; If not specified, the output is written to stdout",
Destination: &opts.Output,
},
}
return &c
}
func (m command) validateFlags(c *cli.Context, opts *flags.Options) error {
return opts.Validate()
}
func (m command) run(c *cli.Context, opts *flags.Options) error {
cfgToml, err := config.New()
if err != nil {
return fmt.Errorf("unable to load or create config: %v", err)
}
if err := opts.EnsureOutputFolder(); err != nil {
return fmt.Errorf("failed to create output directory: %v", err)
}
output, err := opts.CreateOutput()
if err != nil {
return fmt.Errorf("failed to open output file: %v", err)
}
defer output.Close()
_, err = cfgToml.Save(output)
if err != nil {
return fmt.Errorf("failed to write output: %v", err)
}
return nil
}

View File

@@ -0,0 +1,82 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package flags
import (
"fmt"
"io"
"os"
"path/filepath"
)
// Options stores options for the config commands
type Options struct {
Config string
Output string
InPlace bool
}
// Validate checks whether the options are valid.
func (o Options) Validate() error {
if o.InPlace && o.Output != "" {
return fmt.Errorf("cannot specify both --in-place and --output")
}
return nil
}
// GetOutput returns the effective output
func (o Options) GetOutput() string {
if o.InPlace {
return o.Config
}
return o.Output
}
// EnsureOutputFolder creates the output folder if it does not exist.
// If the output folder is not specified (i.e. output to STDOUT), it is ignored.
func (o Options) EnsureOutputFolder() error {
output := o.GetOutput()
if output == "" {
return nil
}
if dir := filepath.Dir(output); dir != "" {
return os.MkdirAll(dir, 0755)
}
return nil
}
// CreateOutput creates the writer for the output.
func (o Options) CreateOutput() (io.WriteCloser, error) {
output := o.GetOutput()
if output == "" {
return nullCloser{os.Stdout}, nil
}
return os.Create(output)
}
// nullCloser is a writer that does nothing on Close.
type nullCloser struct {
io.Writer
}
// Close is a no-op for a nullCloser.
func (d nullCloser) Close() error {
return nil
}

View File

@@ -18,18 +18,19 @@ package chmod
import (
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
logger logger.Interface
}
type config struct {
@@ -39,7 +40,7 @@ type config struct {
}
// NewCommand constructs a chmod command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
@@ -133,7 +134,12 @@ func (m command) run(c *cli.Context, cfg *config) error {
func (m command) getPaths(root string, paths []string) []string {
var pathsInRoot []string
for _, f := range paths {
pathsInRoot = append(pathsInRoot, filepath.Join(root, f))
path := filepath.Join(root, f)
if _, err := os.Stat(path); err != nil {
m.logger.Debugf("Skipping path %q: %v", path, err)
continue
}
pathsInRoot = append(pathsInRoot, path)
}
return pathsInRoot

View File

@@ -22,15 +22,16 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
logger logger.Interface
}
type config struct {
@@ -41,7 +42,7 @@ type config struct {
}
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
@@ -100,7 +101,10 @@ func (m command) run(c *cli.Context, cfg *config) error {
csvFiles := cfg.filenames.Value()
chainLocator := lookup.NewSymlinkChainLocator(m.logger, cfg.hostRoot)
chainLocator := lookup.NewSymlinkChainLocator(
lookup.WithLogger(m.logger),
lookup.WithRoot(cfg.hostRoot),
)
var candidates []string
for _, file := range csvFiles {
@@ -116,7 +120,7 @@ func (m command) run(c *cli.Context, cfg *config) error {
}
targets, err := chainLocator.Locate(ms.Path)
if err != nil {
m.logger.Warnf("Failed to locate symlink %v", ms.Path)
m.logger.Warningf("Failed to locate symlink %v", ms.Path)
}
candidates = append(candidates, targets...)
}
@@ -125,21 +129,18 @@ func (m command) run(c *cli.Context, cfg *config) error {
created := make(map[string]bool)
// candidates is a list of absolute paths to symlinks in a chain, or the final target of the chain.
for _, candidate := range candidates {
targets, err := m.Locate(candidate)
target, err := symlinks.Resolve(candidate)
if err != nil {
m.logger.Debugf("Skipping invalid link: %v", err)
continue
} else if len(targets) != 1 {
m.logger.Debugf("Unexepected number of targets: %v", targets)
continue
} else if targets[0] == candidate {
} else if target == candidate {
m.logger.Debugf("%v is not a symlink", candidate)
continue
}
err = m.createLink(created, cfg.hostRoot, containerRoot, targets[0], candidate)
err = m.createLink(created, cfg.hostRoot, containerRoot, target, candidate)
if err != nil {
m.logger.Warnf("Failed to create link %v: %v", []string{targets[0], candidate}, err)
m.logger.Warningf("Failed to create link %v: %v", []string{target, candidate}, err)
}
}
@@ -147,13 +148,13 @@ func (m command) run(c *cli.Context, cfg *config) error {
for _, l := range links {
parts := strings.Split(l, "::")
if len(parts) != 2 {
m.logger.Warnf("Invalid link specification %v", l)
m.logger.Warningf("Invalid link specification %v", l)
continue
}
err := m.createLink(created, cfg.hostRoot, containerRoot, parts[0], parts[1])
if err != nil {
m.logger.Warnf("Failed to create link %v: %v", parts, err)
m.logger.Warningf("Failed to create link %v: %v", parts, err)
}
}
@@ -164,7 +165,7 @@ func (m command) run(c *cli.Context, cfg *config) error {
func (m command) createLink(created map[string]bool, hostRoot string, containerRoot string, target string, link string) error {
linkPath, err := changeRoot(hostRoot, containerRoot, link)
if err != nil {
m.logger.Warnf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
m.logger.Warningf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
}
if created[linkPath] {
m.logger.Debugf("Link %v already created", linkPath)
@@ -173,7 +174,7 @@ func (m command) createLink(created map[string]bool, hostRoot string, containerR
targetPath, err := changeRoot(hostRoot, "/", target)
if err != nil {
m.logger.Warnf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
m.logger.Warningf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
}
m.logger.Infof("Symlinking %v to %v", linkPath, targetPath)

View File

@@ -18,19 +18,19 @@ package hook
import (
chmod "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/chmod"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type hookCommand struct {
logger *logrus.Logger
logger logger.Interface
}
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := hookCommand{
logger: logger,
}

View File

@@ -22,13 +22,13 @@ import (
"path/filepath"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
logger logger.Interface
}
type config struct {
@@ -37,7 +37,7 @@ type config struct {
}
// NewCommand constructs an update-ldcache command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
@@ -84,6 +84,12 @@ func (m command) run(c *cli.Context, cfg *config) error {
return fmt.Errorf("failed to determined container root: %v", err)
}
_, err = os.Stat(filepath.Join(containerRoot, "/etc/ld.so.cache"))
if err != nil && os.IsNotExist(err) {
m.logger.Debugf("No ld.so.cache found, skipping update")
return nil
}
err = m.createConfig(containerRoot, cfg.folders.Value())
if err != nil {
return fmt.Errorf("failed to update ld.so.conf: %v", err)
@@ -105,6 +111,10 @@ func (m command) createConfig(root string, folders []string) error {
return nil
}
if err := os.MkdirAll(filepath.Join(root, "/etc/ld.so.conf.d"), 0755); err != nil {
return fmt.Errorf("failed to create ld.so.conf.d: %v", err)
}
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf")
if err != nil {
return fmt.Errorf("failed to create config file: %v", err)
@@ -125,5 +135,10 @@ func (m command) createConfig(root string, folders []string) error {
configured[folder] = true
}
// The created file needs to be world readable for the cases where the container is run as a non-root user.
if err := os.Chmod(configFile.Name(), 0644); err != nil {
return fmt.Errorf("failed to chmod config file: %v", err)
}
return nil
}

View File

@@ -17,16 +17,16 @@
package info
import (
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
logger logger.Interface
}
// NewCommand constructs an info command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
@@ -35,13 +35,13 @@ func NewCommand(logger *logrus.Logger) *cli.Command {
// build
func (m command) build() *cli.Command {
// Create the 'hook' command
hook := cli.Command{
// Create the 'info' command
info := cli.Command{
Name: "info",
Usage: "Provide information about the system",
}
hook.Subcommands = []*cli.Command{}
info.Subcommands = []*cli.Command{}
return &hook
return &info
}

View File

@@ -20,28 +20,31 @@ import (
"os"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
infoCLI "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/info"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
)
var logger = log.New()
// config defines the options that can be set for the CLI through config files,
// options defines the options that can be set for the CLI through config files,
// environment variables, or command line flags
type config struct {
type options struct {
// Debug indicates whether the CLI is started in "debug" mode
Debug bool
// Quiet indicates whether the CLI is started in "quiet" mode
Quiet bool
}
func main() {
// Create a config struct to hold the parsed environment variables or command line flags
config := config{}
logger := logrus.New()
// Create a options struct to hold the parsed environment variables or command line flags
opts := options{}
// Create the top-level CLI
c := cli.NewApp()
@@ -57,16 +60,25 @@ func main() {
Name: "debug",
Aliases: []string{"d"},
Usage: "Enable debug-level logging",
Destination: &config.Debug,
Destination: &opts.Debug,
EnvVars: []string{"NVIDIA_CTK_DEBUG"},
},
&cli.BoolFlag{
Name: "quiet",
Usage: "Suppress all output except for errors; overrides --debug",
Destination: &opts.Quiet,
EnvVars: []string{"NVIDIA_CTK_QUIET"},
},
}
// Set log-level for all subcommands
c.Before = func(c *cli.Context) error {
logLevel := log.InfoLevel
if config.Debug {
logLevel = log.DebugLevel
logLevel := logrus.InfoLevel
if opts.Debug {
logLevel = logrus.DebugLevel
}
if opts.Quiet {
logLevel = logrus.ErrorLevel
}
logger.SetLevel(logLevel)
return nil
@@ -79,12 +91,13 @@ func main() {
infoCLI.NewCommand(logger),
cdi.NewCommand(logger),
system.NewCommand(logger),
config.NewCommand(logger),
}
// Run the CLI
err := c.Run(os.Args)
if err != nil {
log.Errorf("%v", err)
log.Exit(1)
logger.Errorf("%v", err)
os.Exit(1)
}
}

View File

@@ -17,31 +17,39 @@
package configure
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/nvidia"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/crio"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/docker"
"github.com/pelletier/go-toml"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook"
"github.com/urfave/cli/v2"
)
const (
defaultRuntime = "docker"
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
defaultCrioConfigFilePath = "/etc/crio/crio.conf"
// defaultNVIDIARuntimeName is the default name to use in configs for the NVIDIA Container Runtime
defaultNVIDIARuntimeName = "nvidia"
// defaultNVIDIARuntimeExecutable is the default NVIDIA Container Runtime executable file name
defaultNVIDIARuntimeExecutable = "nvidia-container-runtime"
defaultNVIDIARuntimeExpecutablePath = "/usr/bin/nvidia-container-runtime"
defaultNVIDIARuntimeHookExpecutablePath = "/usr/bin/nvidia-container-runtime-hook"
defaultContainerdConfigFilePath = "/etc/containerd/config.toml"
defaultCrioConfigFilePath = "/etc/crio/crio.conf"
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
)
type command struct {
logger *logrus.Logger
logger logger.Interface
}
// NewCommand constructs an configure command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
@@ -54,7 +62,15 @@ type config struct {
dryRun bool
runtime string
configFilePath string
nvidiaOptions nvidia.Options
mode string
hookFilePath string
nvidiaRuntime struct {
name string
path string
hookPath string
setAsDefault bool
}
}
func (m command) build() *cli.Command {
@@ -65,6 +81,9 @@ func (m command) build() *cli.Command {
configure := cli.Command{
Name: "configure",
Usage: "Add a runtime to the specified container engine",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &config)
},
Action: func(c *cli.Context) error {
return m.configureWrapper(c, &config)
},
@@ -78,7 +97,7 @@ func (m command) build() *cli.Command {
},
&cli.StringFlag{
Name: "runtime",
Usage: "the target runtime engine. One of [crio, docker]",
Usage: "the target runtime engine; one of [containerd, crio, docker]",
Value: defaultRuntime,
Destination: &config.runtime,
},
@@ -88,116 +107,173 @@ func (m command) build() *cli.Command {
Destination: &config.configFilePath,
},
&cli.StringFlag{
Name: "nvidia-runtime-name",
Usage: "specify the name of the NVIDIA runtime that will be added",
Value: nvidia.RuntimeName,
Destination: &config.nvidiaOptions.RuntimeName,
Name: "config-mode",
Usage: "the config mode for runtimes that support multiple configuration mechanisms",
Destination: &config.mode,
},
&cli.StringFlag{
Name: "runtime-path",
Name: "oci-hook-path",
Usage: "the path to the OCI runtime hook to create if --config-mode=oci-hook is specified. If no path is specified, the generated hook is output to STDOUT.\n\tNote: The use of OCI hooks is deprecated.",
Destination: &config.hookFilePath,
},
&cli.StringFlag{
Name: "nvidia-runtime-name",
Usage: "specify the name of the NVIDIA runtime that will be added",
Value: defaultNVIDIARuntimeName,
Destination: &config.nvidiaRuntime.name,
},
&cli.StringFlag{
Name: "nvidia-runtime-path",
Aliases: []string{"runtime-path"},
Usage: "specify the path to the NVIDIA runtime executable",
Value: nvidia.RuntimeExecutable,
Destination: &config.nvidiaOptions.RuntimePath,
Value: defaultNVIDIARuntimeExecutable,
Destination: &config.nvidiaRuntime.path,
},
&cli.StringFlag{
Name: "nvidia-runtime-hook-path",
Usage: "specify the path to the NVIDIA Container Runtime hook executable",
Value: defaultNVIDIARuntimeHookExpecutablePath,
Destination: &config.nvidiaRuntime.hookPath,
},
&cli.BoolFlag{
Name: "set-as-default",
Usage: "set the specified runtime as the default runtime",
Destination: &config.nvidiaOptions.SetAsDefault,
Name: "nvidia-set-as-default",
Aliases: []string{"set-as-default"},
Usage: "set the NVIDIA runtime as the default runtime",
Destination: &config.nvidiaRuntime.setAsDefault,
},
}
return &configure
}
func (m command) configureWrapper(c *cli.Context, config *config) error {
func (m command) validateFlags(c *cli.Context, config *config) error {
if config.mode == "oci-hook" {
if !filepath.IsAbs(config.nvidiaRuntime.hookPath) {
return fmt.Errorf("the NVIDIA runtime hook path %q is not an absolute path", config.nvidiaRuntime.hookPath)
}
return nil
}
if config.mode != "" && config.mode != "config-file" {
m.logger.Warningf("Ignoring unsupported config mode for %v: %q", config.runtime, config.mode)
}
config.mode = "config-file"
switch config.runtime {
case "containerd", "crio", "docker":
break
default:
return fmt.Errorf("unrecognized runtime '%v'", config.runtime)
}
switch config.runtime {
case "containerd", "crio":
if config.nvidiaRuntime.path == defaultNVIDIARuntimeExecutable {
config.nvidiaRuntime.path = defaultNVIDIARuntimeExpecutablePath
}
if !filepath.IsAbs(config.nvidiaRuntime.path) {
return fmt.Errorf("the NVIDIA runtime path %q is not an absolute path", config.nvidiaRuntime.path)
}
}
return nil
}
// configureWrapper updates the specified container engine config to enable the NVIDIA runtime
func (m command) configureWrapper(c *cli.Context, config *config) error {
switch config.mode {
case "oci-hook":
return m.configureOCIHook(c, config)
case "config-file":
return m.configureConfigFile(c, config)
}
return fmt.Errorf("unsupported config-mode: %v", config.mode)
}
// configureConfigFile updates the specified container engine config file to enable the NVIDIA runtime.
func (m command) configureConfigFile(c *cli.Context, config *config) error {
configFilePath := config.resolveConfigFilePath()
var cfg engine.Interface
var err error
switch config.runtime {
case "containerd":
cfg, err = containerd.New(
containerd.WithLogger(m.logger),
containerd.WithPath(configFilePath),
)
case "crio":
return m.configureCrio(c, config)
cfg, err = crio.New(
crio.WithLogger(m.logger),
crio.WithPath(configFilePath),
)
case "docker":
return m.configureDocker(c, config)
cfg, err = docker.New(
docker.WithLogger(m.logger),
docker.WithPath(configFilePath),
)
default:
err = fmt.Errorf("unrecognized runtime '%v'", config.runtime)
}
if err != nil || cfg == nil {
return fmt.Errorf("unable to load config for runtime %v: %v", config.runtime, err)
}
return fmt.Errorf("unrecognized runtime '%v'", config.runtime)
}
// configureDocker updates the docker config to enable the NVIDIA Container Runtime
func (m command) configureDocker(c *cli.Context, config *config) error {
configFilePath := config.configFilePath
if configFilePath == "" {
configFilePath = defaultDockerConfigFilePath
}
cfg, err := docker.LoadConfig(configFilePath)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
err = docker.UpdateConfig(
cfg,
config.nvidiaOptions.RuntimeName,
config.nvidiaOptions.RuntimePath,
config.nvidiaOptions.SetAsDefault,
err = cfg.AddRuntime(
config.nvidiaRuntime.name,
config.nvidiaRuntime.path,
config.nvidiaRuntime.setAsDefault,
)
if err != nil {
return fmt.Errorf("unable to update config: %v", err)
}
if config.dryRun {
output, err := json.MarshalIndent(cfg, "", " ")
if err != nil {
return fmt.Errorf("unable to convert to JSON: %v", err)
}
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
return nil
}
err = docker.FlushConfig(cfg, configFilePath)
outputPath := config.getOuputConfigPath()
n, err := cfg.Save(outputPath)
if err != nil {
return fmt.Errorf("unable to flush config: %v", err)
}
m.logger.Infof("Wrote updated config to %v", configFilePath)
m.logger.Infof("It is recommended that the docker daemon be restarted.")
return nil
}
// configureCrio updates the crio config to enable the NVIDIA Container Runtime
func (m command) configureCrio(c *cli.Context, config *config) error {
configFilePath := config.configFilePath
if configFilePath == "" {
configFilePath = defaultCrioConfigFilePath
}
cfg, err := crio.LoadConfig(configFilePath)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
err = crio.UpdateConfig(
cfg,
config.nvidiaOptions.RuntimeName,
config.nvidiaOptions.RuntimePath,
config.nvidiaOptions.SetAsDefault,
)
if err != nil {
return fmt.Errorf("unable to update config: %v", err)
}
if config.dryRun {
output, err := toml.Marshal(cfg)
if err != nil {
return fmt.Errorf("unable to convert to TOML: %v", err)
if outputPath != "" {
if n == 0 {
m.logger.Infof("Removed empty config from %v", outputPath)
} else {
m.logger.Infof("Wrote updated config to %v", outputPath)
}
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
return nil
m.logger.Infof("It is recommended that %v daemon be restarted.", config.runtime)
}
err = crio.FlushConfig(configFilePath, cfg)
if err != nil {
return fmt.Errorf("unable to flush config: %v", err)
}
m.logger.Infof("Wrote updated config to %v", configFilePath)
m.logger.Infof("It is recommended that the cri-o daemon be restarted.")
return nil
}
// resolveConfigFilePath returns the default config file path for the configured container engine
func (c *config) resolveConfigFilePath() string {
if c.configFilePath != "" {
return c.configFilePath
}
switch c.runtime {
case "containerd":
return defaultContainerdConfigFilePath
case "crio":
return defaultCrioConfigFilePath
case "docker":
return defaultDockerConfigFilePath
}
return ""
}
// getOuputConfigPath returns the configured config path or "" if dry-run is enabled
func (c *config) getOuputConfigPath() string {
if c.dryRun {
return ""
}
return c.resolveConfigFilePath()
}
// configureOCIHook creates and configures the OCI hook for the NVIDIA runtime
func (m *command) configureOCIHook(c *cli.Context, config *config) error {
err := ocihook.CreateHook(config.hookFilePath, config.nvidiaRuntime.hookPath)
if err != nil {
return fmt.Errorf("error creating OCI hook: %v", err)
}
return nil
}

View File

@@ -1,75 +0,0 @@
/*
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package nvidia
const (
// RuntimeName is the default name to use in configs for the NVIDIA Container Runtime
RuntimeName = "nvidia"
// RuntimeExecutable is the default NVIDIA Container Runtime executable file name
RuntimeExecutable = "nvidia-container-runtime"
)
// Options specifies the options for the NVIDIA Container Runtime w.r.t a container engine such as docker.
type Options struct {
SetAsDefault bool
RuntimeName string
RuntimePath string
}
// Runtime defines an NVIDIA runtime with a name and a executable
type Runtime struct {
Name string
Path string
}
// DefaultRuntime returns the default runtime for the configured options.
// If the configuration is invalid or the default runtimes should not be set
// the empty string is returned.
func (o Options) DefaultRuntime() string {
if !o.SetAsDefault {
return ""
}
return o.RuntimeName
}
// Runtime creates a runtime struct based on the options.
func (o Options) Runtime() Runtime {
path := o.RuntimePath
if o.RuntimePath == "" {
path = RuntimeExecutable
}
r := Runtime{
Name: o.RuntimeName,
Path: path,
}
return r
}
// DockerRuntimesConfig generatest the expected docker config for the specified runtime
func (r Runtime) DockerRuntimesConfig() map[string]interface{} {
runtimes := make(map[string]interface{})
runtimes[r.Name] = map[string]interface{}{
"path": r.Path,
"args": []string{},
}
return runtimes
}

View File

@@ -18,16 +18,16 @@ package runtime
import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type runtimeCommand struct {
logger *logrus.Logger
logger logger.Interface
}
// NewCommand constructs a runtime command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := runtimeCommand{
logger: logger,
}

View File

@@ -21,36 +21,47 @@ import (
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci"
)
type allPossible struct {
logger *logrus.Logger
driverRoot string
logger logger.Interface
devRoot string
deviceMajors devices.Devices
migCaps nvcaps.MigCaps
}
// newAllPossible returns a new allPossible device node lister.
// This lister lists all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
func newAllPossible(logger *logrus.Logger, driverRoot string) (nodeLister, error) {
func newAllPossible(logger logger.Interface, devRoot string) (nodeLister, error) {
deviceMajors, err := devices.GetNVIDIADevices()
if err != nil {
return nil, fmt.Errorf("failed reading device majors: %v", err)
}
var requiredMajors []devices.Name
migCaps, err := nvcaps.NewMigCaps()
if err != nil {
return nil, fmt.Errorf("failed to read MIG caps: %v", err)
}
if migCaps == nil {
migCaps = make(nvcaps.MigCaps)
} else {
requiredMajors = append(requiredMajors, devices.NVIDIACaps)
}
requiredMajors = append(requiredMajors, devices.NVIDIAGPU, devices.NVIDIAUVM)
for _, name := range requiredMajors {
if !deviceMajors.Exists(name) {
return nil, fmt.Errorf("missing required device major %s", name)
}
}
l := allPossible{
logger: logger,
driverRoot: driverRoot,
devRoot: devRoot,
deviceMajors: deviceMajors,
migCaps: migCaps,
}
@@ -60,8 +71,9 @@ func newAllPossible(logger *logrus.Logger, driverRoot string) (nodeLister, error
// DeviceNodes returns a list of all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
func (m allPossible) DeviceNodes() ([]deviceNode, error) {
gpus, err := nvpci.NewFrom(
filepath.Join(m.driverRoot, nvpci.PCIDevicesRoot),
gpus, err := nvpci.New(
nvpci.WithPCIDevicesRoot(filepath.Join(m.devRoot, nvpci.PCIDevicesRoot)),
nvpci.WithLogger(m.logger),
).GetGPUs()
if err != nil {
return nil, fmt.Errorf("failed to get GPU information: %v", err)
@@ -69,7 +81,7 @@ func (m allPossible) DeviceNodes() ([]deviceNode, error) {
count := len(gpus)
if count == 0 {
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
return nil, nil
}
@@ -168,7 +180,7 @@ func (m allPossible) newDeviceNode(deviceName devices.Name, path string, minor i
major, _ := m.deviceMajors.Get(deviceName)
return deviceNode{
path: filepath.Join(m.driverRoot, path),
path: filepath.Join(m.devRoot, path),
major: uint32(major),
minor: uint32(minor),
}

View File

@@ -24,8 +24,10 @@ import (
"strings"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/fsnotify/fsnotify"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
@@ -34,19 +36,21 @@ const (
)
type command struct {
logger *logrus.Logger
logger logger.Interface
}
type config struct {
devCharPath string
driverRoot string
dryRun bool
watch bool
createAll bool
devCharPath string
driverRoot string
dryRun bool
watch bool
createAll bool
createDeviceNodes bool
loadKernelModules bool
}
// NewCommand constructs a command sub-command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
@@ -97,6 +101,18 @@ func (m command) build() *cli.Command {
Destination: &cfg.createAll,
EnvVars: []string{"CREATE_ALL"},
},
&cli.BoolFlag{
Name: "load-kernel-modules",
Usage: "Load the NVIDIA kernel modules before creating symlinks. This is only applicable when --create-all is set.",
Destination: &cfg.loadKernelModules,
EnvVars: []string{"LOAD_KERNEL_MODULES"},
},
&cli.BoolFlag{
Name: "create-device-nodes",
Usage: "Create the NVIDIA control device nodes in the driver root if they do not exist. This is only applicable when --create-all is set",
Destination: &cfg.createDeviceNodes,
EnvVars: []string{"CREATE_DEVICE_NODES"},
},
&cli.BoolFlag{
Name: "dry-run",
Usage: "If set, the command will not create any symlinks.",
@@ -114,6 +130,16 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error {
return fmt.Errorf("create-all and watch are mutually exclusive")
}
if cfg.loadKernelModules && !cfg.createAll {
m.logger.Warning("load-kernel-modules is only applicable when create-all is set; ignoring")
cfg.loadKernelModules = false
}
if cfg.createDeviceNodes && !cfg.createAll {
m.logger.Warning("create-device-nodes is only applicable when create-all is set; ignoring")
cfg.createDeviceNodes = false
}
return nil
}
@@ -137,6 +163,8 @@ func (m command) run(c *cli.Context, cfg *config) error {
WithDriverRoot(cfg.driverRoot),
WithDryRun(cfg.dryRun),
WithCreateAll(cfg.createAll),
WithLoadKernelModules(cfg.loadKernelModules),
WithCreateDeviceNodes(cfg.createDeviceNodes),
)
if err != nil {
return fmt.Errorf("failed to create symlink creator: %v", err)
@@ -186,12 +214,15 @@ create:
}
type linkCreator struct {
logger *logrus.Logger
lister nodeLister
driverRoot string
devCharPath string
dryRun bool
createAll bool
logger logger.Interface
lister nodeLister
driverRoot string
devRoot string
devCharPath string
dryRun bool
createAll bool
createDeviceNodes bool
loadKernelModules bool
}
// Creator is an interface for creating symlinks to /dev/nv* devices in /dev/char.
@@ -209,34 +240,81 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) {
opt(&c)
}
if c.logger == nil {
c.logger = logrus.StandardLogger()
c.logger = logger.New()
}
if c.driverRoot == "" {
c.driverRoot = "/"
}
if c.devRoot == "" {
c.devRoot = "/"
}
if c.devCharPath == "" {
c.devCharPath = defaultDevCharPath
}
if err := c.setup(); err != nil {
return nil, err
}
if c.createAll {
lister, err := newAllPossible(c.logger, c.driverRoot)
lister, err := newAllPossible(c.logger, c.devRoot)
if err != nil {
return nil, fmt.Errorf("failed to create all possible device lister: %v", err)
}
c.lister = lister
} else {
c.lister = existing{c.logger, c.driverRoot}
c.lister = existing{c.logger, c.devRoot}
}
return c, nil
}
func (m linkCreator) setup() error {
if !m.loadKernelModules && !m.createDeviceNodes {
return nil
}
if m.loadKernelModules {
modules := nvmodules.New(
nvmodules.WithLogger(m.logger),
nvmodules.WithDryRun(m.dryRun),
nvmodules.WithRoot(m.driverRoot),
)
if err := modules.LoadAll(); err != nil {
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
}
}
if m.createDeviceNodes {
devices, err := nvdevices.New(
nvdevices.WithLogger(m.logger),
nvdevices.WithDryRun(m.dryRun),
nvdevices.WithDevRoot(m.devRoot),
)
if err != nil {
return err
}
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create NVIDIA device nodes: %v", err)
}
}
return nil
}
// WithDriverRoot sets the driver root path.
// This is the path in which kernel modules must be loaded.
func WithDriverRoot(root string) Option {
return func(c *linkCreator) {
c.driverRoot = root
}
}
// WithDevRoot sets the root path for the /dev directory.
func WithDevRoot(root string) Option {
return func(c *linkCreator) {
c.devRoot = root
}
}
// WithDevCharPath sets the path at which the symlinks will be created.
func WithDevCharPath(path string) Option {
return func(c *linkCreator) {
@@ -252,7 +330,7 @@ func WithDryRun(dryRun bool) Option {
}
// WithLogger sets the logger.
func WithLogger(logger *logrus.Logger) Option {
func WithLogger(logger logger.Interface) Option {
return func(c *linkCreator) {
c.logger = logger
}
@@ -265,6 +343,20 @@ func WithCreateAll(createAll bool) Option {
}
}
// WithLoadKernelModules sets the loadKernelModules flag for the linkCreator.
func WithLoadKernelModules(loadKernelModules bool) Option {
return func(lc *linkCreator) {
lc.loadKernelModules = loadKernelModules
}
}
// WithCreateDeviceNodes sets the createDeviceNodes flag for the linkCreator.
func WithCreateDeviceNodes(createDeviceNodes bool) Option {
return func(lc *linkCreator) {
lc.createDeviceNodes = createDeviceNodes
}
}
// CreateLinks creates symlinks for all NVIDIA device nodes found in the driver root.
func (m linkCreator) CreateLinks() error {
deviceNodes, err := m.lister.DeviceNodes()
@@ -290,7 +382,7 @@ func (m linkCreator) CreateLinks() error {
err = os.Symlink(target, linkPath)
if err != nil {
m.logger.Warnf("Could not create symlink: %v", err)
m.logger.Warningf("Could not create symlink: %v", err)
}
}

View File

@@ -20,8 +20,8 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@@ -30,8 +30,8 @@ type nodeLister interface {
}
type existing struct {
logger *logrus.Logger
driverRoot string
logger logger.Interface
devRoot string
}
// DeviceNodes returns a list of NVIDIA device nodes in the specified root.
@@ -39,22 +39,22 @@ type existing struct {
func (m existing) DeviceNodes() ([]deviceNode, error) {
locator := lookup.NewCharDeviceLocator(
lookup.WithLogger(m.logger),
lookup.WithRoot(m.driverRoot),
lookup.WithRoot(m.devRoot),
lookup.WithOptional(true),
)
devices, err := locator.Locate("/dev/nvidia*")
if err != nil {
m.logger.Warnf("Error while locating device: %v", err)
m.logger.Warningf("Error while locating device: %v", err)
}
capDevices, err := locator.Locate("/dev/nvidia-caps/nvidia-*")
if err != nil {
m.logger.Warnf("Error while locating caps device: %v", err)
m.logger.Warningf("Error while locating caps device: %v", err)
}
if len(devices) == 0 && len(capDevices) == 0 {
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
return nil, nil
}
@@ -67,7 +67,7 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
var stat unix.Stat_t
err := unix.Stat(d, &stat)
if err != nil {
m.logger.Warnf("Could not stat device: %v", err)
m.logger.Warningf("Could not stat device: %v", err)
continue
}
deviceNode := deviceNode{

View File

@@ -0,0 +1,126 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package createdevicenodes
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
type options struct {
driverRoot string
dryRun bool
control bool
loadKernelModules bool
}
// NewCommand constructs a command sub-command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build
func (m command) build() *cli.Command {
opts := options{}
c := cli.Command{
Name: "create-device-nodes",
Usage: "A utility to create NVIDIA device nodes",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &opts)
},
Action: func(c *cli.Context) error {
return m.run(c, &opts)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "driver-root",
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
Value: "/",
Destination: &opts.driverRoot,
EnvVars: []string{"DRIVER_ROOT"},
},
&cli.BoolFlag{
Name: "control-devices",
Usage: "create all control device nodes: nvidiactl, nvidia-modeset, nvidia-uvm, nvidia-uvm-tools",
Destination: &opts.control,
},
&cli.BoolFlag{
Name: "load-kernel-modules",
Usage: "load the NVIDIA Kernel Modules before creating devices nodes",
Destination: &opts.loadKernelModules,
},
&cli.BoolFlag{
Name: "dry-run",
Usage: "if set, the command will not create any symlinks.",
Value: false,
Destination: &opts.dryRun,
EnvVars: []string{"DRY_RUN"},
},
}
return &c
}
func (m command) validateFlags(r *cli.Context, opts *options) error {
return nil
}
func (m command) run(c *cli.Context, opts *options) error {
if opts.loadKernelModules {
modules := nvmodules.New(
nvmodules.WithLogger(m.logger),
nvmodules.WithDryRun(opts.dryRun),
nvmodules.WithRoot(opts.driverRoot),
)
if err := modules.LoadAll(); err != nil {
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
}
}
if opts.control {
devices, err := nvdevices.New(
nvdevices.WithLogger(m.logger),
nvdevices.WithDryRun(opts.dryRun),
nvdevices.WithDevRoot(opts.driverRoot),
)
if err != nil {
return err
}
m.logger.Infof("Creating control device nodes at %s", opts.driverRoot)
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create NVIDIA control device nodes: %v", err)
}
}
return nil
}

View File

@@ -0,0 +1,101 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package createdevicenodes
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
type options struct {
driverRoot string
}
// NewCommand constructs a command sub-command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build
func (m command) build() *cli.Command {
opts := options{}
c := cli.Command{
Name: "print-ldcache",
Usage: "A utility to print the contents of the ldcache",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &opts)
},
Action: func(c *cli.Context) error {
return m.run(c, &opts)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "driver-root",
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
Value: "/",
Destination: &opts.driverRoot,
EnvVars: []string{"DRIVER_ROOT"},
},
}
return &c
}
func (m command) validateFlags(r *cli.Context, opts *options) error {
return nil
}
func (m command) run(c *cli.Context, opts *options) error {
cache, err := ldcache.New(m.logger, opts.driverRoot)
if err != nil {
return fmt.Errorf("failed to create ldcache: %v", err)
}
lib32, lib64 := cache.List()
if len(lib32) == 0 {
m.logger.Info("No 32-bit libraries found")
} else {
m.logger.Infof("%d 32-bit libraries found", len(lib32))
for _, lib := range lib32 {
m.logger.Infof("%v", lib)
}
}
if len(lib64) == 0 {
m.logger.Info("No 64-bit libraries found")
} else {
m.logger.Infof("%d 64-bit libraries found", len(lib64))
for _, lib := range lib64 {
m.logger.Infof("%v", lib)
}
}
return nil
}

View File

@@ -18,16 +18,18 @@ package system
import (
devchar "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks"
"github.com/sirupsen/logrus"
devicenodes "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-device-nodes"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/print-ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
logger logger.Interface
}
// NewCommand constructs a runtime command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
@@ -43,6 +45,8 @@ func (m command) build() *cli.Command {
system.Subcommands = []*cli.Command{
devchar.NewCommand(m.logger),
devicenodes.NewCommand(m.logger),
ldcache.NewCommand(m.logger),
}
return &system

View File

@@ -1,32 +0,0 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -1,32 +0,0 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
#no-cgroups = false
user = "root:video"
ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -1,32 +0,0 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -1,32 +0,0 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig.real"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -53,15 +53,6 @@ ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
# Debian Jessie still had ldconfig.real
RUN if [ "$(lsb_release -cs)" = "jessie" ]; then \
sed -i 's;"@/sbin/ldconfig";"@/sbin/ldconfig.real";' $DIST_DIR/config.toml; \
fi
WORKDIR $DIST_DIR
COPY packaging/debian ./debian

View File

@@ -44,16 +44,6 @@ ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
WORKDIR $DIST_DIR/..
COPY packaging/rpm .

View File

@@ -62,16 +62,6 @@ ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
WORKDIR $DIST_DIR/..
COPY packaging/rpm .

View File

@@ -51,10 +51,6 @@ ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
WORKDIR $DIST_DIR
COPY packaging/debian ./debian

View File

@@ -14,10 +14,10 @@
# Supported OSs by architecture
AMD64_TARGETS := ubuntu20.04 ubuntu18.04 ubuntu16.04 debian10 debian9
X86_64_TARGETS := fedora35 centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
PPC64LE_TARGETS := ubuntu18.04 ubuntu16.04 centos7 centos8 rhel7 rhel8
ARM64_TARGETS := ubuntu20.04 ubuntu18.04
AARCH64_TARGETS := fedora35 centos8 rhel8 amazonlinux2
AARCH64_TARGETS := centos7 centos8 rhel8 amazonlinux2
# Define top-level build targets
docker%: SHELL:=/bin/bash
@@ -88,63 +88,35 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
LIBNVIDIA_CONTAINER_VERSION ?= $(LIB_VERSION)
LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
# private ubuntu target
--ubuntu%: OS := ubuntu
--ubuntu%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
--ubuntu%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
--ubuntu%: PKG_REV := 1
# private debian target
--debian%: OS := debian
--debian%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
--debian%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
--debian%: PKG_REV := 1
# private centos target
--centos%: OS := centos
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--centos%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--centos%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--centos%: CONFIG_TOML_SUFFIX := rpm-yum
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
# private fedora target
--fedora%: OS := fedora
--fedora%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--fedora%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--fedora%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--fedora%: CONFIG_TOML_SUFFIX := rpm-yum
# The fedora(35) base image has very slow performance when building aarch64 packages.
# Since our primary concern here is glibc versions, we use the older glibc version available in centos8.
--fedora35%: BASEIMAGE = quay.io/centos/centos:stream8
# private amazonlinux target
--amazonlinux%: OS := amazonlinux
--amazonlinux%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--amazonlinux%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--amazonlinux%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--amazonlinux%: CONFIG_TOML_SUFFIX := rpm-yum
# private opensuse-leap target
--opensuse-leap%: OS = opensuse-leap
--opensuse-leap%: BASEIMAGE = opensuse/leap:$(VERSION)
--opensuse-leap%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--opensuse-leap%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
# private rhel target (actually built on centos)
--rhel%: OS := centos
--rhel%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
--rhel%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--rhel%: CONFIG_TOML_SUFFIX := rpm-yum
--rhel8%: BASEIMAGE = quay.io/centos/centos:stream8
# We allow the CONFIG_TOML_SUFFIX to be overridden.
CONFIG_TOML_SUFFIX ?= $(OS)
docker-build-%:
@echo "Building for $(TARGET_PLATFORM)"
docker pull --platform=linux/$(ARCH) $(BASEIMAGE)
@@ -155,10 +127,9 @@ docker-build-%:
--build-arg BASEIMAGE="$(BASEIMAGE)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg PKG_NAME="$(LIB_NAME)" \
--build-arg PKG_VERS="$(LIB_VERSION)" \
--build-arg PKG_REV="$(PKG_REV)" \
--build-arg PKG_VERS="$(PACKAGE_VERSION)" \
--build-arg PKG_REV="$(PACKAGE_REVISION)" \
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
--tag $(BUILDIMAGE) \
--file $(DOCKERFILE) .

25
go.mod
View File

@@ -1,31 +1,29 @@
module github.com/NVIDIA/nvidia-container-toolkit
go 1.18
go 1.20
require (
github.com/BurntSushi/toml v1.0.0
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a
github.com/NVIDIA/go-nvml v0.12.0-1
github.com/container-orchestrated-devices/container-device-interface v0.6.0
github.com/fsnotify/fsnotify v1.5.4
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb
github.com/opencontainers/runtime-spec v1.1.0-rc.2
github.com/pelletier/go-toml v1.9.4
github.com/sirupsen/logrus v1.9.0
github.com/stretchr/testify v1.7.0
github.com/stretchr/testify v1.8.1
github.com/urfave/cli/v2 v2.3.0
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884
golang.org/x/mod v0.5.0
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6
sigs.k8s.io/yaml v1.3.0
golang.org/x/sys v0.7.0
)
require (
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/opencontainers/runc v1.1.4 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/opencontainers/runc v1.1.6 // indirect
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
github.com/opencontainers/selinux v1.10.1 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
@@ -33,4 +31,5 @@ require (
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)

81
go.sum
View File

@@ -1,84 +1,73 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.0.0 h1:dtDWrepsVPfW9H/4y7dDgFc2MBUSeJhlaDtK13CxFlU=
github.com/BurntSushi/toml v1.0.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82 h1:x751Xx1tdxkiA/sdkv2J769n21UbYKzVOpe9S/h1M3k=
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/NVIDIA/go-nvml v0.12.0-1 h1:6mdjtlFo+17dWL7VFPfuRMtf0061TF4DKls9pkSw6uM=
github.com/NVIDIA/go-nvml v0.12.0-1/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a h1:sP3PcgyIkRlHqfF3Jfpe/7G8kf/qpzG4C8r94y9hLbE=
github.com/container-orchestrated-devices/container-device-interface v0.5.4-0.20230111111500-5b3b5d81179a/go.mod h1:xMRa4fJgXzSDFUCURSimOUgoSc+odohvO3uXT9xjqH0=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/container-orchestrated-devices/container-device-interface v0.6.0 h1:aWwcz/Ep0Fd7ZuBjQGjU/jdPloM7ydhMW13h85jZNvk=
github.com/container-orchestrated-devices/container-device-interface v0.6.0/go.mod h1:OQlgtJtDrOxSQ1BWODC8OZK1tzi9W69wek+Jy17ndzo=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.1 h1:r/myEWzV9lfsM1tFLgDyu0atFtJ1fXn261LKYj/3DxU=
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runc v1.1.4 h1:nRCz/8sKg6K6jgYAFLDlXzPeITBZJyX28DBVhWD+5dg=
github.com/opencontainers/runc v1.1.4/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb h1:1xSVPOd7/UA+39/hXEGnBJ13p6JFB0E1EvQFlrRDOXI=
github.com/opencontainers/runc v1.1.6 h1:XbhB8IfG/EsnhNvZtNdLB0GBw92GYEFvKlhaJk9jUgA=
github.com/opencontainers/runc v1.1.6/go.mod h1:CbUumNnWCuTGFukNXahoo/RFBZvDAgRh/smNYNOhA50=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.1.0-rc.2 h1:ucBtEms2tamYYW/SvGpvq9yUN0NEVL6oyLEwDcTSrk8=
github.com/opencontainers/runtime-spec v1.1.0-rc.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.10.1 h1:09LIPVRP3uuZGQvgR+SgMSNBd1Eb3vlRbGqQpoHsF8w=
github.com/opencontainers/selinux v1.10.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM=
github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M=
github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -86,33 +75,21 @@ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHo
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342 h1:083n9fJt2dWOpJd/X/q9Xgl5XtQLL22uSFYbzVqJssg=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230119114711-6fe07bb33342/go.mod h1:GStidGxhaqJhYFW1YpOnLvYCbL2EsM0od7IW4u7+JgU=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a h1:lceJVurLqiWFdxK6KMDw+SIwrAsFW/af44XrNlbGw78=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a/go.mod h1:KYZksBgh18o+uzgnpDazzG4LVYtnfB96VXHMXypEtik=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884 h1:V0LUbfm4kVA1CPG8FgG9AGZqa3ykE5U12Gd3PZgoItA=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884/go.mod h1:/x5Ky1ZJNyCjDkgSL1atII0EFKQF5WaIHKeP5nkaQfk=
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6 h1:cy1ko5847T/lJ45eyg/7uLprIE/amW5IXxGtEnQdYMI=
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=

View File

@@ -16,33 +16,18 @@
package config
import (
"github.com/pelletier/go-toml"
)
// ContainerCLIConfig stores the options for the nvidia-container-cli
type ContainerCLIConfig struct {
Root string
}
// getContainerCLIConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getContainerCLIConfigFrom(toml *toml.Tree) *ContainerCLIConfig {
cfg := getDefaultContainerCLIConfig()
if toml == nil {
return cfg
}
cfg.Root = toml.GetDefault("nvidia-container-cli.root", cfg.Root).(string)
return cfg
}
// getDefaultContainerCLIConfig defines the default values for the config
func getDefaultContainerCLIConfig() *ContainerCLIConfig {
c := ContainerCLIConfig{
Root: "",
}
return &c
Root string `toml:"root"`
Path string `toml:"path"`
Environment []string `toml:"environment"`
Debug string `toml:"debug"`
Ldcache string `toml:"ldcache"`
LoadKmods bool `toml:"load-kmods"`
// NoPivot disables the pivot root operation in the NVIDIA Container CLI.
// This is not exposed in the config if not set.
NoPivot bool `toml:"no-pivot,omitempty"`
NoCgroups bool `toml:"no-cgroups"`
User string `toml:"user"`
Ldconfig string `toml:"ldconfig"`
}

View File

@@ -17,17 +17,26 @@
package config
import (
"fmt"
"io"
"bufio"
"os"
"path"
"path/filepath"
"strings"
"github.com/pelletier/go-toml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
)
const (
configOverride = "XDG_CONFIG_HOME"
configFilePath = "nvidia-container-runtime/config.toml"
nvidiaCTKExecutable = "nvidia-ctk"
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
nvidiaContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
nvidiaContainerRuntimeHookDefaultPath = "/usr/bin/nvidia-container-runtime-hook"
)
var (
@@ -38,77 +47,169 @@ var (
NVIDIAContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
// NVIDIAContainerToolkitExecutable is the executable name for the NVIDIA Container Toolkit (an alias for the NVIDIA Container Runtime Hook)
NVIDIAContainerToolkitExecutable = "nvidia-container-toolkit"
configDir = "/etc/"
)
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
type Config struct {
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
DisableRequire bool `toml:"disable-require"`
SwarmResource string `toml:"swarm-resource"`
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
SupportedDriverCapabilities string `toml:"supported-driver-capabilities"`
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
NVIDIAContainerRuntimeHookConfig RuntimeHookConfig `toml:"nvidia-container-runtime-hook"`
}
// GetConfigFilePath returns the path to the config file for the configured system
func GetConfigFilePath() string {
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
return filepath.Join(XDGConfigDir, configFilePath)
}
return filepath.Join("/etc", configFilePath)
}
// GetConfig sets up the config struct. Values are read from a toml file
// or set via the environment.
func GetConfig() (*Config, error) {
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
configDir = XDGConfigDir
}
configFilePath := path.Join(configDir, configFilePath)
tomlFile, err := os.Open(configFilePath)
if err != nil {
return getDefaultConfig(), nil
}
defer tomlFile.Close()
cfg, err := loadConfigFrom(tomlFile)
if err != nil {
return nil, fmt.Errorf("failed to read config values: %v", err)
}
return cfg, nil
}
// loadRuntimeConfigFrom reads the config from the specified Reader
func loadConfigFrom(reader io.Reader) (*Config, error) {
toml, err := toml.LoadReader(reader)
cfg, err := New(
WithConfigFile(GetConfigFilePath()),
)
if err != nil {
return nil, err
}
return getConfigFrom(toml)
return cfg.Config()
}
// getConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getConfigFrom(toml *toml.Tree) (*Config, error) {
cfg := getDefaultConfig()
// GetDefault defines the default values for the config
func GetDefault() (*Config, error) {
d := Config{
AcceptEnvvarUnprivileged: true,
SupportedDriverCapabilities: image.SupportedDriverCapabilities.String(),
NVIDIAContainerCLIConfig: ContainerCLIConfig{
LoadKmods: true,
Ldconfig: getLdConfigPath(),
},
NVIDIACTKConfig: CTKConfig{
Path: nvidiaCTKExecutable,
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{cdi.AnnotationPrefix},
SpecDirs: cdi.DefaultSpecDirs,
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: NVIDIAContainerRuntimeHookExecutable,
},
}
return &d, nil
}
if toml == nil {
return cfg, nil
func getLdConfigPath() string {
if _, err := os.Stat("/sbin/ldconfig.real"); err == nil {
return "@/sbin/ldconfig.real"
}
return "@/sbin/ldconfig"
}
// getCommentedUserGroup returns whether the nvidia-container-cli user and group config option should be commented.
func getCommentedUserGroup() bool {
uncommentIf := map[string]bool{
"suse": true,
"opensuse": true,
}
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
runtimeConfig, err := getRuntimeConfigFrom(toml)
idsLike := getDistIDLike()
for _, id := range idsLike {
if uncommentIf[id] {
return false
}
}
return true
}
// getDistIDLike returns the ID_LIKE field from /etc/os-release.
func getDistIDLike() []string {
releaseFile, err := os.Open("/etc/os-release")
if err != nil {
return nil, fmt.Errorf("failed to load nvidia-container-runtime config: %v", err)
return nil
}
cfg.NVIDIAContainerRuntimeConfig = *runtimeConfig
defer releaseFile.Close()
return cfg, nil
scanner := bufio.NewScanner(releaseFile)
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "ID_LIKE=") {
value := strings.Trim(strings.TrimPrefix(line, "ID_LIKE="), "\"")
return strings.Split(value, " ")
}
}
return nil
}
// getDefaultConfig defines the default values for the config
func getDefaultConfig() *Config {
c := Config{
NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(),
NVIDIACTKConfig: *getDefaultCTKConfig(),
NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(),
// ResolveNVIDIACTKPath resolves the path to the nvidia-ctk binary.
// This executable is used in hooks and needs to be an absolute path.
// If the path is specified as an absolute path, it is used directly
// without checking for existence of an executable at that path.
func ResolveNVIDIACTKPath(logger logger.Interface, nvidiaCTKPath string) string {
return resolveWithDefault(
logger,
"NVIDIA Container Toolkit CLI",
nvidiaCTKPath,
nvidiaCTKDefaultFilePath,
)
}
// ResolveNVIDIAContainerRuntimeHookPath resolves the path the nvidia-container-runtime-hook binary.
func ResolveNVIDIAContainerRuntimeHookPath(logger logger.Interface, nvidiaContainerRuntimeHookPath string) string {
return resolveWithDefault(
logger,
"NVIDIA Container Runtime Hook",
nvidiaContainerRuntimeHookPath,
nvidiaContainerRuntimeHookDefaultPath,
)
}
// resolveWithDefault resolves the path to the specified binary.
// If an absolute path is specified, it is used directly without searching for the binary.
// If the binary cannot be found in the path, the specified default is used instead.
func resolveWithDefault(logger logger.Interface, label string, path string, defaultPath string) string {
if filepath.IsAbs(path) {
logger.Debugf("Using specified %v path %v", label, path)
return path
}
return &c
if path == "" {
path = filepath.Base(defaultPath)
}
logger.Debugf("Locating %v as %v", label, path)
lookup := lookup.NewExecutableLocator(logger, "")
resolvedPath := defaultPath
targets, err := lookup.Locate(path)
if err != nil {
logger.Warningf("Failed to locate %v: %v", path, err)
} else {
logger.Debugf("Found %v candidates: %v", path, targets)
resolvedPath = targets[0]
}
logger.Debugf("Using %v path %v", label, path)
return resolvedPath
}

View File

@@ -17,7 +17,6 @@
package config
import (
"io/ioutil"
"os"
"path/filepath"
"strings"
@@ -27,38 +26,40 @@ import (
)
func TestGetConfigWithCustomConfig(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
testDir := t.TempDir()
t.Setenv(configOverride, testDir)
filename := filepath.Join(testDir, configFilePath)
// By default debug is disabled
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
testDir := filepath.Join(wd, "test")
filename := filepath.Join(testDir, configFilePath)
os.Setenv(configOverride, testDir)
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
require.NoError(t, os.WriteFile(filename, contents, 0766))
cfg, err := GetConfig()
require.NoError(t, err)
require.Equal(t, cfg.NVIDIAContainerRuntimeConfig.DebugFilePath, "/nvidia-container-toolkit.log")
require.Equal(t, "/nvidia-container-toolkit.log", cfg.NVIDIAContainerRuntimeConfig.DebugFilePath)
}
func TestGetConfig(t *testing.T) {
testCases := []struct {
description string
contents []string
expectedError error
expectedConfig *Config
description string
contents []string
expectedError error
inspectLdconfig bool
expectedConfig *Config
}{
{
description: "empty config is default",
description: "empty config is default",
inspectLdconfig: true,
expectedConfig: &Config{
AcceptEnvvarUnprivileged: true,
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
Root: "",
LoadKmods: true,
Ldconfig: "WAS_CHECKED",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
@@ -69,8 +70,16 @@ func TestGetConfig(t *testing.T) {
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{"cdi.k8s.io/"},
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
@@ -79,19 +88,30 @@ func TestGetConfig(t *testing.T) {
{
description: "config options set inline",
contents: []string{
"accept-nvidia-visible-devices-envvar-when-unprivileged = false",
"supported-driver-capabilities = \"compute,utility\"",
"nvidia-container-cli.root = \"/bar/baz\"",
"nvidia-container-cli.load-kmods = false",
"nvidia-container-cli.ldconfig = \"/foo/bar/ldconfig\"",
"nvidia-container-runtime.debug = \"/foo/bar\"",
"nvidia-container-runtime.experimental = true",
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
"nvidia-container-runtime.log-level = \"debug\"",
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
"nvidia-container-runtime.mode = \"not-auto\"",
"nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"",
"nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
"nvidia-container-runtime.modes.cdi.spec-dirs = [\"/except/etc/cdi\", \"/not/var/run/cdi\",]",
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"nvidia-container-runtime-hook.path = \"/foo/bar/nvidia-container-runtime-hook\"",
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
},
expectedConfig: &Config{
AcceptEnvvarUnprivileged: false,
SupportedDriverCapabilities: "compute,utility",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz",
Root: "/bar/baz",
LoadKmods: false,
Ldconfig: "/foo/bar/ldconfig",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
@@ -102,8 +122,22 @@ func TestGetConfig(t *testing.T) {
CSV: csvModeConfig{
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "example.vendor.com/device",
AnnotationPrefixes: []string{
"cdi.k8s.io/",
"example.vendor.com/",
},
SpecDirs: []string{
"/except/etc/cdi",
"/not/var/run/cdi",
},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "/foo/bar/nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk",
},
@@ -112,23 +146,36 @@ func TestGetConfig(t *testing.T) {
{
description: "config options set in section",
contents: []string{
"accept-nvidia-visible-devices-envvar-when-unprivileged = false",
"supported-driver-capabilities = \"compute,utility\"",
"[nvidia-container-cli]",
"root = \"/bar/baz\"",
"load-kmods = false",
"ldconfig = \"/foo/bar/ldconfig\"",
"[nvidia-container-runtime]",
"debug = \"/foo/bar\"",
"experimental = true",
"discover-mode = \"not-legacy\"",
"log-level = \"debug\"",
"runtimes = [\"/some/runtime\",]",
"mode = \"not-auto\"",
"[nvidia-container-runtime.modes.cdi]",
"default-kind = \"example.vendor.com/device\"",
"annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
"spec-dirs = [\"/except/etc/cdi\", \"/not/var/run/cdi\",]",
"[nvidia-container-runtime.modes.csv]",
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"[nvidia-container-runtime-hook]",
"path = \"/foo/bar/nvidia-container-runtime-hook\"",
"[nvidia-ctk]",
"path = \"/foo/bar/nvidia-ctk\"",
},
expectedConfig: &Config{
AcceptEnvvarUnprivileged: false,
SupportedDriverCapabilities: "compute,utility",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz",
Root: "/bar/baz",
LoadKmods: false,
Ldconfig: "/foo/bar/ldconfig",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
@@ -139,8 +186,22 @@ func TestGetConfig(t *testing.T) {
CSV: csvModeConfig{
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "example.vendor.com/device",
AnnotationPrefixes: []string{
"cdi.k8s.io/",
"example.vendor.com/",
},
SpecDirs: []string{
"/except/etc/cdi",
"/not/var/run/cdi",
},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "/foo/bar/nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk",
},
@@ -152,12 +213,24 @@ func TestGetConfig(t *testing.T) {
t.Run(tc.description, func(t *testing.T) {
reader := strings.NewReader(strings.Join(tc.contents, "\n"))
cfg, err := loadConfigFrom(reader)
tomlCfg, err := loadConfigTomlFrom(reader)
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
cfg, err := tomlCfg.Config()
require.NoError(t, err)
// We first handle the ldconfig path since this is currently system-dependent.
if tc.inspectLdconfig {
ldconfig := cfg.NVIDIAContainerCLIConfig.Ldconfig
require.True(t, strings.HasPrefix(ldconfig, "@/sbin/ldconfig"))
remaining := strings.TrimPrefix(ldconfig, "@/sbin/ldconfig")
require.True(t, remaining == ".real" || remaining == "")
cfg.NVIDIAContainerCLIConfig.Ldconfig = "WAS_CHECKED"
}
require.EqualValues(t, tc.expectedConfig, cfg)
})

View File

@@ -1,125 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package crio
import (
"fmt"
"os"
"github.com/pelletier/go-toml"
log "github.com/sirupsen/logrus"
)
// LoadConfig loads the cri-o config from disk
func LoadConfig(config string) (*toml.Tree, error) {
log.Infof("Loading config: %v", config)
info, err := os.Stat(config)
if os.IsExist(err) && info.IsDir() {
return nil, fmt.Errorf("config file is a directory")
}
configFile := config
if os.IsNotExist(err) {
configFile = "/dev/null"
log.Infof("Config file does not exist, creating new one")
}
cfg, err := toml.LoadFile(configFile)
if err != nil {
return nil, err
}
log.Infof("Successfully loaded config")
return cfg, nil
}
// UpdateConfig updates the cri-o config to include the NVIDIA Container Runtime
func UpdateConfig(config *toml.Tree, runtimeClass string, runtimePath string, setAsDefault bool) error {
switch runc := config.Get("crio.runtime.runtimes.runc").(type) {
case *toml.Tree:
runc, _ = toml.Load(runc.String())
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass}, runc)
}
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_path"}, runtimePath)
config.SetPath([]string{"crio", "runtime", "runtimes", runtimeClass, "runtime_type"}, "oci")
if setAsDefault {
config.SetPath([]string{"crio", "runtime", "default_runtime"}, runtimeClass)
}
return nil
}
// RevertConfig reverts the cri-o config to remove the NVIDIA Container Runtime
func RevertConfig(config *toml.Tree, runtimeClass string) error {
if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok {
if runtimeClass == runtime {
config.DeletePath([]string{"crio", "runtime", "default_runtime"})
}
}
runtimeClassPath := []string{"crio", "runtime", "runtimes", runtimeClass}
config.DeletePath(runtimeClassPath)
for i := 0; i < len(runtimeClassPath); i++ {
remainingPath := runtimeClassPath[:len(runtimeClassPath)-i]
if entry, ok := config.GetPath(remainingPath).(*toml.Tree); ok {
if len(entry.Keys()) != 0 {
break
}
config.DeletePath(remainingPath)
}
}
return nil
}
// FlushConfig flushes the updated/reverted config out to disk
func FlushConfig(config string, cfg *toml.Tree) error {
log.Infof("Flushing config")
output, err := cfg.ToTomlString()
if err != nil {
return fmt.Errorf("unable to convert to TOML: %v", err)
}
switch len(output) {
case 0:
err := os.Remove(config)
if err != nil {
return fmt.Errorf("unable to remove empty file: %v", err)
}
log.Infof("Config empty, removing file")
default:
f, err := os.Create(config)
if err != nil {
return fmt.Errorf("unable to open '%v' for writing: %v", config, err)
}
defer f.Close()
_, err = f.WriteString(output)
if err != nil {
return fmt.Errorf("unable to write output: %v", err)
}
}
log.Infof("Successfully flushed config")
return nil
}

View File

@@ -1,117 +0,0 @@
/**
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package docker
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
log "github.com/sirupsen/logrus"
)
// LoadConfig loads the docker config from disk
func LoadConfig(configFilePath string) (map[string]interface{}, error) {
log.Infof("Loading docker config from %v", configFilePath)
info, err := os.Stat(configFilePath)
if os.IsExist(err) && info.IsDir() {
return nil, fmt.Errorf("config file is a directory")
}
cfg := make(map[string]interface{})
if os.IsNotExist(err) {
log.Infof("Config file does not exist, creating new one")
return cfg, nil
}
readBytes, err := ioutil.ReadFile(configFilePath)
if err != nil {
return nil, fmt.Errorf("unable to read config: %v", err)
}
reader := bytes.NewReader(readBytes)
if err := json.NewDecoder(reader).Decode(&cfg); err != nil {
return nil, err
}
log.Infof("Successfully loaded config")
return cfg, nil
}
// UpdateConfig updates the docker config to include the nvidia runtimes
func UpdateConfig(config map[string]interface{}, runtimeName string, runtimePath string, setAsDefault bool) error {
// Read the existing runtimes
runtimes := make(map[string]interface{})
if _, exists := config["runtimes"]; exists {
runtimes = config["runtimes"].(map[string]interface{})
}
// Add / update the runtime definitions
runtimes[runtimeName] = map[string]interface{}{
"path": runtimePath,
"args": []string{},
}
// Update the runtimes definition
if len(runtimes) > 0 {
config["runtimes"] = runtimes
}
if setAsDefault {
config["default-runtime"] = runtimeName
}
return nil
}
// FlushConfig flushes the updated/reverted config out to disk
func FlushConfig(cfg map[string]interface{}, configFilePath string) error {
log.Infof("Flushing docker config to %v", configFilePath)
output, err := json.MarshalIndent(cfg, "", " ")
if err != nil {
return fmt.Errorf("unable to convert to JSON: %v", err)
}
switch len(output) {
case 0:
err := os.Remove(configFilePath)
if err != nil {
return fmt.Errorf("unable to remove empty file: %v", err)
}
log.Infof("Config empty, removing file")
default:
f, err := os.Create(configFilePath)
if err != nil {
return fmt.Errorf("unable to open %v for writing: %v", configFilePath, err)
}
defer f.Close()
_, err = f.WriteString(string(output))
if err != nil {
return fmt.Errorf("unable to write output: %v", err)
}
}
log.Infof("Successfully flushed config")
return nil
}

36
internal/config/hook.go Normal file
View File

@@ -0,0 +1,36 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
// RuntimeHookConfig stores the config options for the NVIDIA Container Runtime
type RuntimeHookConfig struct {
// Path specifies the path to the NVIDIA Container Runtime hook binary.
// If an executable name is specified, this will be resolved in the path.
Path string `toml:"path"`
// SkipModeDetection disables the mode check for the runtime hook.
SkipModeDetection bool `toml:"skip-mode-detection"`
}
// GetDefaultRuntimeHookConfig defines the default values for the config
func GetDefaultRuntimeHookConfig() (*RuntimeHookConfig, error) {
cfg, err := GetDefault()
if err != nil {
return nil, err
}
return &cfg.NVIDIAContainerRuntimeHookConfig, nil
}

View File

@@ -0,0 +1,73 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"fmt"
"strings"
)
type builder struct {
env []string
disableRequire bool
}
// New creates a new CUDA image from the input options.
func New(opt ...Option) (CUDA, error) {
b := &builder{}
for _, o := range opt {
o(b)
}
return b.build()
}
// build creates a CUDA image from the builder.
func (b builder) build() (CUDA, error) {
c := make(CUDA)
for _, e := range b.env {
parts := strings.SplitN(e, "=", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("invalid environment variable: %v", e)
}
c[parts[0]] = parts[1]
}
if b.disableRequire {
c[envNVDisableRequire] = "true"
}
return c, nil
}
// Option is a functional option for creating a CUDA image.
type Option func(*builder)
// WithDisableRequire sets the disable require option.
func WithDisableRequire(disableRequire bool) Option {
return func(b *builder) {
b.disableRequire = disableRequire
}
}
// WithEnv sets the environment variables to use when creating the CUDA image.
func WithEnv(env []string) Option {
return func(b *builder) {
b.env = env
}
}

View File

@@ -16,12 +16,18 @@
package image
import (
"sort"
"strings"
)
// DriverCapability represents the possible values of NVIDIA_DRIVER_CAPABILITIES
type DriverCapability string
// Constants for the supported driver capabilities
const (
DriverCapabilityAll DriverCapability = "all"
DriverCapabilityNone DriverCapability = "none"
DriverCapabilityCompat32 DriverCapability = "compat32"
DriverCapabilityCompute DriverCapability = "compute"
DriverCapabilityDisplay DriverCapability = "display"
@@ -31,12 +37,37 @@ const (
DriverCapabilityVideo DriverCapability = "video"
)
var (
driverCapabilitiesNone = NewDriverCapabilities()
driverCapabilitiesAll = NewDriverCapabilities("all")
// DefaultDriverCapabilities sets the value for driver capabilities if no value is set.
DefaultDriverCapabilities = NewDriverCapabilities("utility,compute")
// SupportedDriverCapabilities defines the set of all supported driver capabilities.
SupportedDriverCapabilities = NewDriverCapabilities("compute,compat32,graphics,utility,video,display,ngx")
)
// NewDriverCapabilities creates a set of driver capabilities from the specified capabilities
func NewDriverCapabilities(capabilities ...string) DriverCapabilities {
dc := make(DriverCapabilities)
for _, capability := range capabilities {
for _, c := range strings.Split(capability, ",") {
trimmed := strings.TrimSpace(c)
if trimmed == "" {
continue
}
dc[DriverCapability(trimmed)] = true
}
}
return dc
}
// DriverCapabilities represents the NVIDIA_DRIVER_CAPABILITIES set for the specified image.
type DriverCapabilities map[DriverCapability]bool
// Has check whether the specified capability is selected.
func (c DriverCapabilities) Has(capability DriverCapability) bool {
if c[DriverCapabilityAll] {
if c.IsAll() {
return true
}
return c[capability]
@@ -44,11 +75,72 @@ func (c DriverCapabilities) Has(capability DriverCapability) bool {
// Any checks whether any of the specified capabilites are set
func (c DriverCapabilities) Any(capabilities ...DriverCapability) bool {
if c.IsAll() {
return true
}
for _, cap := range capabilities {
if c.Has(cap) {
return true
}
}
return false
}
// List returns the list of driver capabilities.
// The list is sorted.
func (c DriverCapabilities) List() []string {
var capabilities []string
for capability := range c {
capabilities = append(capabilities, string(capability))
}
sort.Strings(capabilities)
return capabilities
}
// String returns the string repesentation of the driver capabilities.
func (c DriverCapabilities) String() string {
if c.IsAll() {
return "all"
}
return strings.Join(c.List(), ",")
}
// IsAll indicates whether the set of capabilities is `all`
func (c DriverCapabilities) IsAll() bool {
return c[DriverCapabilityAll]
}
// Intersection returns a new set which includes the item in BOTH d and s2.
// For example: d = {a1, a2} s2 = {a2, a3} s1.Intersection(s2) = {a2}
func (c DriverCapabilities) Intersection(s2 DriverCapabilities) DriverCapabilities {
if s2.IsAll() {
return c
}
if c.IsAll() {
return s2
}
intersection := make(DriverCapabilities)
for capability := range s2 {
if c[capability] {
intersection[capability] = true
}
}
return intersection
}
// IsSuperset returns true if and only if d is a superset of s2.
func (c DriverCapabilities) IsSuperset(s2 DriverCapabilities) bool {
if c.IsAll() {
return true
}
for capability := range s2 {
if !c[capability] {
return false
}
}
return true
}

View File

@@ -0,0 +1,134 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
func TestDriverCapabilitiesIntersection(t *testing.T) {
testCases := []struct {
capabilities DriverCapabilities
supportedCapabilities DriverCapabilities
expectedIntersection DriverCapabilities
}{
{
capabilities: driverCapabilitiesNone,
supportedCapabilities: driverCapabilitiesNone,
expectedIntersection: driverCapabilitiesNone,
},
{
capabilities: driverCapabilitiesAll,
supportedCapabilities: driverCapabilitiesNone,
expectedIntersection: driverCapabilitiesNone,
},
{
capabilities: driverCapabilitiesAll,
supportedCapabilities: SupportedDriverCapabilities,
expectedIntersection: SupportedDriverCapabilities,
},
{
capabilities: SupportedDriverCapabilities,
supportedCapabilities: driverCapabilitiesAll,
expectedIntersection: SupportedDriverCapabilities,
},
{
capabilities: driverCapabilitiesNone,
supportedCapabilities: driverCapabilitiesAll,
expectedIntersection: driverCapabilitiesNone,
},
{
capabilities: driverCapabilitiesNone,
supportedCapabilities: NewDriverCapabilities("cap1"),
expectedIntersection: driverCapabilitiesNone,
},
{
capabilities: NewDriverCapabilities("cap0,cap1"),
supportedCapabilities: NewDriverCapabilities("cap1,cap0"),
expectedIntersection: NewDriverCapabilities("cap0,cap1"),
},
{
capabilities: DefaultDriverCapabilities,
supportedCapabilities: SupportedDriverCapabilities,
expectedIntersection: DefaultDriverCapabilities,
},
{
capabilities: NewDriverCapabilities("compute,compat32,graphics,utility,video,display"),
supportedCapabilities: NewDriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
expectedIntersection: NewDriverCapabilities("compute,compat32,graphics,utility,video,display"),
},
{
capabilities: NewDriverCapabilities("cap1"),
supportedCapabilities: driverCapabilitiesNone,
expectedIntersection: driverCapabilitiesNone,
},
{
capabilities: NewDriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
supportedCapabilities: NewDriverCapabilities("compute,compat32,graphics,utility,video,display"),
expectedIntersection: NewDriverCapabilities("compute,compat32,graphics,utility,video,display"),
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
intersection := tc.supportedCapabilities.Intersection(tc.capabilities)
require.EqualValues(t, tc.expectedIntersection, intersection)
})
}
}
func TestDriverCapabilitiesList(t *testing.T) {
testCases := []struct {
capabilities DriverCapabilities
expected []string
}{
{
capabilities: NewDriverCapabilities(""),
},
{
capabilities: NewDriverCapabilities(" "),
},
{
capabilities: NewDriverCapabilities(","),
},
{
capabilities: NewDriverCapabilities(",cap"),
expected: []string{"cap"},
},
{
capabilities: NewDriverCapabilities("cap,"),
expected: []string{"cap"},
},
{
capabilities: NewDriverCapabilities("cap0,,cap1"),
expected: []string{"cap0", "cap1"},
},
{
capabilities: NewDriverCapabilities("cap1,cap0,cap3"),
expected: []string{"cap0", "cap1", "cap3"},
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
require.EqualValues(t, tc.expected, tc.capabilities.List())
})
}
}

View File

@@ -42,27 +42,18 @@ type CUDA map[string]string
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
// The process environment is read (if present) to construc the CUDA Image.
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
if spec == nil || spec.Process == nil {
return NewCUDAImageFromEnv(nil)
var env []string
if spec != nil && spec.Process != nil {
env = spec.Process.Env
}
return NewCUDAImageFromEnv(spec.Process.Env)
return New(WithEnv(env))
}
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
// is a list of strings of the form ENVAR=VALUE.
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
c := make(CUDA)
for _, e := range env {
parts := strings.SplitN(e, "=", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("invalid environment variable: %v", e)
}
c[parts[0]] = parts[1]
}
return c, nil
return New(WithEnv(env))
}
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
@@ -77,11 +68,9 @@ func (i CUDA) IsLegacy() bool {
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
// variables.
func (i CUDA) GetRequirements() ([]string, error) {
// TODO: We need not process this if disable require is set, but this will be done
// in a single follow-up to ensure that the behavioural change is accurately captured.
// if i.HasDisableRequire() {
// return nil, nil
// }
if i.HasDisableRequire() {
return nil, nil
}
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
var requirements []string
@@ -159,9 +148,10 @@ func (i CUDA) GetDriverCapabilities() DriverCapabilities {
}
func (i CUDA) legacyVersion() (string, error) {
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
cudaVersion := i[envCUDAVersion]
majorMinor, err := parseMajorMinorVersion(cudaVersion)
if err != nil {
return "", fmt.Errorf("invalid CUDA version: %v", err)
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
}
return majorMinor, nil

View File

@@ -106,6 +106,16 @@ func TestGetRequirements(t *testing.T) {
env: []string{"CUDA_VERSION=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla"},
requirements: []string{"cuda>=11.6", "brand=tesla"},
},
{
description: "NVIDIA_DISABLE_REQUIRE ignores requirements",
env: []string{"NVIDIA_REQUIRE_CUDA=cuda>=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla", "NVIDIA_DISABLE_REQUIRE=true"},
requirements: []string{},
},
{
description: "NVIDIA_DISABLE_REQUIRE ignores legacy image requirements",
env: []string{"CUDA_VERSION=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla", "NVIDIA_DISABLE_REQUIRE=true"},
requirements: []string{},
},
}
for _, tc := range testCases {

View File

@@ -0,0 +1,43 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"github.com/opencontainers/runtime-spec/specs-go"
)
const (
capSysAdmin = "CAP_SYS_ADMIN"
)
// IsPrivileged returns true if the container is a privileged container.
func IsPrivileged(s *specs.Spec) bool {
if s.Process.Capabilities == nil {
return false
}
// We only make sure that the bounding capabibility set has
// CAP_SYS_ADMIN. This allows us to make sure that the container was
// actually started as '--privileged', but also allow non-root users to
// access the privileged NVIDIA capabilities.
for _, c := range s.Process.Capabilities.Bounding {
if c == capSysAdmin {
return true
}
}
return false
}

View File

@@ -16,20 +16,6 @@
package config
import (
"fmt"
"github.com/pelletier/go-toml"
"github.com/sirupsen/logrus"
)
const (
dockerRuncExecutableName = "docker-runc"
runcExecutableName = "runc"
auto = "auto"
)
// RuntimeConfig stores the config options for the NVIDIA Container Runtime
type RuntimeConfig struct {
DebugFilePath string `toml:"debug"`
@@ -50,52 +36,22 @@ type modesConfig struct {
type cdiModeConfig struct {
// SpecDirs allows for the default spec dirs for CDI to be overridden
SpecDirs []string `toml:"spec-dirs"`
// DefaultKind sets the default kind to be used when constructing fully-qualified CDI device names
DefaultKind string `toml:"default-kind"`
// AnnotationPrefixes sets the allowed prefixes for CDI annotation-based device injection
AnnotationPrefixes []string `toml:"annotation-prefixes"`
}
type csvModeConfig struct {
MountSpecPath string `toml:"mount-spec-path"`
}
// dummy allows us to unmarshal only a RuntimeConfig from a *toml.Tree
type dummy struct {
Runtime RuntimeConfig `toml:"nvidia-container-runtime"`
}
// getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) {
cfg := GetDefaultRuntimeConfig()
if toml == nil {
return cfg, nil
}
d := dummy{
Runtime: *cfg,
}
if err := toml.Unmarshal(&d); err != nil {
return nil, fmt.Errorf("failed to unmarshal runtime config: %v", err)
}
return &d.Runtime, nil
}
// GetDefaultRuntimeConfig defines the default values for the config
func GetDefaultRuntimeConfig() *RuntimeConfig {
c := RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: logrus.InfoLevel.String(),
Runtimes: []string{
dockerRuncExecutableName,
runcExecutableName,
},
Mode: auto,
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
},
func GetDefaultRuntimeConfig() (*RuntimeConfig, error) {
cfg, err := GetDefault()
if err != nil {
return nil, err
}
return &c
return &cfg.NVIDIAContainerRuntimeConfig, nil
}

218
internal/config/toml.go Normal file
View File

@@ -0,0 +1,218 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"bytes"
"fmt"
"io"
"os"
"regexp"
"github.com/pelletier/go-toml"
)
// Toml is a type for the TOML representation of a config.
type Toml toml.Tree
type options struct {
configFile string
required bool
}
// Option is a functional option for loading TOML config files.
type Option func(*options)
// WithConfigFile sets the config file option.
func WithConfigFile(configFile string) Option {
return func(o *options) {
o.configFile = configFile
}
}
// WithRequired sets the required option.
// If this is set to true, a failure to open the specified file is treated as an error
func WithRequired(required bool) Option {
return func(o *options) {
o.required = required
}
}
// New creates a new toml tree based on the provided options
func New(opts ...Option) (*Toml, error) {
o := &options{}
for _, opt := range opts {
opt(o)
}
return o.loadConfigToml()
}
func (o options) loadConfigToml() (*Toml, error) {
filename := o.configFile
if filename == "" {
return defaultToml()
}
_, err := os.Stat(filename)
if os.IsNotExist(err) && o.required {
return nil, os.ErrNotExist
}
tomlFile, err := os.Open(filename)
if os.IsNotExist(err) {
return defaultToml()
} else if err != nil {
return nil, fmt.Errorf("failed to load specified config file: %w", err)
}
defer tomlFile.Close()
return loadConfigTomlFrom(tomlFile)
}
func defaultToml() (*Toml, error) {
cfg, err := GetDefault()
if err != nil {
return nil, err
}
contents, err := toml.Marshal(cfg)
if err != nil {
return nil, err
}
return loadConfigTomlFrom(bytes.NewReader(contents))
}
func loadConfigTomlFrom(reader io.Reader) (*Toml, error) {
tree, err := toml.LoadReader(reader)
if err != nil {
return nil, err
}
return (*Toml)(tree), nil
}
// Config returns the typed config associated with the toml tree.
func (t *Toml) Config() (*Config, error) {
cfg, err := GetDefault()
if err != nil {
return nil, err
}
if t == nil {
return cfg, nil
}
if err := t.Unmarshal(cfg); err != nil {
return nil, fmt.Errorf("failed to unmarshal config: %v", err)
}
return cfg, nil
}
// Unmarshal wraps the toml.Tree Unmarshal function.
func (t *Toml) Unmarshal(v interface{}) error {
return (*toml.Tree)(t).Unmarshal(v)
}
// Save saves the config to the specified Writer.
func (t *Toml) Save(w io.Writer) (int64, error) {
contents, err := t.contents()
if err != nil {
return 0, err
}
n, err := w.Write(contents)
return int64(n), err
}
// contents returns the config TOML as a byte slice.
// Any required formatting is applied.
func (t Toml) contents() ([]byte, error) {
commented := t.commentDefaults()
buffer := bytes.NewBuffer(nil)
enc := toml.NewEncoder(buffer).Indentation("")
if err := enc.Encode((*toml.Tree)(commented)); err != nil {
return nil, fmt.Errorf("invalid config: %v", err)
}
return t.format(buffer.Bytes())
}
// format fixes the comments for the config to ensure that they start in column
// 1 and are not followed by a space.
func (t Toml) format(contents []byte) ([]byte, error) {
r, err := regexp.Compile(`(\n*)\s*?#\s*(\S.*)`)
if err != nil {
return nil, fmt.Errorf("unable to compile regexp: %v", err)
}
replaced := r.ReplaceAll(contents, []byte("$1#$2"))
return replaced, nil
}
// Delete deletes the specified key from the TOML config.
func (t *Toml) Delete(key string) error {
return (*toml.Tree)(t).Delete(key)
}
// Get returns the value for the specified key.
func (t *Toml) Get(key string) interface{} {
return (*toml.Tree)(t).Get(key)
}
// Set sets the specified key to the specified value in the TOML config.
func (t *Toml) Set(key string, value interface{}) {
(*toml.Tree)(t).Set(key, value)
}
// commentDefaults applies the required comments for default values to the Toml.
func (t *Toml) commentDefaults() *Toml {
asToml := (*toml.Tree)(t)
commentedDefaults := map[string]interface{}{
"swarm-resource": "DOCKER_RESOURCE_GPU",
"accept-nvidia-visible-devices-envvar-when-unprivileged": true,
"accept-nvidia-visible-devices-as-volume-mounts": false,
"nvidia-container-cli.root": "/run/nvidia/driver",
"nvidia-container-cli.path": "/usr/bin/nvidia-container-cli",
"nvidia-container-cli.debug": "/var/log/nvidia-container-toolkit.log",
"nvidia-container-cli.ldcache": "/etc/ld.so.cache",
"nvidia-container-cli.no-cgroups": false,
"nvidia-container-cli.user": "root:video",
"nvidia-container-runtime.debug": "/var/log/nvidia-container-runtime.log",
}
for k, v := range commentedDefaults {
set := asToml.Get(k)
if !shouldComment(k, v, set) {
continue
}
asToml.SetWithComment(k, "", true, v)
}
return (*Toml)(asToml)
}
func shouldComment(key string, defaultValue interface{}, setTo interface{}) bool {
if key == "nvidia-container-cli.user" && !getCommentedUserGroup() {
return false
}
if key == "nvidia-container-runtime.debug" && setTo == "/dev/null" {
return true
}
if setTo == nil || defaultValue == setTo || setTo == "" {
return true
}
return false
}

View File

@@ -0,0 +1,248 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"bytes"
"strings"
"testing"
"github.com/pelletier/go-toml"
"github.com/stretchr/testify/require"
)
func TestTomlSave(t *testing.T) {
testCases := []struct {
description string
config *Toml
expected string
}{
{
description: "defaultConfig",
config: func() *Toml {
t, _ := defaultToml()
// TODO: We handle the ldconfig path specifically, since this is platform
// dependent.
(*toml.Tree)(t).Set("nvidia-container-cli.ldconfig", "OVERRIDDEN")
return t
}(),
expected: `
#accept-nvidia-visible-devices-as-volume-mounts = false
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
#swarm-resource = "DOCKER_RESOURCE_GPU"
[nvidia-container-cli]
#debug = "/var/log/nvidia-container-toolkit.log"
environment = []
#ldcache = "/etc/ld.so.cache"
ldconfig = "OVERRIDDEN"
load-kmods = true
#no-cgroups = false
#path = "/usr/bin/nvidia-container-cli"
#root = "/run/nvidia/driver"
#user = "root:video"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
mode = "auto"
runtimes = ["docker-runc", "runc"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime-hook]
path = "nvidia-container-runtime-hook"
skip-mode-detection = false
[nvidia-ctk]
path = "nvidia-ctk"
`,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
buffer := new(bytes.Buffer)
_, err := tc.config.Save(buffer)
require.NoError(t, err)
require.EqualValues(t,
strings.TrimSpace(tc.expected),
strings.TrimSpace(buffer.String()),
)
})
}
}
func TestFormat(t *testing.T) {
testCases := []struct {
input string
expected string
}{
{
input: "# comment",
expected: "#comment",
},
{
input: " #comment",
expected: "#comment",
},
{
input: " # comment",
expected: "#comment",
},
{
input: strings.Join([]string{
"some",
"# comment",
" # comment",
" #comment",
"other"}, "\n"),
expected: strings.Join([]string{
"some",
"#comment",
"#comment",
"#comment",
"other"}, "\n"),
},
}
for _, tc := range testCases {
t.Run(tc.input, func(t *testing.T) {
actual, _ := (Toml{}).format([]byte(tc.input))
require.Equal(t, tc.expected, string(actual))
})
}
}
func TestGetFormattedConfig(t *testing.T) {
expectedLines := []string{
"#no-cgroups = false",
"#debug = \"/var/log/nvidia-container-toolkit.log\"",
"#debug = \"/var/log/nvidia-container-runtime.log\"",
}
contents, err := createEmpty().contents()
require.NoError(t, err)
lines := strings.Split(string(contents), "\n")
for _, line := range expectedLines {
require.Contains(t, lines, line)
}
}
func TestTomlContents(t *testing.T) {
testCases := []struct {
description string
contents map[string]interface{}
expected string
}{
{
description: "empty config returns commented defaults",
expected: `
#accept-nvidia-visible-devices-as-volume-mounts = false
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#swarm-resource = "DOCKER_RESOURCE_GPU"
[nvidia-container-cli]
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
#no-cgroups = false
#path = "/usr/bin/nvidia-container-cli"
#root = "/run/nvidia/driver"
#user = "root:video"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"`,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
tree, err := toml.TreeFromMap(tc.contents)
require.NoError(t, err)
cfg := (*Toml)(tree)
contents, err := cfg.contents()
require.NoError(t, err)
require.EqualValues(t,
strings.TrimSpace(tc.expected),
strings.TrimSpace(string(contents)),
)
})
}
}
func TestConfigFromToml(t *testing.T) {
testCases := []struct {
description string
contents map[string]interface{}
expectedConfig *Config
}{
{
description: "empty config returns default config",
contents: nil,
expectedConfig: func() *Config {
c, _ := GetDefault()
return c
}(),
},
{
description: "contents overrides default",
contents: map[string]interface{}{
"nvidia-container-runtime": map[string]interface{}{
"debug": "/some/log/file.log",
"mode": "csv",
},
},
expectedConfig: func() *Config {
c, _ := GetDefault()
c.NVIDIAContainerRuntimeConfig.DebugFilePath = "/some/log/file.log"
c.NVIDIAContainerRuntimeConfig.Mode = "csv"
return c
}(),
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
tomlCfg := fromMap(tc.contents)
config, err := tomlCfg.Config()
require.NoError(t, err)
require.EqualValues(t, tc.expectedConfig, config)
})
}
}
func fromMap(c map[string]interface{}) *Toml {
tree, _ := toml.TreeFromMap(c)
return (*Toml)(tree)
}
func createEmpty() *Toml {
return fromMap(nil)
}

View File

@@ -16,31 +16,7 @@
package config
import "github.com/pelletier/go-toml"
// CTKConfig stores the config options for the NVIDIA Container Toolkit CLI (nvidia-ctk)
type CTKConfig struct {
Path string `toml:"path"`
}
// getCTKConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getCTKConfigFrom(toml *toml.Tree) *CTKConfig {
cfg := getDefaultCTKConfig()
if toml == nil {
return cfg
}
cfg.Path = toml.GetDefault("nvidia-ctk.path", cfg.Path).(string)
return cfg
}
// getDefaultCTKConfig defines the default values for the config
func getDefaultCTKConfig() *CTKConfig {
c := CTKConfig{
Path: "nvidia-ctk",
}
return &c
}

View File

@@ -17,8 +17,8 @@
package discover
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
// charDevices is a discover for a list of character devices
@@ -27,7 +27,7 @@ type charDevices mounts
var _ Discover = (*charDevices)(nil)
// NewCharDeviceDiscoverer creates a discoverer which locates the specified set of device nodes.
func NewCharDeviceDiscoverer(logger *logrus.Logger, devices []string, root string) Discover {
func NewCharDeviceDiscoverer(logger logger.Interface, devices []string, root string) Discover {
locator := lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(root),
@@ -37,7 +37,7 @@ func NewCharDeviceDiscoverer(logger *logrus.Logger, devices []string, root strin
}
// NewDeviceDiscoverer creates a discoverer which locates the specified set of device nodes using the specified locator.
func NewDeviceDiscoverer(logger *logrus.Logger, locator lookup.Locator, root string, devices []string) Discover {
func NewDeviceDiscoverer(logger logger.Interface, locator lookup.Locator, root string, devices []string) Discover {
m := NewMounts(logger, locator, root, devices).(*mounts)
return (*charDevices)(m)
@@ -58,7 +58,11 @@ func (d *charDevices) Devices() ([]Device, error) {
}
var devices []Device
for _, mount := range devicesAsMounts {
devices = append(devices, Device(mount))
device := Device{
HostPath: mount.HostPath,
Path: mount.Path,
}
devices = append(devices, device)
}
return devices, nil

View File

@@ -1,107 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
// NewFromCSVFiles creates a discoverer for the specified CSV files. A logger is also supplied.
// The constructed discoverer is comprised of a list, with each element in the list being associated with a
// single CSV files.
func NewFromCSVFiles(logger *logrus.Logger, files []string, root string) (Discover, error) {
if len(files) == 0 {
logger.Warnf("No CSV files specified")
return None{}, nil
}
symlinkLocator := lookup.NewSymlinkLocator(logger, root)
locators := map[csv.MountSpecType]lookup.Locator{
csv.MountSpecDev: lookup.NewCharDeviceLocator(lookup.WithLogger(logger), lookup.WithRoot(root)),
csv.MountSpecDir: lookup.NewDirectoryLocator(logger, root),
// Libraries and symlinks are handled in the same way
csv.MountSpecLib: symlinkLocator,
csv.MountSpecSym: symlinkLocator,
}
var mountSpecs []*csv.MountSpec
for _, filename := range files {
targets, err := loadCSVFile(logger, filename)
if err != nil {
logger.Warnf("Skipping CSV file %v: %v", filename, err)
continue
}
mountSpecs = append(mountSpecs, targets...)
}
return newFromMountSpecs(logger, locators, root, mountSpecs)
}
// loadCSVFile loads the specified CSV file and returns the list of mount specs
func loadCSVFile(logger *logrus.Logger, filename string) ([]*csv.MountSpec, error) {
// Create a discoverer for each file-kind combination
targets, err := csv.NewCSVFileParser(logger, filename).Parse()
if err != nil {
return nil, fmt.Errorf("failed to parse CSV file: %v", err)
}
if len(targets) == 0 {
return nil, fmt.Errorf("CSV file is empty")
}
return targets, nil
}
// newFromMountSpecs creates a discoverer for the CSV file. A logger is also supplied.
// A list of csvDiscoverers is returned, with each being associated with a single MountSpecType.
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, root string, targets []*csv.MountSpec) (Discover, error) {
if len(targets) == 0 {
return &None{}, nil
}
var discoverers []Discover
var mountSpecTypes []csv.MountSpecType
candidatesByType := make(map[csv.MountSpecType][]string)
for _, t := range targets {
if _, exists := candidatesByType[t.Type]; !exists {
mountSpecTypes = append(mountSpecTypes, t.Type)
}
candidatesByType[t.Type] = append(candidatesByType[t.Type], t.Path)
}
for _, t := range mountSpecTypes {
locator, exists := locators[t]
if !exists {
return nil, fmt.Errorf("no locator defined for '%v'", t)
}
var m Discover
switch t {
case csv.MountSpecDev:
m = NewDeviceDiscoverer(logger, locator, root, candidatesByType[t])
default:
m = NewMounts(logger, locator, root, candidatesByType[t])
}
discoverers = append(discoverers, m)
}
return &list{discoverers: discoverers}, nil
}

View File

@@ -1,142 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestNewFromMountSpec(t *testing.T) {
logger, _ := testlog.NewNullLogger()
locators := map[csv.MountSpecType]lookup.Locator{
"dev": &lookup.LocatorMock{},
"lib": &lookup.LocatorMock{},
}
testCases := []struct {
description string
root string
targets []*csv.MountSpec
expectedError error
expectedDiscoverer Discover
}{
{
description: "empty targets returns None discoverer list",
expectedDiscoverer: &None{},
},
{
description: "unexpected locator returns error",
targets: []*csv.MountSpec{
{
Type: "foo",
Path: "bar",
},
},
expectedError: fmt.Errorf("no locator defined for foo"),
},
{
description: "creates discoverers based on type",
targets: []*csv.MountSpec{
{
Type: "dev",
Path: "dev0",
},
{
Type: "lib",
Path: "lib0",
},
{
Type: "dev",
Path: "dev1",
},
},
expectedDiscoverer: &list{
discoverers: []Discover{
(*charDevices)(
&mounts{
logger: logger,
lookup: locators["dev"],
root: "/",
required: []string{"dev0", "dev1"},
},
),
&mounts{
logger: logger,
lookup: locators["lib"],
root: "/",
required: []string{"lib0"},
},
},
},
},
{
description: "sets root",
targets: []*csv.MountSpec{
{
Type: "dev",
Path: "dev0",
},
{
Type: "lib",
Path: "lib0",
},
{
Type: "dev",
Path: "dev1",
},
},
root: "/some/root",
expectedDiscoverer: &list{
discoverers: []Discover{
(*charDevices)(
&mounts{
logger: logger,
lookup: locators["dev"],
root: "/some/root",
required: []string{"dev0", "dev1"},
},
),
&mounts{
logger: logger,
lookup: locators["lib"],
root: "/some/root",
required: []string{"lib0"},
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
discoverer, err := newFromMountSpecs(logger, locators, tc.root, tc.targets)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
require.EqualValues(t, tc.expectedDiscoverer, discoverer)
})
}
}

View File

@@ -16,12 +16,6 @@
package discover
// Config represents the configuration options for discovery
type Config struct {
Root string
NvidiaCTKPath string
}
// Device represents a discovered character device.
type Device struct {
HostPath string
@@ -32,6 +26,7 @@ type Device struct {
type Mount struct {
HostPath string
Path string
Options []string
}
// Hook represents a discovered hook.

View File

@@ -13,25 +13,25 @@ var _ Discover = &DiscoverMock{}
// DiscoverMock is a mock implementation of Discover.
//
// func TestSomethingThatUsesDiscover(t *testing.T) {
// func TestSomethingThatUsesDiscover(t *testing.T) {
//
// // make and configure a mocked Discover
// mockedDiscover := &DiscoverMock{
// DevicesFunc: func() ([]Device, error) {
// panic("mock out the Devices method")
// },
// HooksFunc: func() ([]Hook, error) {
// panic("mock out the Hooks method")
// },
// MountsFunc: func() ([]Mount, error) {
// panic("mock out the Mounts method")
// },
// }
// // make and configure a mocked Discover
// mockedDiscover := &DiscoverMock{
// DevicesFunc: func() ([]Device, error) {
// panic("mock out the Devices method")
// },
// HooksFunc: func() ([]Hook, error) {
// panic("mock out the Hooks method")
// },
// MountsFunc: func() ([]Mount, error) {
// panic("mock out the Mounts method")
// },
// }
//
// // use mockedDiscover in code that requires Discover
// // and then make assertions.
// // use mockedDiscover in code that requires Discover
// // and then make assertions.
//
// }
// }
type DiscoverMock struct {
// DevicesFunc mocks the Devices method.
DevicesFunc func() ([]Device, error)
@@ -78,7 +78,8 @@ func (mock *DiscoverMock) Devices() ([]Device, error) {
// DevicesCalls gets all the calls that were made to Devices.
// Check the length with:
// len(mockedDiscover.DevicesCalls())
//
// len(mockedDiscover.DevicesCalls())
func (mock *DiscoverMock) DevicesCalls() []struct {
} {
var calls []struct {
@@ -108,7 +109,8 @@ func (mock *DiscoverMock) Hooks() ([]Hook, error) {
// HooksCalls gets all the calls that were made to Hooks.
// Check the length with:
// len(mockedDiscover.HooksCalls())
//
// len(mockedDiscover.HooksCalls())
func (mock *DiscoverMock) HooksCalls() []struct {
} {
var calls []struct {
@@ -138,7 +140,8 @@ func (mock *DiscoverMock) Mounts() ([]Mount, error) {
// MountsCalls gets all the calls that were made to Mounts.
// Check the length with:
// len(mockedDiscover.MountsCalls())
//
// len(mockedDiscover.MountsCalls())
func (mock *DiscoverMock) MountsCalls() []struct {
} {
var calls []struct {

View File

@@ -16,7 +16,7 @@
package discover
import "github.com/sirupsen/logrus"
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
// Filter defines an interface for filtering discovered entities
type Filter interface {
@@ -26,12 +26,12 @@ type Filter interface {
// filtered represents a filtered discoverer
type filtered struct {
Discover
logger *logrus.Logger
logger logger.Interface
filter Filter
}
// newFilteredDisoverer creates a discoverer that applies the specified filter to the returned entities of the discoverer
func newFilteredDisoverer(logger *logrus.Logger, applyTo Discover, filter Filter) Discover {
func newFilteredDisoverer(logger logger.Interface, applyTo Discover, filter Filter) Discover {
return filtered{
Discover: applyTo,
logger: logger,

View File

@@ -17,19 +17,19 @@
package discover
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
type gdsDeviceDiscoverer struct {
None
logger *logrus.Logger
logger logger.Interface
devices Discover
mounts Discover
}
// NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
func NewGDSDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
func NewGDSDiscoverer(logger logger.Interface, root string) (Discover, error) {
devices := NewCharDeviceDiscoverer(
logger,
[]string{"/dev/nvidia-fs*"},
@@ -38,7 +38,7 @@ func NewGDSDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
udev := NewMounts(
logger,
lookup.NewDirectoryLocator(logger, root),
lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(root)),
root,
[]string{"/run/udev"},
)

View File

@@ -20,29 +20,29 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
)
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
func NewGraphicsDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, cfg *Config) (Discover, error) {
root := cfg.Root
mounts, err := NewGraphicsMountsDiscoverer(logger, root)
func NewGraphicsDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string, nvidiaCTKPath string) (Discover, error) {
mounts, err := NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath)
if err != nil {
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
}
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, root)
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
}
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, cfg)
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, driverRoot, nvidiaCTKPath)
discover := Merge(
Merge(drmDeviceNodes, drmByPathSymlinks),
@@ -53,15 +53,15 @@ func NewGraphicsDiscoverer(logger *logrus.Logger, devices image.VisibleDevices,
}
// NewGraphicsMountsDiscoverer creates a discoverer for the mounts required by graphics tools such as vulkan.
func NewGraphicsMountsDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
locator, err := lookup.NewLibraryLocator(logger, root)
func NewGraphicsMountsDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) (Discover, error) {
locator, err := lookup.NewLibraryLocator(logger, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct library locator: %v", err)
}
libraries := NewMounts(
logger,
locator,
root,
driverRoot,
[]string{
"libnvidia-egl-gbm.so",
},
@@ -71,10 +71,10 @@ func NewGraphicsMountsDiscoverer(logger *logrus.Logger, root string) (Discover,
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(root),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths("/etc", "/usr/share"),
),
root,
driverRoot,
[]string{
"glvnd/egl_vendor.d/10_nvidia.json",
"vulkan/icd.d/nvidia_icd.json",
@@ -84,9 +84,12 @@ func NewGraphicsMountsDiscoverer(logger *logrus.Logger, root string) (Discover,
},
)
xorg := optionalXorgDiscoverer(logger, driverRoot, nvidiaCTKPath)
discover := Merge(
libraries,
jsonMounts,
xorg,
)
return discover, nil
@@ -94,18 +97,18 @@ func NewGraphicsMountsDiscoverer(logger *logrus.Logger, root string) (Discover,
type drmDevicesByPath struct {
None
logger *logrus.Logger
logger logger.Interface
nvidiaCTKPath string
root string
driverRoot string
devicesFrom Discover
}
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
func newCreateDRMByPathSymlinks(logger *logrus.Logger, devices Discover, cfg *Config) Discover {
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, driverRoot string, nvidiaCTKPath string) Discover {
d := drmDevicesByPath{
logger: logger,
nvidiaCTKPath: FindNvidiaCTK(logger, cfg.NvidiaCTKPath),
root: cfg.Root,
nvidiaCTKPath: nvidiaCTKPath,
driverRoot: driverRoot,
devicesFrom: devices,
}
@@ -152,7 +155,7 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
linkLocator := lookup.NewFileLocator(
lookup.WithLogger(d.logger),
lookup.WithRoot(d.root),
lookup.WithRoot(d.driverRoot),
)
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
if err != nil {
@@ -178,21 +181,21 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
}
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
func newDRMDeviceDiscoverer(logger *logrus.Logger, devices image.VisibleDevices, root string) (Discover, error) {
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Discover, error) {
allDevices := NewDeviceDiscoverer(
logger,
lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(root),
lookup.WithRoot(driverRoot),
),
root,
driverRoot,
[]string{
"/dev/dri/card*",
"/dev/dri/renderD*",
},
)
filter, err := newDRMDeviceFilter(logger, devices, root)
filter, err := newDRMDeviceFilter(logger, devices, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
}
@@ -208,8 +211,8 @@ func newDRMDeviceDiscoverer(logger *logrus.Logger, devices image.VisibleDevices,
}
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
func newDRMDeviceFilter(logger *logrus.Logger, devices image.VisibleDevices, root string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(root)
func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to read GPU information: %v", err)
}
@@ -243,6 +246,123 @@ func newDRMDeviceFilter(logger *logrus.Logger, devices image.VisibleDevices, roo
return filter, nil
}
type xorgHooks struct {
libraries Discover
driverVersion string
nvidiaCTKPath string
}
var _ Discover = (*xorgHooks)(nil)
// optionalXorgDiscoverer creates a discoverer for Xorg libraries.
// If the creation of the discoverer fails, a None discoverer is returned.
func optionalXorgDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) Discover {
xorg, err := newXorgDiscoverer(logger, driverRoot, nvidiaCTKPath)
if err != nil {
logger.Warningf("Failed to create Xorg discoverer: %v; skipping xorg libraries", err)
return None{}
}
return xorg
}
func newXorgDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string) (Discover, error) {
libCudaPaths, err := cuda.New(
cuda.WithLogger(logger),
cuda.WithDriverRoot(driverRoot),
).Locate(".*.*")
if err != nil {
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
}
libcudaPath := libCudaPaths[0]
version := strings.TrimPrefix(filepath.Base(libcudaPath), "libcuda.so.")
if version == "" {
return nil, fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
}
libRoot := filepath.Dir(libcudaPath)
xorgLibs := NewMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths(libRoot, "/usr/lib/x86_64-linux-gnu"),
lookup.WithCount(1),
),
driverRoot,
[]string{
"nvidia/xorg/nvidia_drv.so",
fmt.Sprintf("nvidia/xorg/libglxserver_nvidia.so.%s", version),
},
)
xorgHooks := xorgHooks{
libraries: xorgLibs,
driverVersion: version,
nvidiaCTKPath: nvidiaCTKPath,
}
xorgConfg := NewMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths("/usr/share"),
),
driverRoot,
[]string{"X11/xorg.conf.d/10-nvidia.conf"},
)
d := Merge(
xorgLibs,
xorgConfg,
xorgHooks,
)
return d, nil
}
// Devices returns no devices for Xorg
func (m xorgHooks) Devices() ([]Device, error) {
return nil, nil
}
// Hooks returns a hook to create symlinks for Xorg libraries
func (m xorgHooks) Hooks() ([]Hook, error) {
mounts, err := m.libraries.Mounts()
if err != nil {
return nil, fmt.Errorf("failed to get mounts: %v", err)
}
if len(mounts) == 0 {
return nil, nil
}
var target string
for _, mount := range mounts {
filename := filepath.Base(mount.HostPath)
if filename == "libglxserver_nvidia.so."+m.driverVersion {
target = mount.Path
}
}
if target == "" {
return nil, nil
}
link := strings.TrimSuffix(target, "."+m.driverVersion)
links := []string{fmt.Sprintf("%s::%s", filepath.Base(target), link)}
symlinkHook := CreateCreateSymlinkHook(
m.nvidiaCTKPath,
links,
)
return symlinkHook.Hooks()
}
// Mounts returns the libraries required for Xorg
func (m xorgHooks) Mounts() ([]Mount, error) {
return nil, nil
}
// selectDeviceByPath is a filter that allows devices to be selected by the path
type selectDeviceByPath map[string]bool

View File

@@ -19,45 +19,49 @@ package discover
import (
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/sirupsen/logrus"
)
const (
nvidiaCTKExecutable = "nvidia-ctk"
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
)
var _ Discover = (*Hook)(nil)
// Devices returns an empty list of devices for a Hook discoverer.
func (h Hook) Devices() ([]Device, error) {
return nil, nil
}
// Mounts returns an empty list of mounts for a Hook discoverer.
func (h Hook) Mounts() ([]Mount, error) {
return nil, nil
}
// Hooks allows the Hook type to also implement the Discoverer interface.
// It returns a single hook
func (h Hook) Hooks() ([]Hook, error) {
return []Hook{h}, nil
}
// CreateCreateSymlinkHook creates a hook which creates a symlink from link -> target.
func CreateCreateSymlinkHook(nvidiaCTKPath string, links []string) Discover {
if len(links) == 0 {
return None{}
}
var args []string
for _, link := range links {
args = append(args, "--link", link)
}
return CreateNvidiaCTKHook(
nvidiaCTKPath,
"create-symlinks",
args...,
)
}
// CreateNvidiaCTKHook creates a hook which invokes the NVIDIA Container CLI hook subcommand.
func CreateNvidiaCTKHook(executable string, hookName string, additionalArgs ...string) Hook {
func CreateNvidiaCTKHook(nvidiaCTKPath string, hookName string, additionalArgs ...string) Hook {
return Hook{
Lifecycle: cdi.CreateContainerHook,
Path: executable,
Args: append([]string{filepath.Base(executable), "hook", hookName}, additionalArgs...),
Path: nvidiaCTKPath,
Args: append([]string{filepath.Base(nvidiaCTKPath), "hook", hookName}, additionalArgs...),
}
}
// FindNvidiaCTK locates the nvidia-ctk executable to be used in hooks.
// If an override is specified, this is used instead.
func FindNvidiaCTK(logger *logrus.Logger, override string) string {
if override != "" {
logger.Debugf("Using specified NVIDIA Container Toolkit CLI path %v", override)
return override
}
lookup := lookup.NewExecutableLocator(logger, "")
hookPath := nvidiaCTKDefaultFilePath
targets, err := lookup.Locate(nvidiaCTKExecutable)
if err != nil {
logger.Warnf("Failed to locate %v: %v", nvidiaCTKExecutable, err)
} else if len(targets) == 0 {
logger.Warnf("%v not found", nvidiaCTKExecutable)
} else {
logger.Debugf("Found %v candidates: %v", nvidiaCTKExecutable, targets)
hookPath = targets[0]
}
logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
return hookPath
}

View File

@@ -0,0 +1,61 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestIPCMounts(t *testing.T) {
logger, _ := testlog.NewNullLogger()
l := ipcMounts(
mounts{
logger: logger,
lookup: &lookup.LocatorMock{
LocateFunc: func(path string) ([]string, error) {
return []string{"/host/path"}, nil
},
},
required: []string{"target"},
},
)
mounts, err := l.Mounts()
require.NoError(t, err)
require.EqualValues(
t,
[]Mount{
{
HostPath: "/host/path",
Path: "/host/path",
Options: []string{
"ro",
"nosuid",
"nodev",
"bind",
"noexec",
},
},
},
mounts,
)
}

78
internal/discover/ipc.go Normal file
View File

@@ -0,0 +1,78 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
type ipcMounts mounts
// NewIPCDiscoverer creats a discoverer for NVIDIA IPC sockets.
func NewIPCDiscoverer(logger logger.Interface, driverRoot string) (Discover, error) {
sockets := newMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithSearchPaths("/run", "/var/run"),
lookup.WithCount(1),
),
driverRoot,
[]string{
"/nvidia-persistenced/socket",
"/nvidia-fabricmanager/socket",
},
)
mps := newMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithCount(1),
),
driverRoot,
[]string{
"/tmp/nvidia-mps",
},
)
d := Merge(
(*ipcMounts)(sockets),
(*ipcMounts)(mps),
)
return d, nil
}
// Mounts returns the discovered mounts with "noexec" added to the mount options.
func (d *ipcMounts) Mounts() ([]Mount, error) {
mounts, err := (*mounts)(d).Mounts()
if err != nil {
return nil, err
}
var modifiedMounts []Mount
for _, m := range mounts {
mount := m
mount.Options = append(m.Options, "noexec")
modifiedMounts = append(modifiedMounts, mount)
}
return modifiedMounts, nil
}

View File

@@ -21,17 +21,15 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. A logger can also be specified
func NewLDCacheUpdateHook(logger *logrus.Logger, mounts Discover, cfg *Config) (Discover, error) {
func NewLDCacheUpdateHook(logger logger.Interface, mounts Discover, nvidiaCTKPath string) (Discover, error) {
d := ldconfig{
logger: logger,
nvidiaCTKPath: nvidiaCTKPath,
mountsFrom: mounts,
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
nvidiaCTKPath: cfg.NvidiaCTKPath,
}
return &d, nil
@@ -39,10 +37,9 @@ func NewLDCacheUpdateHook(logger *logrus.Logger, mounts Discover, cfg *Config) (
type ldconfig struct {
None
logger *logrus.Logger
mountsFrom Discover
lookup lookup.Locator
logger logger.Interface
nvidiaCTKPath string
mountsFrom Discover
}
// Hooks checks the required mounts for libraries and returns a hook to update the LDcache for the discovered paths.

View File

@@ -31,11 +31,6 @@ const (
func TestLDCacheUpdateHook(t *testing.T) {
logger, _ := testlog.NewNullLogger()
cfg := Config{
Root: "/",
NvidiaCTKPath: testNvidiaCTKPath,
}
testCases := []struct {
description string
mounts []Mount
@@ -95,7 +90,7 @@ func TestLDCacheUpdateHook(t *testing.T) {
Lifecycle: "createContainer",
}
d, err := NewLDCacheUpdateHook(logger, mountMock, &cfg)
d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCTKPath)
require.NoError(t, err)
hooks, err := d.Hooks()

View File

@@ -16,12 +16,10 @@
package discover
import (
"github.com/sirupsen/logrus"
)
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
// NewMOFEDDiscoverer creates a discoverer for MOFED devices.
func NewMOFEDDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
func NewMOFEDDiscoverer(logger logger.Interface, root string) (Discover, error) {
devices := NewCharDeviceDiscoverer(
logger,
[]string{

View File

@@ -22,8 +22,8 @@ import (
"strings"
"sync"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
// mounts is a generic discoverer for Mounts. It is customized by specifying the
@@ -31,7 +31,7 @@ import (
// based on the entry in the list.
type mounts struct {
None
logger *logrus.Logger
logger logger.Interface
lookup lookup.Locator
root string
required []string
@@ -42,7 +42,12 @@ type mounts struct {
var _ Discover = (*mounts)(nil)
// NewMounts creates a discoverer for the required mounts using the specified locator.
func NewMounts(logger *logrus.Logger, lookup lookup.Locator, root string, required []string) Discover {
func NewMounts(logger logger.Interface, lookup lookup.Locator, root string, required []string) Discover {
return newMounts(logger, lookup, root, required)
}
// newMounts creates a discoverer for the required mounts using the specified locator.
func newMounts(logger logger.Interface, lookup lookup.Locator, root string, required []string) *mounts {
return &mounts{
logger: logger,
lookup: lookup,
@@ -70,11 +75,11 @@ func (d *mounts) Mounts() ([]Mount, error) {
d.logger.Debugf("Locating %v", candidate)
located, err := d.lookup.Locate(candidate)
if err != nil {
d.logger.Warnf("Could not locate %v: %v", candidate, err)
d.logger.Warningf("Could not locate %v: %v", candidate, err)
continue
}
if len(located) == 0 {
d.logger.Warnf("Missing %v", candidate)
d.logger.Warningf("Missing %v", candidate)
continue
}
d.logger.Debugf("Located %v as %v", candidate, located)
@@ -93,6 +98,12 @@ func (d *mounts) Mounts() ([]Mount, error) {
uniqueMounts[p] = Mount{
HostPath: p,
Path: r,
Options: []string{
"ro",
"nosuid",
"nodev",
"bind",
},
}
}
}

View File

@@ -35,6 +35,14 @@ func TestMountsReturnsEmptyDevices(t *testing.T) {
}
func TestMounts(t *testing.T) {
mountOptions := []string{
"ro",
"nosuid",
"nodev",
"bind",
}
logger, logHook := testlog.NewNullLogger()
testCases := []struct {
@@ -70,7 +78,7 @@ func TestMounts(t *testing.T) {
},
required: []string{"required"},
},
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
expectedMounts: []Mount{{Path: "located", HostPath: "located", Options: mountOptions}},
},
{
description: "mounts removes located duplicates",
@@ -83,7 +91,7 @@ func TestMounts(t *testing.T) {
},
required: []string{"required0", "required1"},
},
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
expectedMounts: []Mount{{Path: "located", HostPath: "located", Options: mountOptions}},
},
{
description: "mounts skips located errors",
@@ -98,7 +106,7 @@ func TestMounts(t *testing.T) {
},
required: []string{"required0", "error", "required1"},
},
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
expectedMounts: []Mount{{Path: "required0", HostPath: "required0", Options: mountOptions}, {Path: "required1", HostPath: "required1", Options: mountOptions}},
},
{
description: "mounts skips unlocated",
@@ -113,7 +121,7 @@ func TestMounts(t *testing.T) {
},
required: []string{"required0", "empty", "required1"},
},
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
expectedMounts: []Mount{{Path: "required0", HostPath: "required0", Options: mountOptions}, {Path: "required1", HostPath: "required1", Options: mountOptions}},
},
{
description: "mounts adds multiple",
@@ -129,10 +137,10 @@ func TestMounts(t *testing.T) {
required: []string{"required0", "multiple", "required1"},
},
expectedMounts: []Mount{
{Path: "required0", HostPath: "required0"},
{Path: "multiple0", HostPath: "multiple0"},
{Path: "multiple1", HostPath: "multiple1"},
{Path: "required1", HostPath: "required1"},
{Path: "required0", HostPath: "required0", Options: mountOptions},
{Path: "multiple0", HostPath: "multiple0", Options: mountOptions},
{Path: "multiple1", HostPath: "multiple1", Options: mountOptions},
{Path: "required1", HostPath: "required1", Options: mountOptions},
},
},
{
@@ -147,7 +155,7 @@ func TestMounts(t *testing.T) {
required: []string{"required0", "multiple", "required1"},
},
expectedMounts: []Mount{
{Path: "/located", HostPath: "/some/root/located"},
{Path: "/located", HostPath: "/some/root/located", Options: mountOptions},
},
},
}

View File

@@ -1,109 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"path/filepath"
"strings"
"github.com/sirupsen/logrus"
)
type symlinks struct {
None
logger *logrus.Logger
nvidiaCTKPath string
csvFiles []string
mountsFrom Discover
}
// NewCreateSymlinksHook creates a discoverer for a hook that creates required symlinks in the container
func NewCreateSymlinksHook(logger *logrus.Logger, csvFiles []string, mounts Discover, cfg *Config) (Discover, error) {
d := symlinks{
logger: logger,
nvidiaCTKPath: FindNvidiaCTK(logger, cfg.NvidiaCTKPath),
csvFiles: csvFiles,
mountsFrom: mounts,
}
return &d, nil
}
// Hooks returns a hook to create the symlinks from the required CSV files
func (d symlinks) Hooks() ([]Hook, error) {
var args []string
for _, f := range d.csvFiles {
args = append(args, "--csv-filename", f)
}
links, err := d.getSpecificLinkArgs()
if err != nil {
return nil, fmt.Errorf("failed to determine specific links: %v", err)
}
args = append(args, links...)
hook := CreateNvidiaCTKHook(
d.nvidiaCTKPath,
"create-symlinks",
args...,
)
return []Hook{hook}, nil
}
// getSpecificLinkArgs returns the required specic links that need to be created
func (d symlinks) getSpecificLinkArgs() ([]string, error) {
mounts, err := d.mountsFrom.Mounts()
if err != nil {
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
}
linkProcessed := make(map[string]bool)
var links []string
for _, m := range mounts {
var target string
var link string
lib := filepath.Base(m.Path)
if strings.HasPrefix(lib, "libcuda.so") {
// XXX Many applications wrongly assume that libcuda.so exists (e.g. with dlopen).
target = "libcuda.so.1"
link = "libcuda.so"
} else if strings.HasPrefix(lib, "libGLX_nvidia.so") {
// XXX GLVND requires this symlink for indirect GLX support.
target = lib
link = "libGLX_indirect.so.0"
} else if strings.HasPrefix(lib, "libnvidia-opticalflow.so") {
// XXX Fix missing symlink for libnvidia-opticalflow.so.
target = "libnvidia-opticalflow.so.1"
link = "libnvidia-opticalflow.so"
} else {
continue
}
if linkProcessed[link] {
continue
}
linkPath := filepath.Join(filepath.Dir(m.Path), link)
links = append(links, "--link", fmt.Sprintf("%v::%v", target, linkPath))
linkProcessed[link] = true
}
return links, nil
}

64
internal/dxcore/api.go Normal file
View File

@@ -0,0 +1,64 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package dxcore
import (
"github.com/NVIDIA/go-nvml/pkg/dl"
)
const (
libraryName = "libdxcore.so"
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
)
// dxcore stores a reference the dxcore dynamic library
var dxcore *context
// Init initializes the dxcore dynamic library
func Init() error {
c, err := initContext()
if err != nil {
return err
}
dxcore = c
return nil
}
// Shutdown closes the dxcore dynamic library
func Shutdown() error {
if dxcore != nil && dxcore.initialized != 0 {
dxcore.deinitContext()
}
return nil
}
// GetDriverStorePaths returns the list of driver store paths
func GetDriverStorePaths() []string {
var paths []string
selected := make(map[string]bool)
for i := 0; i < dxcore.getAdapterCount(); i++ {
path := dxcore.getAdapter(i).getDriverStorePath()
if selected[path] {
continue
}
selected[path] = true
paths = append(paths, path)
}
return paths
}

334
internal/dxcore/dxcore.c Normal file
View File

@@ -0,0 +1,334 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*/
#include <dlfcn.h>
#include <stdlib.h>
#include "dxcore.h"
// We define log_write as an empty macro to allow dxcore to remain unchanged.
#define log_write(...)
// We define the following macros to allow dxcore to remain largely unchanged.
#define log_info(msg) log_write('I', __FILE__, __LINE__, msg)
#define log_warn(msg) log_write('W', __FILE__, __LINE__, msg)
#define log_err(msg) log_write('E', __FILE__, __LINE__, msg)
#define log_infof(fmt, ...) log_write('I', __FILE__, __LINE__, fmt, __VA_ARGS__)
#define log_warnf(fmt, ...) log_write('W', __FILE__, __LINE__, fmt, __VA_ARGS__)
#define log_errf(fmt, ...) log_write('E', __FILE__, __LINE__, fmt, __VA_ARGS__)
#define DXCORE_MAX_PATH 260
/*
* List of components we expect to find in the driver store that we need to mount
*/
static const char * const dxcore_nvidia_driver_store_components[] = {
"libcuda.so.1.1", /* Core library for cuda support */
"libcuda_loader.so", /* Core library for cuda support on WSL */
"libnvidia-ptxjitcompiler.so.1", /* Core library for PTX Jit support */
"libnvidia-ml.so.1", /* Core library for nvml */
"libnvidia-ml_loader.so", /* Core library for nvml on WSL */
"nvidia-smi", /* nvidia-smi binary*/
"nvcubins.bin", /* Binary containing GPU code for cuda */
};
/*
* List of functions and structures we need to communicate with libdxcore.
* Documentation on these functions can be found on docs.microsoft.com in d3dkmthk.
*/
struct dxcore_enumAdapters2;
struct dxcore_queryAdapterInfo;
typedef int(*pfnDxcoreEnumAdapters2)(struct dxcore_enumAdapters2* pParams);
typedef int(*pfnDxcoreQueryAdapterInfo)(struct dxcore_queryAdapterInfo* pParams);
struct dxcore_lib {
void* hDxcoreLib;
pfnDxcoreEnumAdapters2 pDxcoreEnumAdapters2;
pfnDxcoreQueryAdapterInfo pDxcoreQueryAdapterInfo;
};
struct dxcore_adapterInfo
{
unsigned int hAdapter;
struct dxcore_luid AdapterLuid;
unsigned int NumOfSources;
unsigned int bPresentMoveRegionsPreferred;
};
struct dxcore_enumAdapters2
{
unsigned int NumAdapters;
struct dxcore_adapterInfo *pAdapters;
};
enum dxcore_kmtqueryAdapterInfoType
{
DXCORE_QUERYDRIVERVERSION = 13,
DXCORE_QUERYREGISTRY = 48,
};
enum dxcore_queryregistry_type {
DXCORE_QUERYREGISTRY_DRIVERSTOREPATH = 2,
DXCORE_QUERYREGISTRY_DRIVERIMAGEPATH = 3,
};
enum dxcore_queryregistry_status {
DXCORE_QUERYREGISTRY_STATUS_SUCCESS = 0,
DXCORE_QUERYREGISTRY_STATUS_BUFFER_OVERFLOW = 1,
DXCORE_QUERYREGISTRY_STATUS_FAIL = 2,
};
struct dxcore_queryregistry_info {
enum dxcore_queryregistry_type QueryType;
unsigned int QueryFlags;
wchar_t ValueName[DXCORE_MAX_PATH];
unsigned int ValueType;
unsigned int PhysicalAdapterIndex;
unsigned int OutputValueSize;
enum dxcore_queryregistry_status Status;
union {
unsigned long long OutputQword;
wchar_t Output;
};
};
struct dxcore_queryAdapterInfo
{
unsigned int hAdapter;
enum dxcore_kmtqueryAdapterInfoType Type;
void *pPrivateDriverData;
unsigned int PrivateDriverDataSize;
};
static int dxcore_query_adapter_info_helper(struct dxcore_lib* pLib,
unsigned int hAdapter,
enum dxcore_kmtqueryAdapterInfoType type,
void* pPrivateDriverDate,
unsigned int privateDriverDataSize)
{
struct dxcore_queryAdapterInfo queryAdapterInfo = { 0 };
queryAdapterInfo.hAdapter = hAdapter;
queryAdapterInfo.Type = type;
queryAdapterInfo.pPrivateDriverData = pPrivateDriverDate;
queryAdapterInfo.PrivateDriverDataSize = privateDriverDataSize;
return pLib->pDxcoreQueryAdapterInfo(&queryAdapterInfo);
}
static int dxcore_query_adapter_wddm_version(struct dxcore_lib* pLib, unsigned int hAdapter, unsigned int* version)
{
return dxcore_query_adapter_info_helper(pLib,
hAdapter,
DXCORE_QUERYDRIVERVERSION,
(void*)version,
sizeof(*version));
}
static int dxcore_query_adapter_driverstore(struct dxcore_lib* pLib, unsigned int hAdapter, char** ppDriverStorePath)
{
struct dxcore_queryregistry_info params = {0};
struct dxcore_queryregistry_info* pValue = NULL;
wchar_t* pOutput;
size_t outputSizeInBytes;
size_t outputSize;
params.QueryType = DXCORE_QUERYREGISTRY_DRIVERSTOREPATH;
if (dxcore_query_adapter_info_helper(pLib,
hAdapter,
DXCORE_QUERYREGISTRY,
(void*)&params,
sizeof(params)))
{
log_err("Failed to query driver store path size for the WDDM Adapter");
return (-1);
}
if (params.OutputValueSize > DXCORE_MAX_PATH * sizeof(wchar_t)) {
log_err("The driver store path size returned by dxcore is not valid");
return (-1);
}
outputSizeInBytes = (size_t)params.OutputValueSize;
outputSize = outputSizeInBytes / sizeof(wchar_t);
pValue = calloc(sizeof(struct dxcore_queryregistry_info) + outputSizeInBytes + sizeof(wchar_t), 1);
if (!pValue) {
log_err("Out of memory while allocating temp buffer to query adapter info");
return (-1);
}
pValue->QueryType = DXCORE_QUERYREGISTRY_DRIVERSTOREPATH;
pValue->OutputValueSize = (unsigned int)outputSizeInBytes;
if (dxcore_query_adapter_info_helper(pLib,
hAdapter,
DXCORE_QUERYREGISTRY,
(void*)pValue,
(unsigned int)(sizeof(struct dxcore_queryregistry_info) + outputSizeInBytes)))
{
log_err("Failed to query driver store path data for the WDDM Adapter");
free(pValue);
return (-1);
}
pOutput = (wchar_t*)(&pValue->Output);
// Make sure no matter what happened the wchar_t string is null terminated
pOutput[outputSize] = L'\0';
// Convert the output into a regular c string
*ppDriverStorePath = (char*)calloc(outputSize + 1, sizeof(char));
if (!*ppDriverStorePath) {
log_err("Out of memory while allocating the buffer for the driver store path");
free(pValue);
return (-1);
}
wcstombs(*ppDriverStorePath, pOutput, outputSize);
free(pValue);
return 0;
}
static void dxcore_add_adapter(struct dxcore_context* pCtx, struct dxcore_lib* pLib, struct dxcore_adapterInfo *pAdapterInfo)
{
unsigned int wddmVersion = 0;
char* driverStorePath = NULL;
log_infof("Creating a new WDDM Adapter for hAdapter:%x luid:%llx", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid));
if (dxcore_query_adapter_wddm_version(pLib, pAdapterInfo->hAdapter, &wddmVersion)) {
log_err("Failed to query the WDDM version for the specified adapter. Skipping it.");
return;
}
if (wddmVersion < 2700) {
log_err("Found a WDDM adapter running a driver with pre-WDDM 2.7 . Skipping it.");
return;
}
if (dxcore_query_adapter_driverstore(pLib, pAdapterInfo->hAdapter, &driverStorePath)) {
log_err("Failed to query driver store path for the WDDM Adapter . Skipping it.");
return;
}
// We got all the info we needed. Adding it to the tracking structure.
{
struct dxcore_adapter* newList;
newList = realloc(pCtx->adapterList, sizeof(struct dxcore_adapter) * (pCtx->adapterCount + 1));
if (!newList) {
log_err("Out of memory when trying to add a new WDDM Adapter to the list of valid adapters");
free(driverStorePath);
return;
}
pCtx->adapterList = newList;
pCtx->adapterList[pCtx->adapterCount].hAdapter = pAdapterInfo->hAdapter;
pCtx->adapterList[pCtx->adapterCount].pDriverStorePath = driverStorePath;
pCtx->adapterList[pCtx->adapterCount].wddmVersion = wddmVersion;
pCtx->adapterCount++;
}
log_infof("Adding new adapter via dxcore hAdapter:%x luid:%llx wddm version:%d", pAdapterInfo->hAdapter, *((unsigned long long*)&pAdapterInfo->AdapterLuid), wddmVersion);
}
static void dxcore_enum_adapters(struct dxcore_context* pCtx, struct dxcore_lib* pLib)
{
struct dxcore_enumAdapters2 params = {0};
unsigned int adapterIndex = 0;
params.NumAdapters = 0;
params.pAdapters = NULL;
if (pLib->pDxcoreEnumAdapters2(&params)) {
log_err("Failed to enumerate adapters via dxcore");
return;
}
params.pAdapters = malloc(sizeof(struct dxcore_adapterInfo) * params.NumAdapters);
if (pLib->pDxcoreEnumAdapters2(&params)) {
free(params.pAdapters);
log_err("Failed to enumerate adapters via dxcore");
return;
}
for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) {
dxcore_add_adapter(pCtx, pLib, &params.pAdapters[adapterIndex]);
}
free(params.pAdapters);
}
int dxcore_init_context(struct dxcore_context* pCtx)
{
struct dxcore_lib lib = {0};
pCtx->initialized = 0;
pCtx->adapterCount = 0;
pCtx->adapterList = NULL;
lib.hDxcoreLib = dlopen("libdxcore.so", RTLD_LAZY);
if (!lib.hDxcoreLib) {
goto error;
}
lib.pDxcoreEnumAdapters2 = (pfnDxcoreEnumAdapters2)dlsym(lib.hDxcoreLib, "D3DKMTEnumAdapters2");
if (!lib.pDxcoreEnumAdapters2) {
log_err("dxcore library is present but the symbol D3DKMTEnumAdapters2 is missing");
goto error;
}
lib.pDxcoreQueryAdapterInfo = (pfnDxcoreQueryAdapterInfo)dlsym(lib.hDxcoreLib, "D3DKMTQueryAdapterInfo");
if (!lib.pDxcoreQueryAdapterInfo) {
log_err("dxcore library is present but the symbol D3DKMTQueryAdapterInfo is missing");
goto error;
}
dxcore_enum_adapters(pCtx, &lib);
log_info("dxcore layer initialized successfully");
pCtx->initialized = 1;
dlclose(lib.hDxcoreLib);
return 0;
error:
dxcore_deinit_context(pCtx);
if (lib.hDxcoreLib)
dlclose(lib.hDxcoreLib);
return (-1);
}
static void dxcore_deinit_adapter(struct dxcore_adapter* pAdapter)
{
if (!pAdapter)
return;
free(pAdapter->pDriverStorePath);
}
void dxcore_deinit_context(struct dxcore_context* pCtx)
{
unsigned int adapterIndex = 0;
if (!pCtx)
return;
for (adapterIndex = 0; adapterIndex < pCtx->adapterCount; adapterIndex++) {
dxcore_deinit_adapter(&pCtx->adapterList[adapterIndex]);
}
free(pCtx->adapterList);
pCtx->initialized = 0;
}

Some files were not shown because too many files have changed in this diff Show More