Compare commits

...

474 Commits

Author SHA1 Message Date
Evan Lezar
d9de4a09b8 Merge branch 'bump-version-1.11.0' into 'main'
Bump version to 1.11.0

See merge request nvidia/container-toolkit/container-toolkit!213
2022-09-06 09:12:10 +00:00
Evan Lezar
2dbcda2619 Ensure that base package is built for debian
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-09-05 17:04:49 +02:00
Evan Lezar
691b93ffb0 Update libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-09-05 16:33:42 +02:00
Evan Lezar
cb0c94cd40 Bump version to v1.11.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-09-05 15:57:57 +02:00
Evan Lezar
3168718563 Update git commit command
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-09-05 15:57:29 +02:00
Evan Lezar
503ed96275 Merge branch 'fix-release-tooling' into 'main'
Ensure CLI versions are set correctly for RPM packages

See merge request nvidia/container-toolkit/container-toolkit!211
2022-08-24 10:45:38 +00:00
Evan Lezar
d8ba84d427 Add release tests for fedora35
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-24 11:57:20 +02:00
Evan Lezar
8e8c41a3bc Clean up repo test scripts
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-24 11:57:20 +02:00
Evan Lezar
e34fe17b45 Add fedora35 to release and signing scripts
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-24 11:57:20 +02:00
Evan Lezar
c5b0278c58 Ensure CLI versions are set correctly for RPM packages
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-24 11:57:20 +02:00
Evan Lezar
8daa257b35 Merge branch 'update-changelog' into 'main'
Add changelog for 1.11.0-rc.3

See merge request nvidia/container-toolkit/container-toolkit!210
2022-08-24 09:01:39 +00:00
Evan Lezar
6329174cfc Add changelog for 1.11.0-rc.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-24 10:08:23 +02:00
Evan Lezar
1ec41c1bf1 Merge branch 'update-libnvidia-container' into 'main'
Update libnvidia-container

See merge request nvidia/container-toolkit/container-toolkit!209
2022-08-23 16:52:09 +00:00
Evan Lezar
581a76de38 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-23 17:29:01 +02:00
Evan Lezar
5d52ca8909 Merge branch 'add-fedora35' into 'main'
Add fedora35 package targets

See merge request nvidia/container-toolkit/container-toolkit!205
2022-08-23 13:04:45 +00:00
Evan Lezar
ad7151d394 Update CUDA base image to 11.7.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-23 14:18:49 +02:00
Evan Lezar
3269a7b0e7 Update libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-23 14:18:49 +02:00
Evan Lezar
6a155cc606 Increase package build timeout to 3 hours for slow aarch64 builds
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-23 14:18:49 +02:00
Evan Lezar
a5bbf613e8 Use single config file for centos, al2, and fedora
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-23 14:18:49 +02:00
Evan Lezar
22427c1359 Add fedora35 CI targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-23 14:18:49 +02:00
Evan Lezar
f17121fd6c Add fedora targets to release scripts
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-23 14:18:49 +02:00
Evan Lezar
256e37eb3f Add fedora35 package targets
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-23 14:18:49 +02:00
Evan Lezar
bdfd123b9d Switch to single docker file yum-based rpm builds
This reuses the docker file for yum-based rpm distros (centos, amazonlinux)
instead of maintaining two files with the same contents.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-23 14:18:49 +02:00
Jon Mayo
3f7dce202a Merge branch 'remove-podman' into 'main'
Specify hook structure instead of importing Podman

See merge request nvidia/container-toolkit/container-toolkit!208
2022-08-22 15:25:40 +00:00
Evan Lezar
a6d21abe14 Merge branch 'add-package-with-no-libnvidia-container' into 'main'
Split nvidia-container-toolkit package

See merge request nvidia/container-toolkit/container-toolkit!195
2022-08-22 09:08:33 +00:00
Evan Lezar
d0f1fe2273 Use new packages in toolkit image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-19 12:38:17 +02:00
Evan Lezar
8de9593209 Split nvidia-container-toolkit package
This change splits the nvidia-container-toolkit package into the top-level package and
an nvidia-container-toolkit-base package.
The nvidia-container-toolkit-base package allows the NVIDIA Container Runtime and
NVIDIA Container Toolkit CLI to be installed on systems without requiring that the
NVIDIA Container Runtine Hook and the transitive dependencies included in the NVIDIA
Container Library and NVIDIA Container CLI also be installed.

This allows the runtime to be used on systems where the CSV or CDI mode of the runtime
is used exclusively.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-19 12:38:17 +02:00
Evan Lezar
64b2b50470 Fix centos8 test image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-19 12:36:52 +02:00
Evan Lezar
4dc1451c49 Fix indentation in makefile
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-19 12:36:52 +02:00
Evan Lezar
211081ff25 Update vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-19 10:28:00 +02:00
Evan Lezar
c1c1d5cf8e Specify hook structure instead of importing Podman
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-19 10:26:34 +02:00
Evan Lezar
e91ffef258 Merge branch 'fix-runtime-hook-rename' into 'main'
Fix cleanup of nvidia-container-toolkit link

See merge request nvidia/container-toolkit/container-toolkit!207
2022-08-18 12:51:51 +00:00
Evan Lezar
47c8aa3790 Fix cleanup of nvidia-container-toolkit link
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-18 14:06:08 +02:00
Evan Lezar
33b4e7fb0a Merge branch 'fix-containerd-tests' into 'main'
Fix image in containerd tests

See merge request nvidia/container-toolkit/container-toolkit!206
2022-08-12 13:46:24 +00:00
Evan Lezar
936da0295b Use proper cuda image for containerd tests
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-12 14:23:24 +02:00
Evan Lezar
c2205c14fb Update subcomponents
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-12 14:22:40 +02:00
Evan Lezar
56935f5743 Merge branch 'fix-mounts' into 'main'
Fix setting of toolkit config option in toolkit container

See merge request nvidia/container-toolkit/container-toolkit!204
2022-08-09 15:46:15 +00:00
Evan Lezar
1b3bae790c Update image used for containerd tests
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-09 16:55:51 +02:00
Evan Lezar
47559a8c87 Output applied config to toolkit container stdout
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-09 15:18:59 +02:00
Evan Lezar
86412ea821 Ensure that toolkit-container sets correct default value
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-09 15:18:52 +02:00
Evan Lezar
b8aa844171 Fix setting of toolkit config option in toolkit container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-09 15:18:52 +02:00
Evan Lezar
f9464c5cf9 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-09 15:18:52 +02:00
Evan Lezar
9df75e1fa3 Merge branch 'add-tegra-files-as-mounts' into 'main'
Add modifier to inject Tegra platform files

See merge request nvidia/container-toolkit/container-toolkit!203
2022-08-09 11:43:04 +00:00
Evan Lezar
0218e2ebf7 Update vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-08 17:12:47 +02:00
Evan Lezar
a9dc6550d5 Use nvinfo package from go-nvlib
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-08 17:11:42 +02:00
Evan Lezar
ffd6ec3c54 Add modifier to inject Tegra platform files
This change adds a modifier to that injects the tegra platform files
* /etc/nv_tegra_release
* /sys/devices/soc0/family

allowing these files to be used for platform detection in a containerized
context such as the GPU device plugin.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-08 16:04:20 +02:00
Evan Lezar
de3e0df96c Merge branch 'bump-version-1.11.0-rc.3' into 'main'
Bump version to 1.11.0-rc.3

See merge request nvidia/container-toolkit/container-toolkit!202
2022-08-08 13:45:59 +00:00
Evan Lezar
e5dadf34d9 Bump version to 1.11.0-rc.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-08-08 14:56:01 +02:00
Evan Lezar
52145f2d73 Merge branch 'fix-libnvidia-container-tag' into 'main'
Fix setting of LIBNVIDIA_CONTAINER_TAG

See merge request nvidia/container-toolkit/container-toolkit!201
2022-07-27 11:31:06 +00:00
Evan Lezar
90df3caf62 Fix setting of LIBNVIDIA_CONTAINER_TAG
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-27 13:30:31 +02:00
Evan Lezar
50db66a925 Merge branch 'release-1.11.0-rc.2' into 'main'
Add CHANGELOG entry for 1.11.0-rc.2

See merge request nvidia/container-toolkit/container-toolkit!200
2022-07-27 10:53:26 +00:00
Evan Lezar
8587fa05bd Add CHANGELOG entry for 1.11.0-rc.2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-27 12:06:09 +02:00
Evan Lezar
8129dade3c Merge branch 'set-mount-devices' into 'main'
Allow accept-nvidia-visible-devices-* to be set by toolkit contianer

See merge request nvidia/container-toolkit/container-toolkit!198
2022-07-27 09:58:25 +00:00
Evan Lezar
3610fe7c33 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-27 11:12:57 +02:00
Evan Lezar
90518e0ce5 Allow accept-visible-devices config options to be set
This change allows the
* accept-nvidia-visible-devices-envvar-when-unprivileged
* accept-nvidia-visible-devices-as-volume-mounts

options to be set in the toolkit-container. These are controlled
by command line flags or the following environment variables:

* ACCEPT_NVIDIA_VISIBLE_DEVICES_ENVVAR_WHEN_UNPRIVILEGED
* ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-27 10:57:43 +02:00
Evan Lezar
9c060f06ba Remove unused TOOLKIT_ARGS / --toolkit-args
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-27 10:50:18 +02:00
Evan Lezar
e848aa7813 Set toolkit root as flag
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-27 10:50:06 +02:00
Evan Lezar
feedc912e4 Rename toolkitDir toolkitRoot
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-27 10:50:05 +02:00
Evan Lezar
ab3f05cf62 Move global toolkitDir to options struct
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-27 10:41:46 +02:00
Evan Lezar
35982e51bf Move toolkit options to struct
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-27 10:40:19 +02:00
Evan Lezar
94e650c518 Merge branch 'bump-version' into 'main'
bump version to 1.11.0-rc.2

See merge request nvidia/container-toolkit/container-toolkit!197
2022-07-26 17:57:23 +00:00
Evan Lezar
d9edc18bf8 Bump version to 1.11.0-rc.2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-25 09:51:20 +02:00
Evan Lezar
f4d01e0a05 Add changelog entries for 1.11.0-rc.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-25 09:51:01 +02:00
Evan Lezar
648cfaba51 Merge branch 'update-error-message' into 'main'
Make error message clearer

See merge request nvidia/container-toolkit/container-toolkit!194
2022-07-21 08:49:56 +00:00
Christopher Desiniotis
3a9de13f4e Apply 1 suggestion(s) to 1 file(s) 2022-07-21 08:03:39 +00:00
Evan Lezar
629a68937e Merge branch 'fix-relative-files' into 'main'
Fix adjusting relative paths for containerised devices and mounts.

See merge request nvidia/container-toolkit/container-toolkit!193
2022-07-20 11:40:28 +00:00
Evan Lezar
34e80abdea Add root to mounts type
This change adds a root member to the mounts type that is used to
perform most of the lookups for files and devices. This allows
for consistent handling of relative paths.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-18 14:37:02 +02:00
Evan Lezar
1161b21166 Make error message clearer
This change improves the error message when invoking the NVIDIA
Runtime Hook in non-legacy mode. This should guide users to specifying
the --runtime=nvidia flag when using docker.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-18 13:09:59 +02:00
Evan Lezar
bcdef81e30 Merge branch 'fix-ordering-of-csv-hooks' into 'main'
Fix ordering of create-symlink and update-ldcache hooks

See merge request nvidia/container-toolkit/container-toolkit!192
2022-07-18 10:59:41 +00:00
Evan Lezar
acc0afbb7a Remove Relative method from Locator
The Relative method added to the Locator interface was
not correctly implemented in the file type. The root was
never set when instantiating the object.

This change removes this method from the interface and the file
type, switching to a local implementation in the mounts type
instead.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-15 16:40:27 +02:00
Evan Lezar
7584044b3c Fix bug where ldcache may not contain symlinks
Since the creation of symlinks may include other libraries / folders
the ldcache should be updated AFTER the symlinks are created.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-15 12:18:40 +02:00
Evan Lezar
02c14e981c Add tests for identifying libraries
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-15 12:17:15 +02:00
Evan Lezar
37ee972f74 Merge branch 'CNT-2349/configure-docker' into 'main'
Add nvidia-ctk runtime configure command to update docker config

See merge request nvidia/container-toolkit/container-toolkit!166
2022-07-14 08:06:27 +00:00
Evan Lezar
3809407b6a Merge branch 'rename-to-nvidia-container-hook' into 'main'
Rename -toolkit executable to -runtime-hook

See merge request nvidia/container-toolkit/container-toolkit!189
2022-07-13 11:08:53 +00:00
Evan Lezar
f9547c447a Merge branch 'fix-cdi-refresh' into 'main'
Ensure that CDI registry is refreshed

See merge request nvidia/container-toolkit/container-toolkit!191
2022-07-13 09:38:45 +00:00
Evan Lezar
eb85d45137 Merge branch 'CNT-3297/cdi-config' into 'main'
Add runtime config option for CDI spec dirs

See merge request nvidia/container-toolkit/container-toolkit!190
2022-07-13 09:36:33 +00:00
Evan Lezar
9f0060f651 Add nvidia-ctk runtime configure command
This change adds a `runtime configure` command to the nvidia-ctk CLI. This
command is currently limited to configuring the docker config on the
system by modifying the daemon.json config file associated with docker.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-13 10:33:46 +02:00
Evan Lezar
0e6dc3f7ea Move docker config handling to internal package
In preparation for adding a command to the nvidia-ctk CLI to modify
the docker config, this change refactors load, update, and flush logic
from the toolkit container docker CLI to an internal package.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-13 10:30:01 +02:00
Evan Lezar
1b4944e1de Ensure that CDI registry is refreshed
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-12 14:07:21 +02:00
Evan Lezar
83743e3613 Add runtime config option for CDI spec dirs
This change adds an nvidia-container-runtime.modes.cdi.spec-dirs
config option that allows the default spec dirs to be overridden.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-11 15:39:48 +02:00
Evan Lezar
87afcc3ef4 Reuse check for existing hook
This change reuse the code that checks for the existing NVIDIA
Container Runtime hook to ensure that both nvidia-container-toolkit
and nvidia-container-runtime-hook are detected.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-08 12:20:19 +02:00
Evan Lezar
6ed3a4e1a6 Update package descriptions and URLs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-08 12:16:03 +02:00
Evan Lezar
8a56671d18 Update package definitions
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-08 12:16:03 +02:00
Evan Lezar
1d81db76a6 Update references to nvidia-container-runtime-hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-08 12:15:56 +02:00
Evan Lezar
f50aecb84e Rename -toolkit executable to -runtime-hook
This change renames the nvidia-container-toolkit executable
to nvidia-container-runtime-hook. Here nvidia-container-toolkit
is created as a symlink to nvidia-container-runtime-hook.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-08 12:09:11 +02:00
Evan Lezar
a4258277e1 Merge branch 'update-release-script' into 'main'
Update release tooling to allow for rc release that don't update all packages.

See merge request nvidia/container-toolkit/container-toolkit!188
2022-07-07 14:33:27 +00:00
Evan Lezar
18eb3c7c38 Skip packages that already exist
For rc releases we allow nvidia-container-toolkit versions
to not match libnvidia-container versions. This change ensures
that only changed packages are released.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-07 15:41:20 +02:00
Evan Lezar
a0e728b5c8 Use centos:stream8 image for signing
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-07 15:40:53 +02:00
Evan Lezar
df0176cca4 Merge branch 'support-host-device-paths' into 'main'
Support device nodes with a different root

See merge request nvidia/container-toolkit/container-toolkit!187
2022-07-07 11:35:10 +00:00
Evan Lezar
b68b3c543b Use device host path to determine properties
This mirrors what is done in cri-o and allows for devices nodes
from, for example, the driver container to be injected into a
container at /dev instead of <ROOT>/dev

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-07 12:03:23 +02:00
Evan Lezar
aea1a85bb4 Update vendored runc version
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-07 11:30:01 +02:00
Evan Lezar
98e874e750 Merge branch 'add-cdi-mode' into 'main'
Add CDI mode to NVIDIA Container Runtime

See merge request nvidia/container-toolkit/container-toolkit!172
2022-07-07 08:09:38 +00:00
Evan Lezar
eef016c27d Merge branch 'refactor-csv-discovery' into 'main'
Refactor device discovery

See merge request nvidia/container-toolkit/container-toolkit!185
2022-07-07 08:07:43 +00:00
Evan Lezar
19f89ecafd Update cdi package and run go mod vendor
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-06 16:53:38 +02:00
Evan Lezar
8817dee66c Add support for specifying devices in annotations
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-06 16:53:36 +02:00
Evan Lezar
404e266222 Add cdi mode to NVIDIA Container Runtime
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-06 16:53:05 +02:00
Evan Lezar
9b898c65fa Merge branch 'move-license-make-target' into 'main'
The licenses make target should not be a check target

See merge request nvidia/container-toolkit/container-toolkit!186
2022-07-06 13:14:43 +00:00
Evan Lezar
5c39cf4deb The licenses make target should not be a check target
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-06 14:24:11 +02:00
Evan Lezar
beff276a52 Add charDevices discoverer for devices
This change adds a charDevices discoverer and using this
for CSV, GDS, and MOFED discovery. Internally the discoverer
is a "mounts" discoverer with a charDevice locator.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-06 13:43:23 +02:00
Evan Lezar
55cb82c6c8 Create single discoverer per mount type for CSV
Instead of creating a set of discoverers per file, this change creates
a discoverer per type by first concatenating the mount specifications
from all files. This will allow all device nodes, for example, to
be treated as a single device.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-06 10:57:35 +02:00
Evan Lezar
88d1143827 Merge branch 'add-go-license' into 'main'
Add tooling to check go licenses

See merge request nvidia/container-toolkit/container-toolkit!183
2022-07-06 05:08:59 +00:00
Evan Lezar
d5162b1917 Add tooling to check go licenses
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-05 20:19:23 +02:00
Evan Lezar
ec078543a1 Merge branch 'rename-discover-merge' into 'main'
Rename discover.NewList to discover.Merge

See merge request nvidia/container-toolkit/container-toolkit!182
2022-07-05 09:37:03 +00:00
Evan Lezar
9191074666 Rename discover.NewList to discover.Merge
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-05 10:28:40 +02:00
Evan Lezar
89824849d3 Merge branch 'refactor-envvar-devices' into 'main'
Add DevicesFromEnvvars function to CUDA image abstraction

See merge request nvidia/container-toolkit/container-toolkit!178
2022-07-04 08:47:28 +00:00
Evan Lezar
877083f091 Merge branch 'CNT-3242/strip-root-from-container-mount' into 'main'
Strip root (e.g. driver root) from located mount paths in the container

See merge request nvidia/container-toolkit/container-toolkit!177
2022-07-04 08:45:38 +00:00
Evan Lezar
6467fcd0f5 Merge branch 'ensure-test-output-path-exists' into 'main'
Ensure test/output path exists

See merge request nvidia/container-toolkit/container-toolkit!180
2022-07-04 08:44:11 +00:00
Evan Lezar
fd135f1a8b Add Relative function to Locator interface
This adds a Relative function to the Locator interface and uses
this to determine the host and container paths for located files
(and devices). This ensures that the root (e.g. the nvidia driver
root) is stripped from the container path.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 16:23:50 +02:00
Evan Lezar
4e08ec2405 Use CUDA.DevicesFromEnvvar to check if modifications are required
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 16:14:36 +02:00
Evan Lezar
925c348565 Add DevicesFromEnvvars function to CUDA image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 16:12:13 +02:00
Kevin Klues
25fd1aaf7e Merge branch 'CNT-3084/include-cufile.json' into 'main'
Include cufile.json in GDS discovery

See merge request nvidia/container-toolkit/container-toolkit!175
2022-07-01 13:49:02 +00:00
Kevin Klues
91e645b91b Merge branch 'gds-poc' into 'main'
Add initial GDS and MOFED discovery

See merge request nvidia/container-toolkit/container-toolkit!163
2022-07-01 13:43:20 +00:00
Evan Lezar
a1c2f07b6e Add /etc/cufile.json to list of required mounts
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 14:54:58 +02:00
Evan Lezar
7f7bec0668 Create GDS and MOFED modifiers
This change creates GDS and MOFED modifiers and adds them to the
modifer created for the selected runtime mode if the NVIDIA_GDS
and NVIDIA_MOFED envvars are set to "enabled", respectively.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 14:54:05 +02:00
Evan Lezar
cb34f7c6d1 Add discovery of GDS and MOFED devices
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 14:40:55 +02:00
Evan Lezar
7f47a61986 Allow globs in filenames for locators
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 14:30:33 +02:00
Evan Lezar
e8843c38f2 Move cmd/nvidia-container-runtime/modifier package to internal/modifier
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 14:28:40 +02:00
Evan Lezar
d66c00dd1d Use modifier list and discoverModifer
This change uses modifier compositioning and the discoverModifier to
refactor the existing CSV modifier.

This change adds a discoverModifier to the internal/modifier package and
refactors the CSV modifier to use this abstraction.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 14:25:19 +02:00
Evan Lezar
55ac8628c8 Add lists of modifiers to allow for modifier compositioning
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 14:25:18 +02:00
Evan Lezar
175f75b43f Ensure test/output path exists
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-07-01 10:07:37 +02:00
Evan Lezar
da3226745c Update vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-22 10:58:09 +02:00
Evan Lezar
b23e3ea13a Merge branch 'bump-1.11.0-rc.1' into 'main'
Bump version to 1.11.0-rc.1

See merge request nvidia/container-toolkit/container-toolkit!170
2022-06-22 07:52:19 +00:00
Evan Lezar
02f0ee08fc Update nvidia-docker and nvidia-container-runtime
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-17 11:58:06 +02:00
Evan Lezar
4b0e79be50 Update nvidia-docker and nvidia-container-runtime branches to main
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-17 11:37:53 +02:00
Evan Lezar
8b729475e2 Allow any 1.* version of libnvidia-container package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-16 14:57:30 +02:00
Evan Lezar
a1319b1786 Switch to latest docker and docker dind in CI
This change prevents errors when downloading ubuntu repos on
amd64 architectures. The `stable` images were last pushed
2 years ago.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-16 13:44:14 +02:00
Evan Lezar
278fa43303 Allow libnvidia-container1 version to be specified directly
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-15 13:37:42 +02:00
Evan Lezar
d75f364b27 Update build scripts to set libnvidia-container version
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-15 13:37:42 +02:00
Evan Lezar
52d5021b76 Bump version to 1.11.0-rc.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-15 13:37:40 +02:00
Kevin Klues
7cfd3bd510 Merge branch 'bump-v1.10.0' into 'main'
Bump version to v1.10.0

See merge request nvidia/container-toolkit/container-toolkit!169
2022-06-13 10:32:37 +00:00
Evan Lezar
05ca131858 Update libnvidia-container submodule to v1.10.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-13 11:40:18 +02:00
Evan Lezar
181ce8571d Bump version to v1.10.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-13 11:40:18 +02:00
Shiva Krishna Merla
2ab0c6abce Merge branch 'update_container_licenses' into 'main'
Update toolkit images to use NGC DL license

See merge request nvidia/container-toolkit/container-toolkit!164
2022-06-08 19:04:22 +00:00
Shiva Krishna Merla
50caf29b4e Update toolkit images to use NGC DL license 2022-06-08 19:04:21 +00:00
Evan Lezar
067f7af142 Merge branch 'update-nvidia-docker' into 'main'
Bump nvidia-docker version to 2.11.0

See merge request nvidia/container-toolkit/container-toolkit!167
2022-06-08 12:15:17 +00:00
Evan Lezar
d1449951bc Bump nvidia-docker version
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-08 13:25:27 +02:00
Evan Lezar
a05af50b0f Merge branch 'bump-cuda-version' into 'main'
Bump CUDA base image version to 11.7.0

See merge request nvidia/container-toolkit/container-toolkit!162
2022-06-07 15:22:05 +00:00
Evan Lezar
950aff269b Merge branch 'bump-version-1.10.0-rc.4' into 'main'
Update NVIDIA Container Runtime readme and installed configs

See merge request nvidia/container-toolkit/container-toolkit!160
2022-06-07 15:05:48 +00:00
Evan Lezar
e033db559f Switch default container-toolkit image target to ubuntu20.04
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-07 11:32:20 +02:00
Evan Lezar
9a24a40fd2 Merge branch 'only-bump-version' into 'main'
Bump version to 1.10.0-rc.4

See merge request nvidia/container-toolkit/container-toolkit!165
2022-06-07 09:00:38 +00:00
Evan Lezar
df391e2144 Only generate amd64 images for ubuntu18.04
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-07 10:58:15 +02:00
Evan Lezar
9146b4d4b6 Remove build and release of centos8 container-toolkit images
Note that the centos8 packages are still built.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-07 10:58:15 +02:00
Evan Lezar
068d7e085b Use ubi8 base image for centos8
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-07 10:58:15 +02:00
Evan Lezar
79510a8290 Bump CUDA base image version to 11.7.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-07 10:58:15 +02:00
Evan Lezar
50240c93bd Update config files with options and defaults
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-03 13:10:24 +02:00
Evan Lezar
7ca0e5db60 Update NVIDIA Container Runtime readme
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-03 13:10:21 +02:00
Evan Lezar
c0e6765d46 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-01 15:29:25 +02:00
Evan Lezar
7739b0e8ea Bump version to 1.10.0-rc.4
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-06-01 14:46:12 +02:00
Evan Lezar
ab23fc52db Merge branch 'fix-binary-name' into 'main'
Use BinaryName for v1 containerd runtime config

See merge request nvidia/container-toolkit/container-toolkit!159
2022-05-30 07:53:42 +00:00
Evan Lezar
530d66b5c7 Also set default_runtime.options.BinaryName
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-27 16:21:52 +02:00
Evan Lezar
dad3e855b5 Also cleanup v1 default_runtime if BinaryName is set
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-27 16:18:57 +02:00
Evan Lezar
15cbd54d1c Also set Runtime file v1 containerd runtime config
This ensures that older versions of containerd that may be expecting
this over options.BinaryName should continue to work.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-26 06:26:06 +02:00
Evan Lezar
4cd719692e Use BinaryName for v1 containerd runtime config
This fixes a bug where the runtime path for v1 containerd configs
was specified in the options.Runtime setting (which is used
for the default runtime) instead of options.BinaryName.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-26 06:25:09 +02:00
Evan Lezar
b940294557 Merge branch 'CNT-2979/allow-empty-config' into 'main'
Return default config if config path is not found

See merge request nvidia/container-toolkit/container-toolkit!156
2022-05-25 12:20:51 +00:00
Evan Lezar
840cdec36d Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-25 13:23:21 +02:00
Evan Lezar
73a5b70a02 Return default config if config path is not found
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-25 13:22:45 +02:00
Evan Lezar
f0cae49892 Merge branch 'fix-jetpack-require' into 'main'
Ignore NVIDIA_REQUIRE_JETPACK* for image requirements

See merge request nvidia/container-toolkit/container-toolkit!158
2022-05-25 11:19:47 +00:00
Evan Lezar
e07c7f0fa2 Ignore NVIDIA_REQUIRE_JETPACK* for image requirements
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-24 09:53:37 +02:00
Evan Lezar
52ce97929c Merge branch 'fix-is-tegra-check' into 'main'
Fix bug in tegra detection

See merge request nvidia/container-toolkit/container-toolkit!157
2022-05-23 08:00:09 +00:00
Evan Lezar
084eae6e0d Fix bug in tegra detection
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-20 14:39:36 +02:00
Evan Lezar
f656b5c887 Merge branch 'fix-char-device' into 'main'
Fix assertCharDevice matching on all files

See merge request nvidia/container-toolkit/container-toolkit!155
2022-05-20 10:32:51 +00:00
Evan Lezar
55c1d7c256 Fix assertCharDevice matching on all files
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-20 10:08:00 +02:00
Evan Lezar
0f2b20fffc Merge branch 'auto-generate-changelog' into 'main'
Use single  changelog.md file instead of separate package-specific changelogs

See merge request nvidia/container-toolkit/container-toolkit!154
2022-05-20 08:03:19 +00:00
Evan Lezar
bb69727148 Include git commit in changelog URL
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-18 16:02:14 +02:00
Evan Lezar
0b4f3aaf69 Merge branch 'bump-1.10.0-rc.3' into 'main'
Bump version to 1.10.0-rc.3

See merge request nvidia/container-toolkit/container-toolkit!153
2022-05-18 13:46:41 +00:00
Evan Lezar
e5125515f0 Automatically generate changelogs in docker builds
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-18 14:54:58 +02:00
Evan Lezar
033b2fd90d Add dummy entry for rpm changelog matching other components
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-18 14:54:58 +02:00
Evan Lezar
a0a00e38fd Format CHANGELOG.md as markdown
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-18 14:54:58 +02:00
Evan Lezar
77cf70b625 Move debian changelog to CHANGELOG.md
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-18 14:54:58 +02:00
Evan Lezar
8ab3d713bc Update libnvidia-container version
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-18 14:53:29 +02:00
Evan Lezar
c58d81cec5 Bump version to 1.10.0-rc.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-18 13:38:54 +02:00
Evan Lezar
2a3b87157a Merge branch 'prep-release' into 'main'
Update changelog and libnvidia-container for release

See merge request nvidia/container-toolkit/container-toolkit!152
2022-05-13 11:52:58 +00:00
Evan Lezar
a68d1d914c Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-13 13:52:20 +02:00
Evan Lezar
f7ac8b8139 Update changelog for release
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-13 13:44:43 +02:00
Evan Lezar
b2902cc04a Merge branch 'CNT-2967/add-version-string' into 'main'
Add --version support for the CLIs

See merge request nvidia/container-toolkit/container-toolkit!151
2022-05-13 11:02:34 +00:00
Evan Lezar
25710468dc Ensure that git commit is set in docker build
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-13 07:31:11 +02:00
Evan Lezar
4a19bf16a8 Set the version and gitCommit in the Makefile
This change ensures that the variables used to construct the
version strings for CMDs are set in the makefile.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-13 07:31:11 +02:00
Evan Lezar
c77e86137e Add version output to CLIs
This change adds version output to the nvidia-continer-runtime,
nvidia-container-toolkit, and nvidia-ctk CLIs. The same version
is used in all cases and includes a version string and a git
revision if set.

The construction of the version string mirrors what is done in runc.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-13 07:31:11 +02:00
Evan Lezar
60dacb76b6 Call logger.Reset() to ensure errors are captured
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 15:42:42 +02:00
Evan Lezar
19138a2110 Skip setting of log file for --version flag
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 14:34:37 +02:00
Evan Lezar
bdb43aa8f2 Merge branch 'CNT-2973/add-has-nvml' into 'main'
Include HasNVML check in ResolveAutoMode

See merge request nvidia/container-toolkit/container-toolkit!149
2022-05-12 11:23:47 +00:00
Evan Lezar
d62cce3c75 Merge branch 'CNT-2953/new-options' into 'main'
Update config options to control OCI Spec modification

See merge request nvidia/container-toolkit/container-toolkit!145
2022-05-12 11:22:42 +00:00
Evan Lezar
ff86ecb2a5 Include HasNVML check in ResolveAutoMode
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 10:55:58 +02:00
Evan Lezar
ad9ec1efae Add HasNVML function to check if NVML is supported
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 10:55:13 +02:00
Evan Lezar
9db5f9c9e8 Remove unneeded legacy discovery
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 10:53:52 +02:00
Evan Lezar
4c49f75365 Remove --force flag from nvidia-container-runtime-hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 10:53:52 +02:00
Evan Lezar
e591f3f26b Replace experimental and discover-mode
These changes replace the nvidia-container-runtime config options
experimental and discover-mode with a single mode config option.

Note that mode is now a string with a default value of "auto"
and a mode value of "legacy" is equivalent to experimental == false.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 10:53:50 +02:00
Evan Lezar
e0ad82e467 Move ResolveAutoMode to info package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 10:28:56 +02:00
Evan Lezar
3a1404f2f4 Move isTegraSystem to internal info package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 10:28:56 +02:00
Evan Lezar
cf7bb91481 Update nvidia-container-runtime config options
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 10:28:56 +02:00
Evan Lezar
ba0e606df2 Use toml unmarshal to read runtime config
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-12 10:28:56 +02:00
Evan Lezar
ae57a2fc93 Merge branch 'CNT-2875/create-specific-symlinks' into 'main'
Create specific symlinks for CSV mode

See merge request nvidia/container-toolkit/container-toolkit!150
2022-05-12 05:27:43 +00:00
Evan Lezar
1eb0e3c8b3 Merge branch 'fix-executable-locator' into 'main'
Fix location of executables in PATH

See merge request nvidia/container-toolkit/container-toolkit!148
2022-05-12 05:26:22 +00:00
Evan Lezar
a524c44161 Merge branch 'CNT-2926/runc-logging' into 'main'
Support runc logging command line options

See merge request nvidia/container-toolkit/container-toolkit!144
2022-05-12 05:24:25 +00:00
Evan Lezar
675fbace01 Add hook to create specific links
This change updates the create-symlinks hook to also create symlinks for
libcuda.so, libGLX_indirect.so.0, and libnvidia-opticalflow.so

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-11 16:36:49 +02:00
Evan Lezar
eac326c5ea Add --link option to nvidia-ctk hook create-symlinks command
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-11 15:28:53 +02:00
Evan Lezar
b0f7a3809f Factor linkCreation into method
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-11 15:28:02 +02:00
Evan Lezar
126c004ee0 Improve symlink creation loop
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-11 15:17:15 +02:00
Evan Lezar
d2516cb5d5 Merge branch 'fix-container-root' into 'main'
Fix bug in update-ldcache hook when OCI spec contains a relative root

See merge request nvidia/container-toolkit/container-toolkit!147
2022-05-10 22:01:14 +00:00
Evan Lezar
4696d7ee69 Merge branch 'fix-hook-flags' into 'main'
Use singular instead of plural for hook arguments

See merge request nvidia/container-toolkit/container-toolkit!146
2022-05-10 22:00:51 +00:00
Evan Lezar
ef6f48e9f7 Use singular instead of plural for hook arguments
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-10 19:55:31 +02:00
Evan Lezar
088db09180 Use executable locator to find low-level runtime
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-10 15:21:48 +02:00
Evan Lezar
b8ef6be6ea Use lookup.GetPath from runtime hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-10 14:53:26 +02:00
Evan Lezar
1d2e1bd403 Add lookup.GetPath and lookup.GetPaths functions
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-10 14:52:47 +02:00
Evan Lezar
55efdc8765 Use state.GetContainerRoot in nvidia-ctk hook subcommands
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-10 11:48:43 +02:00
Evan Lezar
395f6cecb2 Add GetContainerRoot to oci.State type
This change adds a GetContainerRoot to the oci.State type to
encapsulate the logic around determining the container root.
This Fixes a bug where relative roots (e.g. as generated by contianerd)
are not supported.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-10 11:48:43 +02:00
Evan Lezar
e9d929dc2f Support runc logging command line options
This change processes and supports runc logging command line arguments.
This allows for better integration into container engines such as
docker.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-09 19:32:38 +02:00
Evan Lezar
117f68fa6e Merge branch 'CNT-2924/low-level-runtime-config' into 'main'
Add nvidia-container-runtime.runtimes config option

See merge request nvidia/container-toolkit/container-toolkit!143
2022-05-09 17:31:02 +00:00
Evan Lezar
7574a0d7de Make output of bundle directory a debug message
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-09 09:38:16 +02:00
Evan Lezar
335de5a352 Switch to debug logging when locating runtimes
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-09 09:38:16 +02:00
Evan Lezar
c76946cbcc Add nvidia-container-runtime.runtimes config option
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-09 09:38:12 +02:00
Evan Lezar
e93bafa6d4 Merge branch 'CNT-2676/nvidia-require' into 'main'
Add support for checking requirements to CSV discovery

See merge request nvidia/container-toolkit/container-toolkit!141
2022-05-06 12:52:53 +00:00
Evan Lezar
785f120c31 Fix form -> from in comment
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-06 13:22:34 +02:00
Evan Lezar
9e46d41dbe Add debug logging when checking requirements
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 14:14:01 +02:00
Evan Lezar
70c4588197 Add compute capability of first device as arch property
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 14:11:30 +02:00
Evan Lezar
9f50ac95c4 Add CUDA ComputeCapability function
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 14:09:28 +02:00
Evan Lezar
75ce057878 Add debug log for command line arguments
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:47:39 +02:00
Evan Lezar
9d2363e12e Return low-level runtime if subcommand is not create
This also removes a test that invokes nvidia-container-runtime run --bundle
expecting an error. This test is no longer valid since this command line
is forwared to runc where the error should be detected.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Evan Lezar
49f4bb3198 Check requirements before creating CSV discoverer
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Evan Lezar
583793b7ae Add processing for requirements and constraints
This change adds a Requirements abstraction that can be used to check
an images' NVIDIA_REQUIRE_* envvars against the host properties such
as CUDA version or architecture.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Evan Lezar
5d7b3a4a96 Return raw spec from Spec.Load
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Evan Lezar
a672713dba Add basic CUDA wrapper
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Evan Lezar
50cf07e4cd Use CUDA image abstraction for runtime hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Evan Lezar
8f0e1906c2 Add CUDA image abstraction
This change adds a CUDA image abstraction that encapsulates
the queries performed on a container image (e.g. envvars) to
check certain CUDA properties.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Evan Lezar
2e319b5b08 Add gcc for Amazonlinux builds
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Evan Lezar
f4d87e6912 Use go install to install go development tools
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Evan Lezar
fd06c7a00b Bump golang version to 1.17.8
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Evan Lezar
8fabeed3a4 Update go vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-05-05 13:43:13 +02:00
Kevin Klues
0c737bbdcc Merge branch 'fix-image-builds' into 'main'
Fix image building due to GPG key update

See merge request nvidia/container-toolkit/container-toolkit!142
2022-04-29 13:06:39 +00:00
Evan Lezar
38a4c9fa8f Fix image building due to GPG key update
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-29 14:13:33 +02:00
Evan Lezar
6e60b24828 Merge branch 'fix-version-parsing' into 'main'
Use semver package to parse CUDA version

See merge request nvidia/container-toolkit/container-toolkit!140
2022-04-25 12:35:26 +00:00
Evan Lezar
bdf997c761 Use semver package to parse CUDA version
This avoids the use of scanf on a user-provided string which is flagged
as a security vulnerability.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-22 14:57:52 +02:00
Evan Lezar
4ce932e7a7 Merge branch 'import-release-tooling' into 'main'
Add package release tooling

See merge request nvidia/container-toolkit/container-toolkit!102
2022-04-20 07:57:32 +00:00
Evan Lezar
4145cdf7f7 Merge branch 'bump-libnvidia-container-reference' into 'main'
Update libnvidia-container reference

See merge request nvidia/container-toolkit/container-toolkit!139
2022-04-19 14:40:53 +00:00
Evan Lezar
0b2be45ba2 Update libnvidia-container reference
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-19 15:46:14 +02:00
Evan Lezar
ce3cdb6fd9 Merge branch 'update-libnvidia-container-tracking-branch' into 'main'
Update libnvidia-container branch to main

See merge request nvidia/container-toolkit/container-toolkit!137
2022-04-19 13:44:01 +00:00
Jon Mayo
3ba18f89b0 Merge branch 'remove-dockerhub-release' into 'main'
Remove dockerhub publishing

See merge request nvidia/container-toolkit/container-toolkit!138
2022-04-14 00:13:45 +00:00
Jon Mayo
0de159e8b4 libnvidia-container: 'main' track branch 2022-04-13 16:20:51 -07:00
Jon Mayo
3fbffa0b48 Remove dockerhub publishing 2022-04-13 14:19:48 -07:00
Evan Lezar
75dfea1406 Merge branch 'dependabot/go_modules/github.com/containers/podman/v4-4.0.3' into 'main'
Bump github.com/containers/podman/v4 from 4.0.1 to 4.0.3

See merge request nvidia/container-toolkit/container-toolkit!134
2022-04-13 14:04:29 +00:00
Evan Lezar
c24bd4aa4e Update libnvidia-container branch to main
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-13 15:52:57 +02:00
dependabot[bot]
2b9dc5cbcf Bump github.com/containers/podman/v4 from 4.0.1 to 4.0.3
Bumps [github.com/containers/podman/v4](https://github.com/containers/podman) from 4.0.1 to 4.0.3.
- [Release notes](https://github.com/containers/podman/releases)
- [Changelog](https://github.com/containers/podman/blob/v4.0.3/RELEASE_NOTES.md)
- [Commits](https://github.com/containers/podman/compare/v4.0.1...v4.0.3)

---
updated-dependencies:
- dependency-name: github.com/containers/podman/v4
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-04-13 11:05:29 +00:00
Evan Lezar
234d05e57e Improve handling of git remotes for gh-pages packages
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-13 12:14:51 +02:00
Evan Lezar
abb0b7be5d Add scripting to sign and publish packages
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-13 12:14:51 +02:00
Evan Lezar
c09e5aca77 Add envvar for package versions
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-13 12:14:51 +02:00
Evan Lezar
6709da4cea Rename release.sh to build-packages.sh
The name release.sh is overloaded. This change renames the script to make the
intent clearer.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-13 12:14:51 +02:00
Evan Lezar
84f7daf108 Merge branch 'replace-master-with-main' into 'main'
Change master references to main

See merge request nvidia/container-toolkit/container-toolkit!135
2022-04-12 13:47:22 +00:00
Evan Lezar
ac49dc320c Change master references to main
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-12 14:52:38 +02:00
Evan Lezar
d304e06ffe Merge branch 'bump-version-1.10.0-rc.2' into 'main'
Bump version to v1.10.0-rc.2

See merge request nvidia/container-toolkit/container-toolkit!133
2022-04-12 10:33:38 +00:00
Evan Lezar
49756cb7ba Update libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-12 11:40:04 +02:00
Evan Lezar
8c7d919d9f Bump version to v1.10.0-rc.2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-12 10:48:03 +02:00
Evan Lezar
d7f53dcf64 Merge branch 'add-experimental-config' into 'master'
Add commented experimental option to config files

See merge request nvidia/container-toolkit/container-toolkit!131
2022-04-11 11:48:25 +00:00
Evan Lezar
36ffd0983c Merge branch 'revert-skip-release' into 'master'
Revert changes to skip release of images

See merge request nvidia/container-toolkit/container-toolkit!132
2022-04-11 11:46:36 +00:00
Evan Lezar
be680c6633 Add commented experimental option to config files
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-11 12:57:03 +02:00
Evan Lezar
e47aa2962a Revert "[ci] Skip external releases if associated OUT_REGISTRY value is empty."
This reverts commit c2f35badb0.
2022-04-11 12:53:42 +02:00
Evan Lezar
b5000c8107 Revert "[ci] echo skipped commands"
This reverts commit 3dab9da80e.
2022-04-11 12:53:22 +02:00
Evan Lezar
6d3bcb8723 Merge branch 'add-log-level-config' into 'master'
Add log-level config option for nvidia-container-runtime

See merge request nvidia/container-toolkit/container-toolkit!130
2022-04-11 07:32:41 +00:00
Evan Lezar
29e690f68a Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 18:04:43 +02:00
Evan Lezar
c224832a6d Add log-level config option for nvidia-container-runtime
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 13:56:17 +02:00
Evan Lezar
5211960fc3 Merge branch 'detect-gpus-flag' into 'master'
Detect use of --gpus flag in experimental mode

See merge request nvidia/container-toolkit/container-toolkit!125
2022-04-08 11:18:11 +00:00
Evan Lezar
cfca18a5f8 Merge branch 'refactor-csv-mount-spec-discovery' into 'master'
Refactor CSV discovery to make char device discovery clearer

See merge request nvidia/container-toolkit/container-toolkit!129
2022-04-08 10:54:06 +00:00
Evan Lezar
43ee7f1cd2 Merge branch 'cleanup-default-executable-dir' into 'master'
Clean up NVIDIA Container Runtime Hook executable specification

See merge request nvidia/container-toolkit/container-toolkit!126
2022-04-08 10:29:25 +00:00
Evan Lezar
45160b88a4 Remove exsiting NVIDIA Container Runtime Hooks from the spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 12:03:22 +02:00
Evan Lezar
dab6f4b768 Specify --force flag when invoking nvidia-container-runtime-hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 12:03:22 +02:00
Evan Lezar
a9a4704273 Raise error if hook invoked in experimental mode without force flag
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 12:03:22 +02:00
Evan Lezar
2563c1b87c Export GetDefaultRuntimeConfig
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 12:03:22 +02:00
Evan Lezar
62f608a3fe Make order of discoverers deterministic
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 11:59:26 +02:00
Evan Lezar
2c1e356370 Refactor CSV discovery to make char device discovery clearer
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 11:47:47 +02:00
Evan Lezar
7ec3cd0b5b Fix creation of CSV parser in create-symlinks
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 11:39:18 +02:00
Evan Lezar
ab7f25500f Fix creation of CSV parser in create-symlinks
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 11:36:48 +02:00
Evan Lezar
196d5c5461 Move NVIDIA Container Runtime Hook executable name to shared constant
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 11:29:27 +02:00
Evan Lezar
f07d110e85 Use DefaultExecutableDir to determine default paths
This change adds a DefaultExecutableDir = /usr/bin constant that is used
to construct default paths for executables instead of specifying these
explicitly.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 11:28:03 +02:00
Evan Lezar
1ebd48dea6 Merge branch 'add-symlink-hook' into 'master'
Add hook create-symlinks subcommand to create symlinks in container

See merge request nvidia/container-toolkit/container-toolkit!121
2022-04-08 09:14:07 +00:00
Evan Lezar
f7c74d35cc Merge branch 'add-hooks-cli' into 'master'
Add nvidia-ctk CLI with hook command and update-ldcache subcommand to update LD cache

See merge request nvidia/container-toolkit/container-toolkit!115
2022-04-08 09:13:39 +00:00
Evan Lezar
0de7491ce3 Merge branch 'check-for-nil-modifier' into 'master'
Return unmodified runtime if specModifier is nil

See merge request nvidia/container-toolkit/container-toolkit!127
2022-04-08 09:05:24 +00:00
Evan Lezar
1296a0ecf4 Merge branch 'fix-missing-close-on-csv' into 'master'
Add missing close when reading CSV file

See merge request nvidia/container-toolkit/container-toolkit!128
2022-04-08 08:33:23 +00:00
Evan Lezar
d1a38f10a5 Refactor CSV file parsing
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 08:11:10 +02:00
Evan Lezar
d8109dc49b Add missing close when reading CSV file
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 08:00:37 +02:00
Evan Lezar
67602b28f9 Return unmodified runtime if specModifier is nil
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-08 07:50:40 +02:00
Evan Lezar
907736b053 Inject symlinks hook for creating symlinks in a container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 20:25:55 +02:00
Evan Lezar
ecb4ef495a Add create-symlinks subcommand to create symlinks in container for specified CSV files
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 20:25:55 +02:00
Evan Lezar
95797a8252 Move reading of container state for internal/oci package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 20:25:55 +02:00
Evan Lezar
c87ae586d4 FIX: Rename containerSpec flag to container-spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 20:25:19 +02:00
Evan Lezar
7c10762768 Include nvidia-ctk in deb and rpm packages
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 20:25:19 +02:00
Evan Lezar
9c3c8e038a Add cache for mounts
This change adds a cache to the mounts type. This means that if called to get
a list of folders, for example, the result is reused instead of recalculated.
This also avoids duplicate logging.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 20:25:19 +02:00
Evan Lezar
d970d0a627 Add discovery for ldconfig hook that updates the LDCache
This change adds a discovered hook for updating the ldcache as a container-create
hook. The mounts from a discoverer are inspected to determine the folders that must
be added to the cache using the nvidia-ctk hook update-ldcache command.

This is added to the "csv" discovery mode for the experimental runtime.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 20:25:19 +02:00
Evan Lezar
740bd3fb9d Add nvidia-ctk config section
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 20:25:18 +02:00
Evan Lezar
1c892af215 Add hook command to nvidia-ctk with update-ldcache subcommand
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 16:38:01 +02:00
Evan Lezar
c945cc714d Add stub nvidia-ctk CLI
This change adds an nvidia-ctk CLI that is used as the basis for
utilities related to the NVIDIA Container Toolkit.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 16:32:25 +02:00
Evan Lezar
7914957105 Refactor hook creation
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 16:32:24 +02:00
Evan Lezar
99baea9d51 Merge branch 'add-auto-discover-mode' into 'master'
Add auto discover mode and use this as the default

See merge request nvidia/container-toolkit/container-toolkit!124
2022-04-07 14:29:44 +00:00
Evan Lezar
516a658902 Merge branch 'add-jetson-csv-discovery' into 'master'
Add support for CSV mount specifications

See merge request nvidia/container-toolkit/container-toolkit!117
2022-04-07 14:25:51 +00:00
Evan Lezar
bb086d4b44 Add auto discover mode and use this as the default
This change adds an 'auto' discover mode that attempts to select the correct mode
for a given platform. This currently attempts to detect whether the platform is a
Tegra-based system in which case the 'csv' discover mode is used. The 'legacy'
discover mode is used as the fallback.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 15:37:03 +02:00
Evan Lezar
26d2873bb2 FIX: Rename DefaultRoot to DefaultMountSpecPath
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 14:11:52 +02:00
Evan Lezar
b7d130e151 FIX: Improve locator map construction
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 11:12:41 +02:00
Evan Lezar
8574879560 FIX: Update TODO for container path
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 11:07:57 +02:00
Evan Lezar
5a416bc99c FIX: Use MountSpec* constants
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 11:01:57 +02:00
Evan Lezar
df7c064257 FIX: Remove unused NewFromCSV constructor
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 10:59:03 +02:00
Evan Lezar
2f2846116e Correct typo in constructor name
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 10:46:26 +02:00
Evan Lezar
6682bc90b4 Add support for NVIDIA_REQUIRE_JETPACK envvar
This change ensures that by default, the CSV discovery only considers the base CSV
files (l4t.csv, drivers.csv, devices.csv) and skips the rest unless the
NVIDIA_REQUIRE_JETPACK is set to "csv-mounts=all", in which case, all CSV files in the
specified folder are considered.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 10:46:26 +02:00
Evan Lezar
1c05a463bd Add csv discovery mode to experimental runtime
This change adds support for a "csv" discovery mode to the experimental runtime.
If this is set with experimental = true, a CSV-based discovery of devices and
mounts are used to define the modifications required to the OCI spec. The edits
are expressed as CDI ContainerEdits.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 10:45:19 +02:00
Evan Lezar
14f9e986c9 Add CSV-based discovery of device nodes
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 10:44:14 +02:00
Evan Lezar
af0ef6fb66 Add CSV-based discovery of mounts
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 10:44:14 +02:00
Evan Lezar
7c5504a1cf Add locators for symlinks and character devices
This change adds a symlink locator that follows symlinks and returns all
elements in the chain and a device locator that finds character devices.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 10:44:14 +02:00
Evan Lezar
8e85e96f38 Add code to process Jetpack CSV files
This change adds code to process Jetpack CSV mount specifications.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-07 10:44:14 +02:00
Evan Lezar
1561a67d55 Merge branch 'add-v2-runtime-stub' into 'master'
Add experimental mode to nvidia-container-runtime

See merge request nvidia/container-toolkit/container-toolkit!114
2022-04-06 17:41:54 +00:00
Evan Lezar
9ce690093d FIX: Make isNVIDIAContainerRuntimeHook mode idiomatic
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-06 17:18:06 +02:00
Evan Lezar
b8dd473343 FIX: Simplify hook remover
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-06 17:15:57 +02:00
Evan Lezar
96e8eb3dde FIX: Rename path locator as executable locator
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-06 15:24:48 +02:00
Evan Lezar
0054481e15 FIX: Rename CLIConfig to ContainerCLIConfig
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-06 15:21:57 +02:00
Evan Lezar
11aa1d2a7d FIX: Factor out specModifier construction into function
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-06 15:18:12 +02:00
Evan Lezar
e6730fd0f0 FIX: Don't log that hooks is being removed if it is not
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-06 15:13:32 +02:00
Evan Lezar
8db287af8b FIX: Fix typo in comment
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-06 14:46:27 +02:00
Jon Mayo
3dab9da80e [ci] echo skipped commands 2022-04-04 07:02:33 -07:00
Evan Lezar
282a2c145e Fix typo in variable name
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:26 +02:00
Evan Lezar
d0608844dc Add basic README for nvidia-container-runtime
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:26 +02:00
Evan Lezar
a26d02890f Make error logging less verbose by default
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:26 +02:00
Evan Lezar
14fe35c3f4 Implement hook remover for existing nvidia-container-runtime-hooks
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:26 +02:00
Evan Lezar
d12dbd1bef Read top-level config to propagate Root to experimental runtime
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:25 +02:00
Evan Lezar
33d9c1dd57 Split loading config from reader and getting config from toml.Tree
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:25 +02:00
Evan Lezar
239b6d3739 Implement experimental modifier for NVIDIA Container Runtime
This change enables the experimental mode of the NVIDIA Container Runtime. If
enabled, the nvidia-container-runtime.discover-mode config option is
queried to determine how required OCI spec modifications should be defined.
If "legacy" is selected, the existing NVIDIA Container Runtime hooks is
discovered and injected into the OCI spec.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:25 +02:00
Evan Lezar
9dfe60b8b7 Add stable discoverer for nvidia-container-runtime hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:25 +02:00
Evan Lezar
390e5747ea Add lookup abstraction for locating executable files
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:25 +02:00
Evan Lezar
7137f4b05b Move runtime config to internal package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:24 +02:00
Evan Lezar
9be6cca6db Don't skip internal packages for linting
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:24 +02:00
Evan Lezar
0c7eb93d62 Add experimental option to NVIDIA Container Runtime config
This change adds an experimental option to the NVIDIA Container Runtime config. To
simplify the extension of this experimental mode in future an error is raised if
this is enabled.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:24 +02:00
Evan Lezar
3bb539a5f7 Update libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-04-04 14:16:22 +02:00
Jon Mayo
e39412ca44 Merge branch 'ci-release-ifonly' into 'master'
[ci] Skip external releases if associated OUT_REGISTRY value is empty.

See merge request nvidia/container-toolkit/container-toolkit!123
2022-03-31 20:29:13 +00:00
Jon Mayo
c2f35badb0 [ci] Skip external releases if associated OUT_REGISTRY value is empty.
Allows CI/CD environment variables to quickly disable any release job derived from the .release:external template

Template Usage: DRYRUN_RELEASE set to a value to echo docker and regctl commands in Makefile without running them (dry-run) SKIP_RELEASE set to a value to remove the job from the pipeline.

CI/CD Usage: NGC_SKIP_RELEASE set to disable external release to NGC. DOCKERHUB_SKIP_RELEASE set to disable external release to DH. NGC_DRYRUN_RELEASE set to dry-run external release to NGC. DOCKERHUB_DRYRUN_RELEASE set to dry-run external release to DH.
2022-03-31 20:29:13 +00:00
Evan Lezar
d0dfe27324 Merge branch 'refactor-stable-runtime' into 'master'
Refactor nvidia-container-runtime to prepare for experimental option

See merge request nvidia/container-toolkit/container-toolkit!119
2022-03-29 12:23:18 +00:00
Evan Lezar
c6dfc1027d Move modifier code for inserting nvidia-container-runtime-hook to separate package
This change moves the code defining the insertion of the nvidia-container-runtime
hook to a separate package. This allows for better distinction between the existing
and experimental modifications.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 11:05:42 +02:00
Evan Lezar
4177fddcc4 Import modifying runtime abstraction from experimental runtime
This change imports the modifying runtime abstraction from the
experimental branch. This encapsulates the checks for whether
modification is required, and forwards the loaded spec to
the specified modifier. This allows for the same code to be
reused when performing more complex modifications.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 11:05:42 +02:00
Evan Lezar
bf8c3bab72 Add test package with GetModuleRoot and PrependToPath function
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 11:05:41 +02:00
Evan Lezar
c5c2ffd68f Ensure that Exec error is also logged to file
This change removes unneeded logging and renames the return error value to rerr
to avoid it being aliased by local error values.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 11:05:41 +02:00
Evan Lezar
48d5a1cd1a Update go vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 11:05:41 +02:00
Evan Lezar
a7580e3872 Update podman hooks dependency
This is required to ensure that a newer version of
github.com/opencontainers/runtime-tools/generate is imported for use
with CDI.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 11:05:39 +02:00
Evan Lezar
4bf05325b5 Add .shell make target for non-Linux development
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 11:05:39 +02:00
Evan Lezar
ea7b8ab1f6 Add gcc for centos package builds including cgo
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 11:05:39 +02:00
Evan Lezar
c4bad9b36a Update gitignore
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 11:05:39 +02:00
Evan Lezar
3479e353c5 Merge branch 'centos8-stream' into 'master'
Switch to CentOS Stream 8 to build centos8 packages

See merge request nvidia/container-toolkit/container-toolkit!122
2022-03-29 09:03:48 +00:00
Evan Lezar
f50b4b2f91 Switch from centos:8 to centos:stream8 images to build centos8 packages
Due to the EOL of centos:8 we switch to centos:stream8 to build the centos8 and
rhel8 packages.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 08:07:06 +02:00
Evan Lezar
24ce09db0e Update git submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-29 08:07:06 +02:00
Evan Lezar
a904076cf0 Update libnvidia-container submodule to v1.10.0-rc.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-28 15:54:28 +02:00
Evan Lezar
24d3f854af Bump version to 1.10.0-rc.1
This change make the following version bumps:

* nvidia-container-toolkit to 1.10.0-rc.1
* nvidia-contianer-runtime to 3.10.0-rc.1
* nvidia-docker to 2.10.0-rc.1

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-24 16:56:27 +02:00
Evan Lezar
56ad97b8e5 Merge branch 'bump-1.9.0' into 'master'
Bump version to 1.9.0

See merge request nvidia/container-toolkit/container-toolkit!118
2022-03-18 13:36:30 +00:00
Evan Lezar
eb3be9d676 Use nvcr.io registry for Ubuntu CUDA base images
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-18 14:44:55 +02:00
Evan Lezar
4a3b532c29 Add CI definitions for building and publishing Ubuntu20.04 images
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-18 14:24:50 +02:00
Evan Lezar
cc68635c70 Upcate libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-18 12:34:02 +02:00
Evan Lezar
106279368a Bump version to 1.9.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-03-18 06:19:58 +02:00
Christopher Desiniotis
96772ccdcc Merge branch 'cve-libsasl' into 'master'
Update libsasl in both ubuntu/ubi toolkit images to address CVE-2022-24407

See merge request nvidia/container-toolkit/container-toolkit!116
2022-03-16 17:41:21 +00:00
Christopher Desiniotis
e2d1d379d5 Update libsasl in both ubuntu/ubi toolkit images to address CVE-2022-24407 2022-03-16 17:41:21 +00:00
Evan Lezar
cf74d14504 Merge branch 'update-libnvidia-container' into 'master'
Update libnvidia-container subcomponent

See merge request nvidia/container-toolkit/container-toolkit!112
2022-02-25 21:55:22 +00:00
Evan Lezar
aa3784d185 Update libnvidia-container subcomponent
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-25 21:58:19 +02:00
Evan Lezar
b0bb7b46e4 Merge branch 'CNT-2170/multi-arch' into 'master'
Use buildx and regctl to publish multi-arch images

See merge request nvidia/container-toolkit/container-toolkit!103
2022-02-23 07:08:56 +00:00
Evan Lezar
43ba5267c7 Merge branch 'add-docker-restart-mode-to-config' into 'master'
Add --restart-mode to docker config CLI

See merge request nvidia/container-toolkit/container-toolkit!106
2022-02-22 16:47:11 +00:00
Evan Lezar
5d4ecc24cb Use 'none' instead of 'NONE' to skip containerd restart
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 16:13:44 +02:00
Evan Lezar
d8ed16585a Add --restart-mode to docker config CLI
This change adds a --restart-mode option to the docker config CLI.
This mirrors the option added for containerd and allows 'none' to be
specified to disable the restart of docker. This is useful in
cases where the updated docker config should be reloaded out of
band.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 16:13:44 +02:00
Evan Lezar
a2060c74b3 Update component submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 16:13:44 +02:00
Evan Lezar
2e4ed47ac4 Fix pushing of short tag for devel images
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
93ca91ac3f Add multi-arch image scans
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
cc593087d2 Also search /usr/lib/aarch64-linux-gnu for libnvidia-container libs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
b05db2befe Enable multi-arch builds in CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
a0d2b22a54 Enable multi-arch builds
This change adds arm64/aarch64 images to supported distributions.
This is triggered if BUILD_MULTI_ARCH_IMAGE=true.

Note that for ubi8 images this means that we switch to using centos8
packages instead of centos7 since we do not build aarch64 packages
for the latter.

This also means that for centos7 we only build x86_64 images.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
e8d555f155 Allow buildx to be used for mulit-arch images
This change allows for docker buildx to be used to build container
images. This also allows multi-arch images being built.

In addition to using docker buildx to build images, regctl as a
replacement for the docker push command to release images. This
tool also supports regctl.

The selection of docker buildx (and regctl) is controlled by a
BUILD_MULTI_ARCH_IMAGES make variable. If this is 'true',
the build-% make targets for the toolkit container will be
run through buildx  and the equivalent push-% targets will trigger
a regctl command.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
ec7de9c4e8 Rename TARGETS make variable to DISTRIBUTIONS
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
74ddfe901a Specify docker platform args for build and run commands
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
a1ce176fc4 Ensure that Ubuntu20.04 images also build
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
980185db55 Remove unneeded build-all CI steps
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
ea4013fcd5 Fix centos8 builds
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
97762ce5f9 Update submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-22 10:19:20 +02:00
Evan Lezar
2adee1445b Merge branch 'fix-centos8' into 'master'
Fix centos8 builds

See merge request nvidia/container-toolkit/container-toolkit!111
2022-02-18 14:58:13 +00:00
Evan Lezar
38b49a7faa Remove unneeded build-all CI steps
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-18 16:13:38 +02:00
Evan Lezar
7b78a2a701 Update submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-18 16:10:50 +02:00
Evan Lezar
596d7e8108 Fix centos8 builds
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-18 16:10:50 +02:00
Evan Lezar
5925b7e977 Bump version to 1.9.0-rc.1
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-18 16:10:47 +02:00
Evan Lezar
9d64ab6fb7 Merge branch 'fix-release-tests' into 'master'
Update centos:8 mirrors for release tests

See merge request nvidia/container-toolkit/container-toolkit!110
2022-02-17 14:58:30 +00:00
Evan Lezar
2ea632a861 Update centos:8 mirrors for release tests
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-14 14:15:33 +01:00
Evan Lezar
2c0a66c08c Merge branch 'update-libnvidia-container' into 'master'
Update changelogs

See merge request nvidia/container-toolkit/container-toolkit!109
2022-02-14 11:52:36 +00:00
Evan Lezar
ce7076e231 Update libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-14 12:09:03 +01:00
Evan Lezar
b79c9b9bca Update changelogs
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-14 10:12:45 +01:00
Evan Lezar
37a00041c4 Merge branch 'bump-1.8.1' into 'master'
Bump version to 1.8.1

See merge request nvidia/container-toolkit/container-toolkit!107
2022-02-10 08:43:20 +00:00
Evan Lezar
424b591535 Update libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-10 09:00:14 +01:00
Evan Lezar
99f6d45d71 Bump version to 1.8.1
This change make the following version bumps:

* nvidia-container-toolkit to 1.8.1
* nvidia-contianer-runtime to 3.8.1
* nvidia-docker to 2.9.1

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-10 08:59:17 +01:00
Evan Lezar
a85caf93ff Fix changelog entry in rpm spec
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-09 14:00:41 +01:00
Kevin Klues
87e715ce6b Merge branch 'bump-version-1.8.0' into 'master'
Bump version to 1.8.0

See merge request nvidia/container-toolkit/container-toolkit!105
2022-02-04 09:08:17 +00:00
Evan Lezar
96811666b4 Update component submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-04 09:24:29 +01:00
Evan Lezar
c76767d703 Bump version to 1.8.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-04 09:24:27 +01:00
Evan Lezar
588fdc82f7 Merge branch 'fix-centos8' into 'master'
Update centos8 repos

See merge request nvidia/container-toolkit/container-toolkit!104
2022-02-03 08:32:04 +00:00
Evan Lezar
5863be46ee Use 2h30m timeout for all packaging stages
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-02 15:53:46 +01:00
Evan Lezar
f097af79ca Update centos8 mirrors
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-02 13:43:31 +01:00
Evan Lezar
5c76493642 Update sub-modules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-02 13:43:31 +01:00
Evan Lezar
ad877fb811 Bump version to 1.8.0-rc.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-02-02 13:43:31 +01:00
Evan Lezar
4562cb559c Merge branch 'update-release' into 'master'
Add scripting to update component submodules

See merge request nvidia/container-toolkit/container-toolkit!97
2022-01-28 10:44:51 +00:00
Evan Lezar
72e17e8632 Update libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-27 18:59:08 +01:00
Evan Lezar
6898917f41 Update components before building release
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-27 16:18:23 +01:00
Evan Lezar
53c130fb3c Merge branch 'remove-amazonlinux1' into 'master'
Remove building of Amazonlinux1 packages

See merge request nvidia/container-toolkit/container-toolkit!98
2022-01-24 12:31:35 +00:00
Evan Lezar
45bd3002da Merge branch 'CNT-2396/include-libnvidia-container-go' into 'master'
Copy libnivida-container-go to toolkit directory

See merge request nvidia/container-toolkit/container-toolkit!100
2022-01-21 15:48:53 +00:00
Evan Lezar
58042d78df Copy libnivida-container-go.so to toolkit directory
As of the NVIDIA Container Toolkit 1.8.0-rc.1 the libnvida-container*
packages also provide a libnvidia-container-go library. This must also
be installed in the toolkit container.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-21 15:07:36 +01:00
Evan Lezar
aa52b12c09 Merge branch 'bump-version-1.8.0-rc.2' into 'master'
Bump version to 1.8.0-rc.2

See merge request nvidia/container-toolkit/container-toolkit!96
2022-01-20 18:13:54 +00:00
Evan Lezar
47bc4f90ba Remove support for amazonlinux1
This commit removes support for building amazonlinux1 packages.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-20 17:47:46 +01:00
Evan Lezar
41c1c2312a Add check for matching toolkit and lib versions to release script
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-20 17:44:00 +01:00
Evan Lezar
9d34134b3f Update git submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-20 17:43:44 +01:00
Evan Lezar
d931e861f3 Merge branch 'update-cuda-image' into 'master'
Update CUDA image version to 11.6.0

See merge request nvidia/container-toolkit/container-toolkit!99
2022-01-20 14:50:45 +00:00
Evan Lezar
b1c9b8bb49 Bump version to 1.8.0-rc.2
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-20 15:30:13 +01:00
Evan Lezar
50fbcebe31 Update CUDA image version to 11.6.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-20 15:07:32 +01:00
Kevin Klues
78f38455fd Merge branch 'fix-libnvidia-container-submodule' into 'master'
Update libnvidia-container submodule for WITH_NVCGO CI build fix

See merge request nvidia/container-toolkit/container-toolkit!92
2021-12-08 13:58:32 +00:00
Evan Lezar
f57e9b969c Update libnvidia-container submodule for WITH_NVCGO CI build fix
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-08 14:57:12 +01:00
Evan Lezar
a174aae7b5 Merge branch 'update-libnvidia-container' into 'master'
Update libnvidia-container submodule

See merge request nvidia/container-toolkit/container-toolkit!91
2021-12-08 12:33:51 +00:00
Evan Lezar
6890cb2ed8 Update libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-08 12:57:15 +01:00
Evan Lezar
13603e9794 Merge branch 'fix-centos7' into 'master'
Upgrade NSS for critical CVE in centos7 image

See merge request nvidia/container-toolkit/container-toolkit!90
2021-12-07 16:43:08 +00:00
Evan Lezar
afb260d82e Update nss on centos7 to address CVEs
This addresses https://access.redhat.com/security/cve/CVE-2021-43527

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-07 16:20:17 +01:00
Evan Lezar
f0311bfe17 Allow packages to be specified to address CVEs
This change allows the CVE_UPGRADES build arg to be set
to address CVEs in base images instead of requesting waivers.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-07 16:19:01 +01:00
Evan Lezar
050c29b157 Merge branch 'enable-image-release' into 'master'
Enable release of toolkit-container images

See merge request nvidia/container-toolkit/container-toolkit!89
2021-12-06 09:15:57 +00:00
Evan Lezar
de9afd4623 Merge branch 'bump-post-1.7.0' into 'master'
Bump version post 1.7.0 release

See merge request nvidia/container-toolkit/container-toolkit!88
2021-12-03 16:03:19 +00:00
Evan Lezar
b231d8f365 Merge branch 'fix-skip-scan' into 'master'
Simplify skipping of scans

See merge request nvidia/container-toolkit/container-toolkit!87
2021-12-03 16:03:11 +00:00
Evan Lezar
ee2b84b228 Update libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-03 16:19:31 +01:00
Evan Lezar
0c24fa83ae Bump version post 1.7.0 release
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-03 16:19:29 +01:00
Evan Lezar
79660d1e55 Enable release of toolkit-container images
This change enables the release of toolkit-container images from this
repository instead of the container-config repository. This ensures
that these images are released along with the packages for the
NVIDIA Contianer Toolkit components.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-03 15:30:15 +01:00
Evan Lezar
39d2ff06fa Simplify skipping of scans
Scans are now only skipped if the SKIP_SCANS=yes.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-03 14:39:11 +01:00
Evan Lezar
0ac288e6dd Merge branch 'add-package-upload' into 'master'
Generate image containing packages for release

See merge request nvidia/container-toolkit/container-toolkit!82
2021-12-03 13:25:16 +00:00
Evan Lezar
b334f1977b Add delay and timeout to image pull job
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-03 12:19:57 +01:00
Evan Lezar
2d07385e81 Pull public staging images to scan and release
This change pulls images from public staging repositories to scan
and release. This ensures that the bits built and tested in public
CI (off the master branch, for example) match those scanned and
released. This also serves to reduce the load on our internal CI
runners as these don't have to store artifacts and build images.

Two CI variables: STAGING_REGISTRY and STAGING_VERSION are used
to control which image is pulled for release, with the latter
defaulting to the CI_COMMIT_SHORT_SHA.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-02 17:31:07 +01:00
Evan Lezar
fd5a1a72f0 Address review comments
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-01 18:45:30 +01:00
Evan Lezar
738d28dac5 Add script to pull packages from packaging image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-01 18:45:30 +01:00
Evan Lezar
e662e8197c Add placeholder for testing packaging image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-01 18:45:30 +01:00
Evan Lezar
2964f26533 Add packaging target to CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-01 18:45:30 +01:00
Evan Lezar
629d575fad Add packaging target that includes all release packages
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-01 18:45:30 +01:00
Evan Lezar
7fb04878c7 Include all architecture packages in toolkit container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-01 18:45:30 +01:00
Evan Lezar
f10f533fb2 Merge branch 'bump-1.7.0' into 'master'
Bump version to 1.7.0

See merge request nvidia/container-toolkit/container-toolkit!85
2021-11-30 18:37:01 +00:00
Evan Lezar
9c2cdc2f81 Update libnvidia-container submodule
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-30 14:44:31 +01:00
Evan Lezar
5bbaf8af4b Bump version to 1.7.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-30 14:27:17 +01:00
Evan Lezar
c6ce5b5a29 Merge branch 'set-other-package-versions' into 'master'
Set nvidia-container-runtime and nvidia-docker versions

See merge request nvidia/container-toolkit/container-toolkit!84
2021-11-30 13:04:39 +00:00
Evan Lezar
b9e752e24e Update submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-30 13:35:48 +01:00
Evan Lezar
94849fa822 Bump golang version to 1.16.4
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-30 13:35:48 +01:00
Evan Lezar
b0d6948d94 Add versions.mk file to define versions
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-30 13:35:46 +01:00
Evan Lezar
995bd0d34a Merge branch 'add-multi-arch-package-tests' into 'master'
Allow testing of packages for non-native architectures

See merge request nvidia/container-toolkit/container-toolkit!80
2021-11-29 13:57:53 +00:00
Evan Lezar
27bb5cca0c Specify nvidia-container-runtime and nvidia-docker versions
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-29 14:21:54 +01:00
Evan Lezar
72d1d90ce9 Bump post 1.7.0-rc.1 release
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-29 10:16:25 +01:00
Evan Lezar
6a1f7d0228 Don't rebuild packages for every local run
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-25 14:00:21 +01:00
Evan Lezar
094631329f Add basic multi-arch support to release tests
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-25 14:00:21 +01:00
Evan Lezar
6731f050da Rework init repo for centos8 release tests
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-25 14:00:21 +01:00
Evan Lezar
2ee6ec5d17 Merge branch 'update-libnvidia-container' into 'master'
Update libnvidia-container to latest for release

See merge request nvidia/container-toolkit/container-toolkit!83
2021-11-25 11:08:01 +00:00
Evan Lezar
1c25b349b1 Update libnvidia-container dependency for release
This includes support for filtering CLI flags based on libnvidia-container
version.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-25 11:38:56 +01:00
Evan Lezar
d87bdf9ab6 Update changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-25 11:37:51 +01:00
Evan Lezar
472c89d051 Merge branch 'remove-containerd-dependency' into 'master'
Remove containerd dependency

See merge request nvidia/container-toolkit/container-toolkit!81
2021-11-25 09:13:29 +00:00
Evan Lezar
3470f2ecb9 Merge branch 'add-supported-driver-capabilities' into 'master'
Add supported-driver-capabilities config option

See merge request nvidia/container-toolkit/container-toolkit!74
2021-11-24 15:43:30 +00:00
Evan Lezar
9c27e03c87 Merge branch 'post-1.6.0-release' into 'master'
Bump post 1.6.0 release

See merge request nvidia/container-toolkit/container-toolkit!79
2021-11-24 15:40:36 +00:00
Evan Lezar
09c6995ff9 Update vendoring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-24 15:42:37 +01:00
Evan Lezar
e2ec381093 Specify containerd runtime type as string
This removes the need to import the containerd package and reduces
the dependency management overhead.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-24 15:42:37 +01:00
Evan Lezar
7a31ebadb1 Update submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-24 15:42:06 +01:00
Evan Lezar
7a34be62b2 Override LIB_TAGS for runtime and docker wrapper
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-24 10:59:10 +01:00
Evan Lezar
a4441b6545 Bump post 1.6.0 release
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-24 10:54:06 +01:00
Evan Lezar
ab3ebe5e49 Add jetpack-specific config.toml
This chagne adds a jetpack-specific config.toml file which specifies
supported-driver-capabilities to remove the unsupported ngx capability.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-17 16:53:08 +01:00
Evan Lezar
ea0bf6fbf8 Specify config.toml file suffix as docker build arg
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-17 16:53:08 +01:00
Evan Lezar
0a2db7c70e Add nvidia-container-config option to overide drivercapabilities
This change adds support for a supported-driver-capabilities config
option in the config.toml file that allows the driver capabilities
associated with the NVIDIA_DRIVER_CAPABILITIES=all environment variable.
This can be used on platforms such as Jetson to remove unsupported
capabilities such as "ngx".

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-17 16:53:08 +01:00
Evan Lezar
92bb04f0fd Merge branch 'bump-version-1.6.0' into 'master'
Bump version for 1.6.0 release

See merge request nvidia/container-toolkit/container-toolkit!78
2021-11-17 11:17:39 +00:00
Evan Lezar
4d224a114a Update components versions for 1.6.0 release
* libnvidia-container v1.6.0: dd2c49d6699e4d8529fbeaa58ee91554977b652e
* nvidia-container-runtime v3.6.0: 38ff520daa33d3a3a733440957c6aa346757bd1f
* nvidia-docker v2.7.0: fd3233aa5f4ade28ac6bda616c2fa77a0ce89cd9

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-17 11:49:06 +01:00
Evan Lezar
2795e7d132 Bump version to 1.6.0
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-17 10:07:36 +01:00
Evan Lezar
58801d0c71 Fix logging to stderr instead of file logger
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-16 21:37:50 +01:00
Christopher Desiniotis
a13c785865 Merge branch 'pulse-ci' into 'master'
[ci] remove --pss flag from pulse scanning

See merge request nvidia/container-toolkit/container-toolkit!77
2021-11-16 18:55:29 +00:00
Christopher Desiniotis
b57b8661ca [ci] remove --pss flag from pulse scanning
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
2021-11-16 08:38:43 -08:00
Evan Lezar
d2575abd3a Merge branch 'match-toolkit-tag' into 'master'
Ensure that package tags for components match

See merge request nvidia/container-toolkit/container-toolkit!76
2021-11-16 10:57:37 +00:00
Evan Lezar
bc1f6e05a0 Check for matching tags in release script
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-16 09:53:32 +01:00
Evan Lezar
5db5205647 Get tags for all components in get-component-versions script
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-16 09:49:55 +01:00
Evan Lezar
6a747f5dd3 Update submodules
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-16 09:49:13 +01:00
Kevin Klues
81f9caa9aa Merge branch 'bump-libnvidia-container' into 'master'
Update libnvidia-container to ff6ed3d5637f0537c4951a2757512108cc0ae147

See merge request nvidia/container-toolkit/container-toolkit!75
2021-11-15 15:58:20 +00:00
Evan Lezar
684b5e9237 Update libnvidia-container to ff6ed3d5637f0537c4951a2757512108cc0ae147
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-15 16:29:51 +01:00
Evan Lezar
7d4a8200eb Merge branch 'bump-version' into 'master'
Bump version post v1.6.0-rc.2 release

See merge request nvidia/container-toolkit/container-toolkit!73
2021-11-15 13:28:22 +00:00
Evan Lezar
060f670232 Update libnvidia-container submodule to 1.6.0-rc.3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-15 13:06:42 +01:00
Evan Lezar
1b3e2d9423 Bump version post v1.6.0-rc.2 release
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-15 13:03:24 +01:00
Evan Lezar
06cd37b892 Merge branch 'use-internal-oci' into 'master'
Import internal/oci package from experimental branch

See merge request nvidia/container-toolkit/container-toolkit!68
2021-11-12 10:03:46 +00:00
Evan Lezar
ec8a6d978d Import cmd/nvidia-container-runtime from experimental branch
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-10-29 16:49:05 +02:00
Evan Lezar
d234077780 Remove unneeded files
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-10-29 16:49:02 +02:00
Evan Lezar
b8acd7657a Import internal/oci package from experimental branch
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-10-29 16:44:31 +02:00
1082 changed files with 66147 additions and 224082 deletions

View File

@@ -12,14 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
default:
image: docker:stable
image: docker
services:
- name: docker:stable-dind
- name: docker:dind
command: ["--experimental"]
variables:
GIT_SUBMODULE_STRATEGY: recursive
BUILDIMAGE: "${CI_REGISTRY_IMAGE}/build:${CI_COMMIT_SHORT_SHA}"
BUILD_MULTI_ARCH_IMAGES: "true"
stages:
- image
@@ -32,88 +33,62 @@ stages:
- test
- scan
- release
- build-all
build-dev-image:
stage: image
script:
- apk --no-cache add make bash
- make .build-image
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- make .push-build-image
.requires-build-image:
image: "${BUILDIMAGE}"
.go-check:
extends:
- .requires-build-image
stage: go-checks
fmt:
extends:
- .go-check
script:
- make assert-fmt
vet:
extends:
- .go-check
script:
- make vet
lint:
extends:
- .go-check
script:
- make lint
allow_failure: true
ineffassign:
extends:
- .go-check
script:
- make ineffassign
allow_failure: true
misspell:
extends:
- .go-check
script:
- make misspell
go-build:
extends:
- .requires-build-image
stage: go-build
script:
- make build
unit-tests:
extends:
- .requires-build-image
stage: unit-tests
script:
- make coverage
# Define the distribution targets
.dist-amazonlinux2:
variables:
DIST: amazonlinux2
.dist-centos7:
variables:
DIST: centos7
CVE_UPDATES: "cyrus-sasl-lib"
.dist-centos8:
variables:
DIST: centos8
CVE_UPDATES: "cyrus-sasl-lib"
.dist-debian10:
variables:
DIST: debian10
.dist-debian9:
variables:
DIST: debian9
.dist-fedora35:
variables:
DIST: fedora35
.dist-opensuse-leap15.1:
variables:
DIST: opensuse-leap15.1
.dist-ubi8:
variables:
DIST: ubi8
CVE_UPDATES: "cyrus-sasl-lib"
.dist-ubuntu16.04:
variables:
DIST: ubuntu16.04
.dist-ubuntu18.04:
variables:
DIST: ubuntu18.04
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
.dist-ubuntu20.04:
variables:
DIST: ubuntu20.04
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
.dist-packaging:
variables:
DIST: packaging
# Define architecture targets
.arch-aarch64:
variables:
ARCH: aarch64
@@ -134,120 +109,14 @@ unit-tests:
variables:
ARCH: x86_64
# Define the package build helpers
.multi-arch-build:
before_script:
- apk add --no-cache coreutils build-base sed git bash make
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes -c yes'
.package-artifacts:
# Define the platform targets
.platform-amd64:
variables:
ARTIFACTS_NAME: "toolkit-container-${CI_PIPELINE_ID}"
ARTIFACTS_ROOT: "toolkit-container-${CI_PIPELINE_ID}"
DIST_DIR: ${CI_PROJECT_DIR}/${ARTIFACTS_ROOT}
PLATFORM: linux/amd64
.package-build:
extends:
- .multi-arch-build
- .package-artifacts
stage: package-build
script:
- ./scripts/release.sh ${DIST}-${ARCH}
artifacts:
name: ${ARTIFACTS_NAME}
paths:
- ${ARTIFACTS_ROOT}
# Define the package build targets
package-ubuntu18.04-amd64:
extends:
- .package-build
- .dist-ubuntu18.04
- .arch-amd64
package-ubuntu18.04-arm64:
extends:
- .package-build
- .dist-ubuntu18.04
- .arch-arm64
package-ubuntu18.04-ppc64le:
extends:
- .package-build
- .dist-ubuntu18.04
- .arch-ppc64le
package-centos7-x86_64:
extends:
- .package-build
- .dist-centos7
- .arch-x86_64
package-centos8-x86_64:
extends:
- .package-build
- .dist-centos8
- .arch-x86_64
# Define the image build targets
.image-build:
stage: image-build
.platform-arm64:
variables:
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
VERSION: "${CI_COMMIT_SHORT_SHA}"
before_script:
- apk add --no-cache bash make
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
image-centos7:
extends:
- .image-build
- .package-artifacts
- .dist-centos7
needs:
- package-centos7-x86_64
script:
- make -f build/container/Makefile build-${DIST}
- make -f build/container/Makefile push-${DIST}
image-centos8:
extends:
- .image-build
- .package-artifacts
- .dist-centos8
needs:
- package-centos8-x86_64
script:
- make -f build/container/Makefile build-${DIST}
- make -f build/container/Makefile push-${DIST}
image-ubi8:
extends:
- .image-build
- .package-artifacts
- .dist-ubi8
needs:
# Note: The ubi8 image currently uses the centos7 packages
- package-centos7-x86_64
script:
- make -f build/container/Makefile build-${DIST}
- make -f build/container/Makefile push-${DIST}
image-ubuntu18.04:
extends:
- .image-build
- .package-artifacts
- .dist-ubuntu18.04
needs:
- package-ubuntu18.04-amd64
# TODO: These will be required once we generate multi-arch images
# - package-ubuntu18.04-arm64
# - package-ubuntu18.04-ppc64le
script:
- make -f build/container/Makefile build-${DIST}
- make -f build/container/Makefile push-${DIST}
PLATFORM: linux/arm64
# Define test helpers
.integration:
@@ -262,77 +131,38 @@ image-ubuntu18.04:
script:
- make -f build/container/Makefile test-${DIST}
.test:toolkit:
extends:
- .integration
variables:
TEST_CASES: "toolkit"
.test:docker:
extends:
- .integration
variables:
TEST_CASES: "docker"
.test:containerd:
# TODO: The containerd tests fail due to issues with SIGHUP.
# Until this is resolved with retry up to twice and allow failure here.
retry: 2
allow_failure: true
extends:
- .integration
variables:
TEST_CASES: "containerd"
.test:crio:
extends:
- .integration
variables:
TEST_CASES: "crio"
# Define the test targets
test-toolkit-ubuntu18.04:
test-packaging:
extends:
- .test:toolkit
- .dist-ubuntu18.04
- .integration
- .dist-packaging
needs:
- image-ubuntu18.04
- image-packaging
test-containerd-ubuntu18.04:
extends:
- .test:containerd
- .dist-ubuntu18.04
needs:
- image-ubuntu18.04
test-crio-ubuntu18.04:
extends:
- .test:crio
- .dist-ubuntu18.04
needs:
- image-ubuntu18.04
test-docker-ubuntu18.04:
extends:
- .test:docker
- .dist-ubuntu18.04
needs:
- image-ubuntu18.04
# Download the regctl binary for use in the release steps
.regctl-setup:
before_script:
- export REGCTL_VERSION=v0.3.10
- apk add --no-cache curl
- mkdir -p bin
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
- chmod a+x bin/regctl
- export PATH=$(pwd)/bin:${PATH}
# .release forms the base of the deployment jobs which push images to the CI registry.
# This is extended with the version to be deployed (e.g. the SHA or TAG) and the
# target os.
.release:
stage:
release
stage: release
variables:
# Define the source image for the release
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
VERSION: "${CI_COMMIT_SHORT_SHA}"
# OUT_IMAGE_VERSION is overridden for external releases
OUT_IMAGE_VERSION: "${CI_COMMIT_SHORT_SHA}"
stage: release
before_script:
- !reference [.regctl-setup, before_script]
# We ensure that the OUT_IMAGE_VERSION is set
- 'echo Version: ${OUT_IMAGE_VERSION} ; [[ -n "${OUT_IMAGE_VERSION}" ]] || exit 1'
@@ -340,16 +170,16 @@ test-docker-ubuntu18.04:
# need to tag the image.
# Note: a leading 'v' is stripped from the version if present
- apk add --no-cache make bash
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
script:
- docker tag "${IMAGE_NAME}:${VERSION}-${DIST}" "${OUT_IMAGE_NAME}:${OUT_IMAGE_VERSION}-${DIST}"
# Log in to the "output" registry, tag the image and push the image
- 'echo "Logging in to output registry ${OUT_REGISTRY}"'
- docker logout
- docker login -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" "${OUT_REGISTRY}"
- make IMAGE_NAME=${OUT_IMAGE_NAME} VERSION=${OUT_IMAGE_VERSION} -f build/container/Makefile push-${DIST}
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- regctl registry login "${CI_REGISTRY}" -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}"
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || echo "Logging in to output registry ${OUT_REGISTRY}"'
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"'
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
# Target
- make -f build/container/Makefile push-${DIST}
# Define a staging release step that pushes an image to an internal "staging" repository
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
@@ -364,7 +194,7 @@ test-docker-ubuntu18.04:
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/container-toolkit"
# Define an external release step that pushes an image to an external repository.
# This includes a devlopment image off master.
# This includes a devlopment image off main.
.release:external:
extends:
- .release
@@ -384,13 +214,6 @@ release:staging-centos7:
needs:
- image-centos7
release:staging-centos8:
extends:
- .release:staging
- .dist-centos8
needs:
- image-centos8
release:staging-ubi8:
extends:
- .release:staging
@@ -407,3 +230,20 @@ release:staging-ubuntu18.04:
- test-containerd-ubuntu18.04
- test-crio-ubuntu18.04
- test-docker-ubuntu18.04
release:staging-ubuntu20.04:
extends:
- .release:staging
- .dist-ubuntu20.04
needs:
- test-toolkit-ubuntu20.04
- test-containerd-ubuntu20.04
- test-crio-ubuntu20.04
- test-docker-ubuntu20.04
release:staging-packaging:
extends:
- .release:staging
- .dist-packaging
needs:
- test-packaging

3
.gitignore vendored
View File

@@ -1,8 +1,9 @@
dist
*.swp
*.swo
/coverage.out
/coverage.out*
/test/output/
/nvidia-container-runtime
/nvidia-container-toolkit
/nvidia-ctk
/shared-*

View File

@@ -15,46 +15,373 @@
include:
- .common-ci.yml
# build-all jobs build packages for every OS / ARCH combination we support.
#
# They are run under two conditions:
# 1) Automatically whenever a new tag is pushed to the repo (e.g. v1.1.0)
# 2) Manually by a reviewer just before merging a MR.
.build-all-for-arch:
build-dev-image:
stage: image
script:
- apk --no-cache add make bash
- make .build-image
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- make .push-build-image
.requires-build-image:
image: "${BUILDIMAGE}"
.go-check:
extends:
- .requires-build-image
stage: go-checks
fmt:
extends:
- .go-check
script:
- make assert-fmt
vet:
extends:
- .go-check
script:
- make vet
lint:
extends:
- .go-check
script:
- make lint
allow_failure: true
ineffassign:
extends:
- .go-check
script:
- make ineffassign
allow_failure: true
misspell:
extends:
- .go-check
script:
- make misspell
go-build:
extends:
- .requires-build-image
stage: go-build
script:
- make build
unit-tests:
extends:
- .requires-build-image
stage: unit-tests
script:
- make coverage
# Define the package build helpers
.multi-arch-build:
before_script:
- apk add --no-cache coreutils build-base sed git bash make
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes -c yes'
.package-artifacts:
variables:
# Setting DIST=docker invokes the docker- release targets
DIST: docker
ARTIFACTS_NAME: "toolkit-container-${CI_PIPELINE_ID}"
ARTIFACTS_ROOT: "toolkit-container-${CI_PIPELINE_ID}"
DIST_DIR: ${CI_PROJECT_DIR}/${ARTIFACTS_ROOT}
.package-build:
extends:
- .multi-arch-build
- .package-artifacts
stage: package-build
timeout: 3h
script:
- ./scripts/build-packages.sh ${DIST}-${ARCH}
artifacts:
name: ${ARTIFACTS_NAME}
paths:
- ${ARTIFACTS_ROOT}
# Define the package build targets
package-amazonlinux2-aarch64:
extends:
- .package-build
stage: build-all
timeout: 2h 30m
rules:
- if: $CI_COMMIT_TAG
when: always
- .dist-amazonlinux2
- .arch-aarch64
# The full set of build-all jobs organized to
# have builds for each ARCH run in parallel.
build-all-amd64:
package-amazonlinux2-x86_64:
extends:
- .build-all-for-arch
- .arch-amd64
build-all-x86_64:
extends:
- .build-all-for-arch
- .package-build
- .dist-amazonlinux2
- .arch-x86_64
build-all-ppc64le:
package-centos7-ppc64le:
extends:
- .build-all-for-arch
- .package-build
- .dist-centos7
- .arch-ppc64le
build-all-arm64:
package-centos7-x86_64:
extends:
- .build-all-for-arch
- .package-build
- .dist-centos7
- .arch-x86_64
package-centos8-aarch64:
extends:
- .package-build
- .dist-centos8
- .arch-aarch64
package-centos8-ppc64le:
extends:
- .package-build
- .dist-centos8
- .arch-ppc64le
package-centos8-x86_64:
extends:
- .package-build
- .dist-centos8
- .arch-x86_64
package-debian10-amd64:
extends:
- .package-build
- .dist-debian10
- .arch-amd64
package-debian9-amd64:
extends:
- .package-build
- .dist-debian9
- .arch-amd64
package-fedora35-aarch64:
extends:
- .package-build
- .dist-fedora35
- .arch-aarch64
package-fedora35-x86_64:
extends:
- .package-build
- .dist-fedora35
- .arch-x86_64
package-opensuse-leap15.1-x86_64:
extends:
- .package-build
- .dist-opensuse-leap15.1
- .arch-x86_64
package-ubuntu16.04-amd64:
extends:
- .package-build
- .dist-ubuntu16.04
- .arch-amd64
package-ubuntu16.04-ppc64le:
extends:
- .package-build
- .dist-ubuntu16.04
- .arch-ppc64le
package-ubuntu18.04-amd64:
extends:
- .package-build
- .dist-ubuntu18.04
- .arch-amd64
package-ubuntu18.04-arm64:
extends:
- .package-build
- .dist-ubuntu18.04
- .arch-arm64
build-all-aarch64:
package-ubuntu18.04-ppc64le:
extends:
- .build-all-for-arch
- .arch-aarch64
- .package-build
- .dist-ubuntu18.04
- .arch-ppc64le
.buildx-setup:
before_script:
- export BUILDX_VERSION=v0.6.3
- apk add --no-cache curl
- mkdir -p ~/.docker/cli-plugins
- curl -sSLo ~/.docker/cli-plugins/docker-buildx "https://github.com/docker/buildx/releases/download/${BUILDX_VERSION}/buildx-${BUILDX_VERSION}.linux-amd64"
- chmod a+x ~/.docker/cli-plugins/docker-buildx
- docker buildx create --use --platform=linux/amd64,linux/arm64
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes'
# Define the image build targets
.image-build:
stage: image-build
variables:
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
VERSION: "${CI_COMMIT_SHORT_SHA}"
PUSH_ON_BUILD: "true"
before_script:
- !reference [.buildx-setup, before_script]
- apk add --no-cache bash make
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
script:
- make -f build/container/Makefile build-${DIST}
image-centos7:
extends:
- .image-build
- .package-artifacts
- .dist-centos7
needs:
- package-centos7-ppc64le
- package-centos7-x86_64
image-ubi8:
extends:
- .image-build
- .package-artifacts
- .dist-ubi8
needs:
# Note: The ubi8 image uses the centos8 packages
- package-centos8-aarch64
- package-centos8-x86_64
- package-centos8-ppc64le
image-ubuntu18.04:
extends:
- .image-build
- .package-artifacts
- .dist-ubuntu18.04
needs:
- package-ubuntu18.04-amd64
- package-ubuntu18.04-arm64
- package-ubuntu18.04-ppc64le
image-ubuntu20.04:
extends:
- .image-build
- .package-artifacts
- .dist-ubuntu20.04
needs:
- package-ubuntu18.04-amd64
- package-ubuntu18.04-arm64
- package-ubuntu18.04-ppc64le
# The DIST=packaging target creates an image containing all built packages
image-packaging:
extends:
- .image-build
- .package-artifacts
- .dist-packaging
needs:
- package-amazonlinux2-aarch64
- package-amazonlinux2-x86_64
- package-centos7-ppc64le
- package-centos7-x86_64
- package-centos8-aarch64
- package-centos8-ppc64le
- package-centos8-x86_64
- package-debian10-amd64
- package-debian9-amd64
- package-fedora35-aarch64
- package-fedora35-x86_64
- package-opensuse-leap15.1-x86_64
- package-ubuntu16.04-amd64
- package-ubuntu16.04-ppc64le
- package-ubuntu18.04-amd64
- package-ubuntu18.04-arm64
- package-ubuntu18.04-ppc64le
# Define publish test helpers
.test:toolkit:
extends:
- .integration
variables:
TEST_CASES: "toolkit"
.test:docker:
extends:
- .integration
variables:
TEST_CASES: "docker"
.test:containerd:
# TODO: The containerd tests fail due to issues with SIGHUP.
# Until this is resolved with retry up to twice and allow failure here.
retry: 2
allow_failure: true
extends:
- .integration
variables:
TEST_CASES: "containerd"
.test:crio:
extends:
- .integration
variables:
TEST_CASES: "crio"
# Define the test targets
test-toolkit-ubuntu18.04:
extends:
- .test:toolkit
- .dist-ubuntu18.04
needs:
- image-ubuntu18.04
test-containerd-ubuntu18.04:
extends:
- .test:containerd
- .dist-ubuntu18.04
needs:
- image-ubuntu18.04
test-crio-ubuntu18.04:
extends:
- .test:crio
- .dist-ubuntu18.04
needs:
- image-ubuntu18.04
test-docker-ubuntu18.04:
extends:
- .test:docker
- .dist-ubuntu18.04
needs:
- image-ubuntu18.04
test-toolkit-ubuntu20.04:
extends:
- .test:toolkit
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
test-containerd-ubuntu20.04:
extends:
- .test:containerd
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
test-crio-ubuntu20.04:
extends:
- .test:crio
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
test-docker-ubuntu20.04:
extends:
- .test:docker
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04

3
.gitmodules vendored
View File

@@ -1,9 +1,12 @@
[submodule "third_party/libnvidia-container"]
path = third_party/libnvidia-container
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
branch = main
[submodule "third_party/nvidia-container-runtime"]
path = third_party/nvidia-container-runtime
url = https://gitlab.com/nvidia/container-toolkit/container-runtime.git
branch = main
[submodule "third_party/nvidia-docker"]
path = third_party/nvidia-docker
url = https://gitlab.com/nvidia/container-toolkit/nvidia-docker.git
branch = main

View File

@@ -27,11 +27,69 @@ default:
variables:
DOCKER_DRIVER: overlay2
DOCKER_TLS_CERTDIR: "/certs"
# Release "devel"-tagged images off the master branch
RELEASE_DEVEL_BRANCH: "master"
# Release "devel"-tagged images off the main branch
RELEASE_DEVEL_BRANCH: "main"
DEVEL_RELEASE_IMAGE_VERSION: "devel"
# On the multi-arch builder we don't need the qemu setup.
SKIP_QEMU_SETUP: "1"
# Define the public staging registry
STAGING_REGISTRY: registry.gitlab.com/nvidia/container-toolkit/container-toolkit/staging
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
.image-pull:
stage: image-build
variables:
IN_REGISTRY: "${STAGING_REGISTRY}"
IN_IMAGE_NAME: container-toolkit
IN_VERSION: "${STAGING_VERSION}"
OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
PUSH_MULTIPLE_TAGS: "false"
# We delay the job start to allow the public pipeline to generate the required images.
when: delayed
start_in: 30 minutes
timeout: 30 minutes
retry:
max: 2
when:
- job_execution_timeout
- stuck_or_timeout_failure
before_script:
- !reference [.regctl-setup, before_script]
- apk add --no-cache make bash
- >
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
script:
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
- make -f build/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
image-centos7:
extends:
- .image-pull
- .dist-centos7
image-ubi8:
extends:
- .image-pull
- .dist-ubi8
image-ubuntu18.04:
extends:
- .image-pull
- .dist-ubuntu18.04
image-ubuntu20.04:
extends:
- .image-pull
- .dist-ubuntu20.04
# The DIST=packaging target creates an image containing all built packages
image-packaging:
extends:
- .image-pull
- .dist-packaging
# We skip the integration tests for the internal CI:
.integration:
@@ -49,24 +107,20 @@ variables:
variables:
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}-${DIST}"
IMAGE_ARCHIVE: "container-toolkit.tar"
rules:
- if: $CI_COMMIT_MESSAGE =~ /\[skip[ _-]scans?\]/i
when: never
- if: $SKIP_SCANS
when: never
- if: $CI_COMMIT_TAG == null && $CI_COMMIT_BRANCH != $RELEASE_DEVEL_BRANCH
allow_failure: true
except:
variables:
- $SKIP_SCANS && $SKIP_SCANS == "yes"
before_script:
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
# TODO: We should specify the architecture here and scan all architectures
- docker pull "${IMAGE}"
- docker pull --platform="${PLATFORM}" "${IMAGE}"
- docker save "${IMAGE}" -o "${IMAGE_ARCHIVE}"
- AuthHeader=$(echo -n $SSA_CLIENT_ID:$SSA_CLIENT_SECRET | base64 -w0)
- >
export SSA_TOKEN=$(curl --request POST --header "Authorization: Basic $AuthHeader" --header "Content-Type: application/x-www-form-urlencoded" ${SSA_ISSUER_URL} | jq ".access_token" | tr -d '"')
- if [ -z "$SSA_TOKEN" ]; then exit 1; else echo "SSA_TOKEN set!"; fi
script:
- pulse-cli -n $NSPECT_ID --pss $PSS_URL --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
- pulse-cli -n $NSPECT_ID --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
artifacts:
when: always
expire_in: 1 week
@@ -78,34 +132,65 @@ variables:
- policy_evaluation.json
# Define the scan targets
scan-centos7:
scan-centos7-amd64:
extends:
- .scan
- .dist-centos7
- .platform-amd64
needs:
- image-centos7
scan-centos8:
scan-centos7-arm64:
extends:
- .scan
- .dist-centos8
- .dist-centos7
- .platform-arm64
needs:
- image-centos8
- image-centos7
- scan-centos7-amd64
scan-ubuntu18.04:
scan-ubuntu18.04-amd64:
extends:
- .scan
- .dist-ubuntu18.04
- .platform-amd64
needs:
- image-ubuntu18.04
scan-ubi8:
scan-ubuntu20.04-amd64:
extends:
- .scan
- .dist-ubuntu20.04
- .platform-amd64
needs:
- image-ubuntu20.04
scan-ubuntu20.04-arm64:
extends:
- .scan
- .dist-ubuntu20.04
- .platform-arm64
needs:
- image-ubuntu20.04
- scan-ubuntu20.04-amd64
scan-ubi8-amd64:
extends:
- .scan
- .dist-ubi8
- .platform-amd64
needs:
- image-ubi8
scan-ubi8-arm64:
extends:
- .scan
- .dist-ubi8
- .platform-arm64
needs:
- image-ubi8
- scan-ubi8-amd64
# Define external release helpers
.release:ngc:
extends:
@@ -115,20 +200,20 @@ scan-ubi8:
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
OUT_REGISTRY: "${NGC_REGISTRY}"
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
# TODO: For now we disable external releases
DOCKER: echo
.release:dockerhub:
release:staging-ubuntu18.04:
extends:
- .release:external
variables:
OUT_REGISTRY_USER: "${REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${REGISTRY_TOKEN}"
OUT_REGISTRY: "${DOCKERHUB_REGISTRY}"
OUT_IMAGE_NAME: "${REGISTRY_IMAGE}"
- .release:staging
- .dist-ubuntu18.04
needs:
- image-ubuntu18.04
# TODO: For now we disable external releases
DOCKER: echo
release:staging-ubuntu20.04:
extends:
- .release:staging
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
# Define the external release targets
# Release to NGC
@@ -137,38 +222,17 @@ release:ngc-centos7:
- .release:ngc
- .dist-centos7
release:ngc-centos8:
extends:
- .release:ngc
- .dist-centos8
release:ngc-ubuntu18:
release:ngc-ubuntu18.04:
extends:
- .release:ngc
- .dist-ubuntu18.04
release:ngc-ubuntu20.04:
extends:
- .release:ngc
- .dist-ubuntu20.04
release:ngc-ubi8:
extends:
- .release:ngc
- .dist-ubi8
# Release to Dockerhub
release:dockerhub-centos7:
extends:
- .release:dockerhub
- .dist-centos7
release:dockerhub-centos8:
extends:
- .release:dockerhub
- .dist-centos8
release:dockerhub-ubuntu18:
extends:
- .release:dockerhub
- .dist-ubuntu18.04
release:dockerhub-ubi8:
extends:
- .release:dockerhub
- .dist-ubi8

196
CHANGELOG.md Normal file
View File

@@ -0,0 +1,196 @@
# NVIDIA Container Toolkit Changelog
## v1.11.0
* Promote v1.11.0-rc.3 to v1.11.0
## v1.11.0-rc.3
* Build fedora35 packages
* Introduce an `nvidia-container-toolkit-base` package for better dependency management
* Fix removal of `nvidia-container-runtime-hook` on RPM-based systems
* Inject platform files into container on Tegra-based systems
* [toolkit container] Update CUDA base images to 11.7.1
* [libnvidia-container] Preload libgcc_s.so.1 on arm64 systems
## v1.11.0-rc.2
* Allow `accept-nvidia-visible-devices-*` config options to be set by toolkit container
* [libnvidia-container] Fix bug where LDCache was not updated when the `--no-pivot-root` option was specified
## v1.11.0-rc.1
* Add discovery of GPUDirect Storage (`nvidia-fs*`) devices if the `NVIDIA_GDS` environment variable of the container is set to `enabled`
* Add discovery of MOFED Infiniband devices if the `NVIDIA_MOFED` environment variable of the container is set to `enabled`
* Fix bug in CSV mode where libraries listed as `sym` entries in mount specification are not added to the LDCache.
* Rename `nvidia-container-toolkit` executable to `nvidia-container-runtime-hook` and create `nvidia-container-toolkit` as a symlink to `nvidia-container-runtime-hook` instead.
* Add `nvidia-ctk runtime configure` command to configure the Docker config file (e.g. `/etc/docker/daemon.json`) for use with the NVIDIA Container Runtime.
## v1.10.0
* Promote v1.10.0-rc.3 to v1.10.0
## v1.10.0-rc.3
* Use default config instead of raising an error if config file cannot be found
* Ignore NVIDIA_REQUIRE_JETPACK* environment variables for requirement checks
* Fix bug in detection of Tegra systems where `/sys/devices/soc0/family` is ignored
* Fix bug where links to devices were detected as devices
* [libnvida-container] Fix bug introduced when adding libcudadebugger.so to list of libraries
## v1.10.0-rc.2
* Add support for NVIDIA_REQUIRE_* checks for cuda version and arch to csv mode
* Switch to debug logging to reduce log verbosity
* Support logging to logs requested in command line
* Fix bug when launching containers with relative root path (e.g. using containerd)
* Allow low-level runtime path to be set explicitly as nvidia-container-runtime.runtimes option
* Fix failure to locate low-level runtime if PATH envvar is unset
* Replace experimental option for NVIDIA Container Runtime with nvidia-container-runtime.mode = csv option
* Use csv as default mode on Tegra systems without NVML
* Add --version flag to all CLIs
* [libnvidia-container] Bump libtirpc to 1.3.2
* [libnvidia-container] Fix bug when running host ldconfig using glibc compiled with a non-standard prefix
* [libnvidia-container] Add libcudadebugger.so to list of compute libraries
## v1.10.0-rc.1
* Include nvidia-ctk CLI in installed binaries
* Add experimental option to NVIDIA Container Runtime
## v1.9.0
* [libnvidia-container] Add additional check for Tegra in /sys/.../family file in CLI
* [libnvidia-container] Update jetpack-specific CLI option to only load Base CSV files by default
* [libnvidia-container] Fix bug (from 1.8.0) when mounting GSP firmware into containers without /lib to /usr/lib symlinks
* [libnvidia-container] Update nvml.h to CUDA 11.6.1 nvML_DEV 11.6.55
* [libnvidia-container] Update switch statement to include new brands from latest nvml.h
* [libnvidia-container] Process all --require flags on Jetson platforms
* [libnvidia-container] Fix long-standing issue with running ldconfig on Debian systems
## v1.8.1
* [libnvidia-container] Fix bug in determining cgroup root when running in nested containers
* [libnvidia-container] Fix permission issue when determining cgroup version
## v1.8.0
* Promote 1.8.0-rc.2-1 to 1.8.0
## v1.8.0-rc.2
* Remove support for building amazonlinux1 packages
## v1.8.0-rc.1
* [libnvidia-container] Add support for cgroupv2
* Release toolkit-container images from nvidia-container-toolkit repository
## v1.7.0
* Promote 1.7.0-rc.1-1 to 1.7.0
* Bump Golang version to 1.16.4
## v1.7.0-rc.1
* Specify containerd runtime type as string in config tools to remove dependency on containerd package
* Add supported-driver-capabilities config option to allow for a subset of all driver capabilities to be specified
## v1.6.0
* Promote 1.6.0-rc.3-1 to 1.6.0
* Fix unnecessary logging to stderr instead of configured nvidia-container-runtime log file
## v1.6.0-rc.3
* Add supported-driver-capabilities config option to the nvidia-container-toolkit
* Move OCI and command line checks for runtime to internal oci package
## v1.6.0-rc.2
* Use relative path to OCI specification file (config.json) if bundle path is not specified as an argument to the nvidia-container-runtime
## v1.6.0-rc.1
* Add AARCH64 package for Amazon Linux 2
* Include nvidia-container-runtime into nvidia-container-toolkit package
## v1.5.1
* Fix bug where Docker Swarm device selection is ignored if NVIDIA_VISIBLE_DEVICES is also set
* Improve unit testing by using require package and adding coverage reports
* Remove unneeded go dependencies by running go mod tidy
* Move contents of pkg directory to cmd for CLI tools
* Ensure make binary target explicitly sets GOOS
## v1.5.0
* Add dependence on libnvidia-container-tools >= 1.4.0
* Add golang check targets to Makefile
* Add Jenkinsfile definition for build targets
* Move docker.mk to docker folder
## v1.4.2
* Add dependence on libnvidia-container-tools >= 1.3.3
## v1.4.1
* Ignore NVIDIA_VISIBLE_DEVICES for containers with insufficent privileges
* Add dependence on libnvidia-container-tools >= 1.3.2
## v1.4.0
* Add 'compute' capability to list of defaults
* Add dependence on libnvidia-container-tools >= 1.3.1
## v1.3.0
* Promote 1.3.0-rc.2-1 to 1.3.0
* Add dependence on libnvidia-container-tools >= 1.3.0
## v1.3.0-rc.2
* 2c180947 Add more tests for new semantics with device list from volume mounts
* 7c003857 Refactor accepting device lists from volume mounts as a boolean
## v1.3.0-rc.1
* b50d86c1 Update build system to accept a TAG variable for things like rc.x
* fe65573b Add common CI tests for things like golint, gofmt, unit tests, etc.
* da6fbb34 Revert "Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*"
* a7fb3330 Flip build-all targets to run automatically on merge requests
* 8b248b66 Rename github.com/NVIDIA/container-toolkit to nvidia-container-toolkit
* da36874e Add new config options to pull device list from mounted files instead of ENVVAR
## v1.2.1
* 4e6e0ed4 Add 'ngx' to list of*all* driver capabilities
* 2f4af743 List config.toml as a config file in the RPM SPEC
## v1.2.0
* 8e0aab46 Fix repo listed in changelog for debian distributions
* 320bb6e4 Update dependence on libnvidia-container to 1.2.0
* 6cfc8097 Update package license to match source license
* e7dc3cbb Fix debian copyright file
* d3aee3e0 Add the 'ngx' driver capability
## v1.1.2
* c32237f3 Add support for parsing Linux Capabilities for older OCI specs
## v1.1.1
* d202aded Update dependence to libnvidia-container 1.1.1
## v1.1.0
* 4e4de762 Update build system to support multi-arch builds
* fcc1d116 Add support for MIG (Multi-Instance GPUs)
* d4ff0416 Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*
* 60f165ad Add no-pivot option to toolkit
## v1.0.5
* Initial release. Replaces older package nvidia-container-runtime-hook. (Closes: #XXXXXX)

View File

@@ -13,7 +13,7 @@ The `nvidia-container-toolkit` resides in this repo directly.
In oder to build the packages, the following command is executed
```sh
./scripts/build-all-components.sh TARGET
./scripts/build-packages.sh TARGET
```
where `TARGET` is a make target that is valid for each of the sub-components.
@@ -21,6 +21,8 @@ These include:
* `ubuntu18.04-amd64`
* `centos8-x86_64`
If no `TARGET` is specified, all valid release targets are built.
The packages are generated in the `dist` folder.
## Testing local changes
@@ -37,9 +39,23 @@ The [test/release](./test/release/) folder contains documentation on how the ins
## Releasing
A utility script [`scripts/release.sh`](./scripts/release.sh) is provided to build
packages required for release. If run without arguments, all supported distribution-architecture combinations are built. A specific distribution-architecture pair can also be provided
```sh
./scripts/release.sh ubuntu18.04-amd64
In order to release packages required for a release, a utility script
[`scripts/release-packages.sh`](./scripts/release-packages.sh) is provided.
This script can be executed as follows:
```bash
GPG_LOCAL_USER="GPG_USER" \
MASTER_KEY_PATH=/path/to/gpg-master.key \
SUB_KEY_PATH=/path/to/gpg-subkey.key \
./scripts/release-packages.sh REPO PACKAGE_REPO_ROOT [REFERENCE]
```
where the `amd64` builds for `ubuntu18.04` are provided as an example.
Where `REPO` is one of `stable` or `experimental`, `PACKAGE_REPO_ROOT` is the local path to the `libnvidia-container` repository checked out to the `gh-pages` branch, and `REFERENCE` is the git SHA that is to be released. If reference is not specified `HEAD` is assumed.
This scripts performs the following basic functions:
* Pulls the package image defined by the `REFERENCE` git SHA from the staging registry,
* Copies the required packages to the package repository at `PACKAGE_REPO_ROOT/REPO`,
* Signs the packages using the specified GPG keys
While the last two are performed, commits are added to the package repository. These can be pushed to the relevant repository.

View File

@@ -16,11 +16,8 @@ DOCKER ?= docker
MKDIR ?= mkdir
DIST_DIR ?= $(CURDIR)/dist
LIB_NAME := nvidia-container-toolkit
LIB_VERSION := 1.6.0
LIB_TAG ?= rc.2
include $(CURDIR)/versions.mk
GOLANG_VERSION := 1.16.3
MODULE := github.com/NVIDIA/nvidia-container-toolkit
# By default run all native docker-based targets
@@ -41,16 +38,21 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
$(info CMD_TARGETS=$(CMD_TARGETS))
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate $(CHECK_TARGETS)
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
DOCKER_TARGETS := $(patsubst %,docker-%, $(TARGETS))
.PHONY: $(TARGETS) $(DOCKER_TARGETS)
ifeq ($(VERSION),)
CLI_VERSION = $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
else
CLI_VERSION = $(VERSION)
endif
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
GOOS ?= linux
binaries: cmds
@@ -59,7 +61,7 @@ cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
endif
cmds: $(CMD_TARGETS)
$(CMD_TARGETS): cmd-%:
GOOS=$(GOOS) go build -ldflags "-s -w" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
GOOS=$(GOOS) go build -ldflags "-s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
build:
GOOS=$(GOOS) go build ./...
@@ -93,11 +95,7 @@ ineffassign:
lint:
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
go list -f '{{.Dir}}' $(MODULE)/... | grep -v /internal/ | xargs golint -set_exit_status
lint-internal:
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
go list -f '{{.Dir}}' $(MODULE)/internal/... | xargs golint -set_exit_status
go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status
misspell:
misspell $(MODULE)/...
@@ -105,6 +103,9 @@ misspell:
vet:
go vet $(MODULE)/...
licenses:
go-licenses csv $(MODULE)/...
COVERAGE_FILE := coverage.out
test: build cmds
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
@@ -145,3 +146,15 @@ $(DOCKER_TARGETS): docker-%: .build-image
--user $$(id -u):$$(id -g) \
$(BUILDIMAGE) \
make $(*)
# Start an interactive shell using the development image.
PHONY: .shell
.shell:
$(DOCKER) run \
--rm \
-ti \
-e GOCACHE=/tmp/.cache \
-v $(PWD):$(PWD) \
-w $(PWD) \
--user $$(id -u):$$(id -g) \
$(BUILDIMAGE)

View File

@@ -1,6 +1,6 @@
# NVIDIA Container Toolkit
[![GitHub license](https://img.shields.io/github/license/NVIDIA/nvidia-container-toolkit?style=flat-square)](https://raw.githubusercontent.com/NVIDIA/nvidia-container-toolkit/master/LICENSE)
[![GitHub license](https://img.shields.io/github/license/NVIDIA/nvidia-container-toolkit?style=flat-square)](https://raw.githubusercontent.com/NVIDIA/nvidia-container-toolkit/main/LICENSE)
[![Documentation](https://img.shields.io/badge/documentation-wiki-blue.svg?style=flat-square)](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/overview.html)
[![Package repository](https://img.shields.io/badge/packages-repository-b956e8.svg?style=flat-square)](https://nvidia.github.io/libnvidia-container)

View File

@@ -42,20 +42,34 @@ RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" .
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG BASE_DIST
# See https://www.centos.org/centos-linux-eol/
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
RUN [[ "${BASE_DIST}" != "centos8" ]] || \
( \
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
)
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=utility
ARG ARTIFACTS_ROOT
ARG PACKAGE_DIST
COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
WORKDIR /artifacts/packages
ARG ARTIFACTS_DIR
COPY ${ARTIFACTS_DIR}/* /artifacts/packages/
ARG PACKAGE_VERSION
RUN yum localinstall -y \
libnvidia-container1-${PACKAGE_VERSION}*.rpm \
libnvidia-container-tools-${PACKAGE_VERSION}*.rpm \
nvidia-container-toolkit-${PACKAGE_VERSION}*.rpm
ARG TARGETARCH
ENV PACKAGE_ARCH ${TARGETARCH}
RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm64/aarch64} && \
yum localinstall -y \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-1.*.rpm \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-1.*.rpm \
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*-${PACKAGE_VERSION}*.rpm
WORKDIR /work
@@ -71,6 +85,13 @@ LABEL release="N/A"
LABEL summary="Automatically Configure your Container Runtime for GPU support."
LABEL description="See summary"
COPY ./LICENSE /licenses/LICENSE
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
ENTRYPOINT ["/work/nvidia-toolkit"]
# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
yum update -y ${CVE_UPDATES} && \
rm -rf /var/cache/yum/*; \
fi
ENTRYPOINT ["/work/nvidia-toolkit"]

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_DIST
ARG CUDA_VERSION
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ENV NVIDIA_CONTAINER_TOOLKIT_VERSION="${VERSION}"
ARG ARTIFACTS_ROOT
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
WORKDIR /artifacts/packages
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

View File

@@ -40,11 +40,15 @@ COPY . .
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
RUN rm -f /etc/apt/sources.list.d/cuda.list
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
libcap2 \
curl \
&& \
rm -rf /var/lib/apt/lists/*
@@ -52,16 +56,28 @@ ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=utility
ARG ARTIFACTS_ROOT
ARG PACKAGE_DIST
COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
WORKDIR /artifacts/packages
ARG ARTIFACTS_DIR
COPY ${ARTIFACTS_DIR}/* /artifacts/packages/
ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH ${TARGETARCH}
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container"
ARG LIBNVIDIA_CONTAINER0_VERSION
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
--output ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb && \
dpkg -i ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb; \
fi
RUN dpkg -i \
libnvidia-container1_${PACKAGE_VERSION}*.deb \
libnvidia-container-tools_${PACKAGE_VERSION}*.deb \
nvidia-container-toolkit_${PACKAGE_VERSION}*.deb
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*_${PACKAGE_VERSION}*.deb
WORKDIR /work
@@ -77,6 +93,13 @@ LABEL release="N/A"
LABEL summary="Automatically Configure your Container Runtime for GPU support."
LABEL description="See summary"
COPY ./LICENSE /licenses/LICENSE
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
apt-get update && apt-get upgrade -y ${CVE_UPDATES} && \
rm -rf /var/lib/apt/lists/*; \
fi
ENTRYPOINT ["/work/nvidia-toolkit"]

View File

@@ -12,91 +12,114 @@
# See the License for the specific language governing permissions and
# limitations under the License.
DOCKER ?= docker
BUILD_MULTI_ARCH_IMAGES ?= false
DOCKER ?= docker
BUILDX =
ifeq ($(BUILD_MULTI_ARCH_IMAGES),true)
BUILDX = buildx
endif
MKDIR ?= mkdir
DIST_DIR ?= $(CURDIR)/dist
##### Global variables #####
include $(CURDIR)/versions.mk
# TODO: These should be defined ONCE and currently duplicate the version in the
# toolkit makefile.
LIB_VERSION := 1.6.0
LIB_TAG := rc.2
VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
CUDA_VERSION ?= 11.4.2
GOLANG_VERSION ?= 1.16.4
ifeq ($(IMAGE_NAME),)
REGISTRY ?= nvidia
IMAGE_NAME := $(REGISTRY)/container-toolkit
endif
VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
IMAGE_VERSION := $(VERSION)
IMAGE_TAG ?= $(VERSION)-$(DIST)
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
OUT_IMAGE_NAME ?= $(IMAGE_NAME)
OUT_IMAGE_VERSION ?= $(IMAGE_VERSION)
OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(DIST)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules #####
DEFAULT_PUSH_TARGET := ubuntu18.04
TARGETS := ubuntu20.04 ubuntu18.04 ubi8 centos7 centos8
DEFAULT_PUSH_TARGET := ubuntu20.04
DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 ubi8 centos7
BUILD_TARGETS := $(patsubst %, build-%, $(TARGETS))
PUSH_TARGETS := $(patsubst %, push-%, $(TARGETS))
TEST_TARGETS := $(patsubst %, test-%, $(TARGETS))
META_TARGETS := packaging
.PHONY: $(TARGETS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
BUILD_TARGETS := $(patsubst %,build-%,$(DISTRIBUTIONS) $(META_TARGETS))
PUSH_TARGETS := $(patsubst %,push-%,$(DISTRIBUTIONS) $(META_TARGETS))
TEST_TARGETS := $(patsubst %,test-%, $(DISTRIBUTIONS))
$(PUSH_TARGETS): push-%:
$(DOCKER) push "$(IMAGE_NAME):$(IMAGE_TAG)"
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
ifneq ($(BUILD_MULTI_ARCH_IMAGES),true)
include $(CURDIR)/build/container/native-only.mk
else
include $(CURDIR)/build/container/multi-arch.mk
endif
# For the default push target we also push a short tag equal to the version.
# We skip this for the development release
DEVEL_RELEASE_IMAGE_VERSION ?= devel
ifneq ($(strip $(VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
PUSH_MULTIPLE_TAGS ?= true
ifeq ($(strip $(OUT_IMAGE_VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
PUSH_MULTIPLE_TAGS = false
endif
ifeq ($(PUSH_MULTIPLE_TAGS),true)
push-$(DEFAULT_PUSH_TARGET): push-short
endif
push-short:
$(DOCKER) tag "$(IMAGE_NAME):$(VERSION)-$(DEFAULT_PUSH_TARGET)" "$(IMAGE_NAME):$(VERSION)"
$(DOCKER) push "$(IMAGE_NAME):$(VERSION)"
push-%: DIST = $(*)
push-short: DIST = $(DEFAULT_PUSH_TARGET)
build-%: DIST = $(*)
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
# Use a generic build target to build the relevant images
$(BUILD_TARGETS): build-%: $(ARTIFACTS_DIR)
$(DOCKER) build --pull \
$(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
DOCKER_BUILDKIT=1 \
$(DOCKER) $(BUILDX) build --pull \
$(DOCKER_BUILD_OPTIONS) \
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
--tag $(IMAGE) \
--build-arg ARTIFACTS_DIR="$(ARTIFACTS_DIR)" \
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
--build-arg BASE_DIST="$(BASE_DIST)" \
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg LIBNVIDIA_CONTAINER0_VERSION="$(LIBNVIDIA_CONTAINER0_DEPENDENCY)" \
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
--build-arg VERSION="$(VERSION)" \
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
-f $(DOCKERFILE) \
$(CURDIR)
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
build-ubuntu%: BASE_DIST = $(*)
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
build-ubuntu%: ARTIFACTS_DIR = $(ARTIFACTS_ROOT)/$(*)/amd64
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
build-ubuntu%: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
build-ubuntu18.04: BASE_DIST := ubuntu18.04
build-ubuntu20.04: BASE_DIST := ubuntu20.04
build-ubi8: DOCKERFILE_SUFFIX := centos
# TODO: Update this to use the centos8 packages
build-ubi8: ARTIFACTS_DIR = $(ARTIFACTS_ROOT)/centos7/x86_64
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-ubi8: BASE_DIST := ubi8
build-ubi8: DOCKERFILE_SUFFIX := centos
build-ubi8: PACKAGE_DIST = centos8
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-centos%: DOCKERFILE_SUFFIX := centos
build-centos%: ARTIFACTS_DIR = $(ARTIFACTS_ROOT)/$(*)/x86_64
build-centos%: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-centos7: BASE_DIST = $(*)
build-centos7: DOCKERFILE_SUFFIX := centos
build-centos7: PACKAGE_DIST = $(BASE_DIST)
build-centos7: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-centos7: BASE_DIST := centos7
build-centos8: BASE_DIST := centos8
build-packaging: BASE_DIST := ubuntu20.04
build-packaging: DOCKERFILE_SUFFIX := packaging
build-packaging: PACKAGE_ARCH := amd64
build-packaging: PACKAGE_DIST = all
build-packaging: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
# Test targets
test-%: DIST = $(*)
@@ -108,3 +131,22 @@ $(TEST_TARGETS): test-%:
$(IMAGE) \
--no-cleanup-on-error
.PHONY: test-packaging
test-packaging: DIST = packaging
test-packaging:
@echo "Testing package image contents"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/amazonlinux2/aarch64" || echo "Missing amazonlinux2/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/amazonlinux2/x86_64" || echo "Missing amazonlinux2/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/ppc64le" || echo "Missing centos7/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/x86_64" || echo "Missing centos7/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/aarch64" || echo "Missing centos8/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/ppc64le" || echo "Missing centos8/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/x86_64" || echo "Missing centos8/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/debian10/amd64" || echo "Missing debian10/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/debian9/amd64" || echo "Missing debian9/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/opensuse-leap15.1/x86_64" || echo "Missing opensuse-leap15.1/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu16.04/amd64" || echo "Missing ubuntu16.04/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu16.04/ppc64le" || echo "Missing ubuntu16.04/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/amd64" || echo "Missing ubuntu18.04/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/arm64" || echo "Missing ubuntu18.04/arm64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/ppc64le" || echo "Missing ubuntu18.04/ppc64le"

View File

@@ -0,0 +1,37 @@
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
PUSH_ON_BUILD ?= false
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
REGCTL ?= regctl
$(PUSH_TARGETS): push-%:
$(REGCTL) \
image copy \
$(IMAGE) $(OUT_IMAGE)
push-short:
$(REGCTL) \
image copy \
$(IMAGE) $(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)
# We only have x86_64 packages for centos7
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
# We only generate amd64 image for ubuntu18.04
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
# We only generate a single image for packaging targets
build-packaging: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64

View File

@@ -0,0 +1,23 @@
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
$(PUSH_TARGETS): push-%:
$(DOCKER) tag "$(IMAGE)" "$(OUT_IMAGE)"
$(DOCKER) push "$(OUT_IMAGE)"
push-short:
$(DOCKER) tag "$(IMAGE_NAME):$(VERSION)-$(DEFAULT_PUSH_TARGET)" "$(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)"
$(DOCKER) push "$(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)"

View File

@@ -0,0 +1,83 @@
package main
import (
"log"
"strings"
)
const (
allDriverCapabilities = DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx")
defaultDriverCapabilities = DriverCapabilities("utility,compute")
none = DriverCapabilities("")
all = DriverCapabilities("all")
)
func capabilityToCLI(cap string) string {
switch cap {
case "compute":
return "--compute"
case "compat32":
return "--compat32"
case "graphics":
return "--graphics"
case "utility":
return "--utility"
case "video":
return "--video"
case "display":
return "--display"
case "ngx":
return "--ngx"
default:
log.Panicln("unknown driver capability:", cap)
}
return ""
}
// DriverCapabilities is used to process the NVIDIA_DRIVER_CAPABILITIES environment
// variable. Operations include default values, filtering, and handling meta values such as "all"
type DriverCapabilities string
// Intersection returns intersection between two sets of capabilities.
func (d DriverCapabilities) Intersection(capabilities DriverCapabilities) DriverCapabilities {
if capabilities == all {
return d
}
if d == all {
return capabilities
}
lookup := make(map[string]bool)
for _, c := range d.list() {
lookup[c] = true
}
var found []string
for _, c := range capabilities.list() {
if lookup[c] {
found = append(found, c)
}
}
intersection := DriverCapabilities(strings.Join(found, ","))
return intersection
}
// String returns the string representation of the driver capabilities
func (d DriverCapabilities) String() string {
return string(d)
}
// list returns the driver capabilities as a list
func (d DriverCapabilities) list() []string {
var caps []string
for _, c := range strings.Split(string(d), ",") {
trimmed := strings.TrimSpace(c)
if len(trimmed) == 0 {
continue
}
caps = append(caps, trimmed)
}
return caps
}

View File

@@ -0,0 +1,134 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
func TestDriverCapabilitiesIntersection(t *testing.T) {
testCases := []struct {
capabilities DriverCapabilities
supportedCapabilities DriverCapabilities
expectedIntersection DriverCapabilities
}{
{
capabilities: none,
supportedCapabilities: none,
expectedIntersection: none,
},
{
capabilities: all,
supportedCapabilities: none,
expectedIntersection: none,
},
{
capabilities: all,
supportedCapabilities: allDriverCapabilities,
expectedIntersection: allDriverCapabilities,
},
{
capabilities: allDriverCapabilities,
supportedCapabilities: all,
expectedIntersection: allDriverCapabilities,
},
{
capabilities: none,
supportedCapabilities: all,
expectedIntersection: none,
},
{
capabilities: none,
supportedCapabilities: DriverCapabilities("cap1"),
expectedIntersection: none,
},
{
capabilities: DriverCapabilities("cap0,cap1"),
supportedCapabilities: DriverCapabilities("cap1,cap0"),
expectedIntersection: DriverCapabilities("cap0,cap1"),
},
{
capabilities: defaultDriverCapabilities,
supportedCapabilities: allDriverCapabilities,
expectedIntersection: defaultDriverCapabilities,
},
{
capabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
supportedCapabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
expectedIntersection: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
},
{
capabilities: DriverCapabilities("cap1"),
supportedCapabilities: none,
expectedIntersection: none,
},
{
capabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display,ngx"),
supportedCapabilities: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
expectedIntersection: DriverCapabilities("compute,compat32,graphics,utility,video,display"),
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
intersection := tc.supportedCapabilities.Intersection(tc.capabilities)
require.EqualValues(t, tc.expectedIntersection, intersection)
})
}
}
func TestDriverCapabilitiesList(t *testing.T) {
testCases := []struct {
capabilities DriverCapabilities
expected []string
}{
{
capabilities: DriverCapabilities(""),
},
{
capabilities: DriverCapabilities(" "),
},
{
capabilities: DriverCapabilities(","),
},
{
capabilities: DriverCapabilities(",cap"),
expected: []string{"cap"},
},
{
capabilities: DriverCapabilities("cap,"),
expected: []string{"cap"},
},
{
capabilities: DriverCapabilities("cap0,,cap1"),
expected: []string{"cap0", "cap1"},
},
{
capabilities: DriverCapabilities("cap1,cap0,cap3"),
expected: []string{"cap1", "cap0", "cap3"},
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
require.EqualValues(t, tc.expected, tc.capabilities.list())
})
}
}

View File

@@ -7,9 +7,9 @@ import (
"os"
"path"
"path/filepath"
"strconv"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"golang.org/x/mod/semver"
)
@@ -26,11 +26,6 @@ const (
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)
const (
allDriverCapabilities = "compute,compat32,graphics,utility,video,display,ngx"
defaultDriverCapabilities = "utility,compute"
)
const (
capSysAdmin = "CAP_SYS_ADMIN"
)
@@ -109,32 +104,6 @@ type HookState struct {
BundlePath string `json:"bundlePath"`
}
func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
if _, err := fmt.Sscanf(cudaVersion, "%d.%d.%d\n", &vmaj, &vmin, &vpatch); err != nil {
vpatch = 0
if _, err := fmt.Sscanf(cudaVersion, "%d.%d\n", &vmaj, &vmin); err != nil {
vmin = 0
if _, err := fmt.Sscanf(cudaVersion, "%d\n", &vmaj); err != nil {
log.Panicln("invalid CUDA version:", cudaVersion)
}
}
}
return
}
func getEnvMap(e []string) (m map[string]string) {
m = make(map[string]string)
for _, s := range e {
p := strings.SplitN(s, "=", 2)
if len(p) != 2 {
log.Panicln("environment error")
}
m[p[0]] = p[1]
}
return
}
func loadSpec(path string) (spec *Spec) {
f, err := os.Open(path)
if err != nil {
@@ -196,13 +165,7 @@ func isPrivileged(s *Spec) bool {
return false
}
func isLegacyCUDAImage(env map[string]string) bool {
legacyCudaVersion := env[envCUDAVersion]
cudaRequire := env[envNVRequireCUDA]
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
}
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
func getDevicesFromEnvvar(image image.CUDA) *string {
// Build a list of envvars to consider.
envVars := []string{envNVVisibleDevices}
if envSwarmGPU != nil {
@@ -210,35 +173,14 @@ func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
envVars = append([]string{*envSwarmGPU}, envVars...)
}
// Grab a reference to devices from the first envvar
// in the list that actually exists in the environment.
var devices *string
for _, envVar := range envVars {
if devs, ok := env[envVar]; ok {
devices = &devs
break
}
}
// Environment variable unset with legacy image: default to "all".
if devices == nil && legacyImage {
all := "all"
return &all
}
// Environment variable unset or empty or "void": return nil
if devices == nil || len(*devices) == 0 || *devices == "void" {
devices := image.DevicesFromEnvvars(envVars...)
if len(devices) == 0 {
return nil
}
// Environment variable set to "none": reset to "".
if *devices == "none" {
empty := ""
return &empty
}
devicesString := strings.Join(devices, ",")
// Any other value.
return devices
return &devicesString
}
func getDevicesFromMounts(mounts []Mount) *string {
@@ -278,7 +220,7 @@ func getDevicesFromMounts(mounts []Mount) *string {
return &ret
}
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *string {
// If enabled, try and get the device list from volume mounts first
if hookConfig.AcceptDeviceListAsVolumeMounts {
devices := getDevicesFromMounts(mounts)
@@ -288,7 +230,7 @@ func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, p
}
// Fallback to reading from the environment variable if privileges are correct
devices := getDevicesFromEnvvar(env, legacyImage)
devices := getDevicesFromEnvvar(image)
if devices == nil {
return nil
}
@@ -316,57 +258,35 @@ func getMigMonitorDevices(env map[string]string) *string {
return nil
}
func getDriverCapabilities(env map[string]string, legacyImage bool) *string {
// Grab a reference to the capabilities from the envvar
// if it actually exists in the environment.
var capabilities *string
if caps, ok := env[envNVDriverCapabilities]; ok {
capabilities = &caps
func getDriverCapabilities(env map[string]string, supportedDriverCapabilities DriverCapabilities, legacyImage bool) DriverCapabilities {
// We use the default driver capabilities by default. This is filtered to only include the
// supported capabilities
capabilities := supportedDriverCapabilities.Intersection(defaultDriverCapabilities)
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
if !capsEnvSpecified && legacyImage {
// Environment variable unset with legacy image: set all capabilities.
return supportedDriverCapabilities
}
// Environment variable unset with legacy image: set all capabilities.
if capabilities == nil && legacyImage {
allCaps := allDriverCapabilities
return &allCaps
if capsEnvSpecified && len(capsEnv) > 0 {
// If the envvironment variable is specified and is non-empty, use the capabilities value
envCapabilities := DriverCapabilities(capsEnv)
capabilities = supportedDriverCapabilities.Intersection(envCapabilities)
if envCapabilities != all && capabilities != envCapabilities {
log.Panicln(fmt.Errorf("unsupported capabilities found in '%v' (allowed '%v')", envCapabilities, capabilities))
}
}
// Environment variable unset or set but empty: set default capabilities.
if capabilities == nil || len(*capabilities) == 0 {
defaultCaps := defaultDriverCapabilities
return &defaultCaps
}
// Environment variable set to "all": set all capabilities.
if *capabilities == "all" {
allCaps := allDriverCapabilities
return &allCaps
}
// Any other value
return capabilities
}
func getRequirements(env map[string]string, legacyImage bool) []string {
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
var requirements []string
for name, value := range env {
if strings.HasPrefix(name, envNVRequirePrefix) {
requirements = append(requirements, value)
}
}
if legacyImage {
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
requirements = append(requirements, cudaRequire)
}
return requirements
}
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
legacyImage := isLegacyCUDAImage(env)
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
legacyImage := image.IsLegacy()
var devices string
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
if d := getDevices(hookConfig, image, mounts, privileged); d != nil {
devices = *d
} else {
// 'nil' devices means this is not a GPU container.
@@ -374,7 +294,7 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou
}
var migConfigDevices string
if d := getMigConfigDevices(env); d != nil {
if d := getMigConfigDevices(image); d != nil {
migConfigDevices = *d
}
if !privileged && migConfigDevices != "" {
@@ -382,22 +302,21 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou
}
var migMonitorDevices string
if d := getMigMonitorDevices(env); d != nil {
if d := getMigMonitorDevices(image); d != nil {
migMonitorDevices = *d
}
if !privileged && migMonitorDevices != "" {
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
var driverCapabilities string
if c := getDriverCapabilities(env, legacyImage); c != nil {
driverCapabilities = *c
driverCapabilities := getDriverCapabilities(image, hookConfig.SupportedDriverCapabilities, legacyImage).String()
requirements, err := image.GetRequirements()
if err != nil {
log.Panicln("failed to get requirements", err)
}
requirements := getRequirements(env, legacyImage)
// Don't fail on invalid values.
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
disableRequire := image.HasDisableRequire()
return &nvidiaConfig{
Devices: devices,
@@ -423,13 +342,17 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
s := loadSpec(path.Join(b, "config.json"))
env := getEnvMap(s.Process.Env)
image, err := image.NewCUDAImageFromEnv(s.Process.Env)
if err != nil {
log.Panicln(err)
}
privileged := isPrivileged(s)
envSwarmGPU = hook.SwarmResource
return containerConfig{
Pid: h.Pid,
Rootfs: s.Root.Path,
Env: env,
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
Env: image,
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
}
}

View File

@@ -4,6 +4,7 @@ import (
"path/filepath"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/stretchr/testify/require"
)
@@ -12,6 +13,7 @@ func TestGetNvidiaConfig(t *testing.T) {
description string
env map[string]string
privileged bool
hookConfig *HookConfig
expectedConfig *nvidiaConfig
expectedPanic bool
}{
@@ -35,7 +37,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: allDriverCapabilities,
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -49,7 +51,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: allDriverCapabilities,
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -81,7 +83,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "",
DriverCapabilities: allDriverCapabilities,
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -95,7 +97,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: allDriverCapabilities,
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -110,7 +112,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: defaultDriverCapabilities,
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -125,7 +127,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: allDriverCapabilities,
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -135,12 +137,12 @@ func TestGetNvidiaConfig(t *testing.T) {
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "cap0,cap1",
envNVDriverCapabilities: "video,display",
},
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "cap0,cap1",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -150,14 +152,14 @@ func TestGetNvidiaConfig(t *testing.T) {
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "cap0,cap1",
envNVDriverCapabilities: "video,display",
envNVRequirePrefix + "REQ0": "req0=true",
envNVRequirePrefix + "REQ1": "req1=false",
},
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "cap0,cap1",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: false,
},
@@ -167,7 +169,7 @@ func TestGetNvidiaConfig(t *testing.T) {
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "cap0,cap1",
envNVDriverCapabilities: "video,display",
envNVRequirePrefix + "REQ0": "req0=true",
envNVRequirePrefix + "REQ1": "req1=false",
envNVDisableRequire: "true",
@@ -175,7 +177,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "cap0,cap1",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: true,
},
@@ -206,7 +208,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: defaultDriverCapabilities,
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -238,7 +240,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "",
DriverCapabilities: defaultDriverCapabilities,
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -252,7 +254,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: defaultDriverCapabilities,
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -267,7 +269,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: defaultDriverCapabilities,
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -282,7 +284,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: allDriverCapabilities,
DriverCapabilities: allDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -292,12 +294,12 @@ func TestGetNvidiaConfig(t *testing.T) {
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "cap0,cap1",
envNVDriverCapabilities: "video,display",
},
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "cap0,cap1",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -307,14 +309,14 @@ func TestGetNvidiaConfig(t *testing.T) {
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "cap0,cap1",
envNVDriverCapabilities: "video,display",
envNVRequirePrefix + "REQ0": "req0=true",
envNVRequirePrefix + "REQ1": "req1=false",
},
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "cap0,cap1",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: false,
},
@@ -324,7 +326,7 @@ func TestGetNvidiaConfig(t *testing.T) {
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "cap0,cap1",
envNVDriverCapabilities: "video,display",
envNVRequirePrefix + "REQ0": "req0=true",
envNVRequirePrefix + "REQ1": "req1=false",
envNVDisableRequire: "true",
@@ -332,7 +334,7 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedConfig: &nvidiaConfig{
Devices: "gpu0,gpu1",
DriverCapabilities: "cap0,cap1",
DriverCapabilities: "video,display",
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
DisableRequire: true,
},
@@ -346,7 +348,7 @@ func TestGetNvidiaConfig(t *testing.T) {
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: defaultDriverCapabilities,
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{},
DisableRequire: false,
},
@@ -362,7 +364,7 @@ func TestGetNvidiaConfig(t *testing.T) {
expectedConfig: &nvidiaConfig{
Devices: "all",
MigConfigDevices: "mig0,mig1",
DriverCapabilities: defaultDriverCapabilities,
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -388,7 +390,7 @@ func TestGetNvidiaConfig(t *testing.T) {
expectedConfig: &nvidiaConfig{
Devices: "all",
MigMonitorDevices: "mig0,mig1",
DriverCapabilities: defaultDriverCapabilities,
DriverCapabilities: defaultDriverCapabilities.String(),
Requirements: []string{"cuda>=9.0"},
DisableRequire: false,
},
@@ -403,14 +405,62 @@ func TestGetNvidiaConfig(t *testing.T) {
privileged: false,
expectedPanic: true,
},
{
description: "Hook config set as driver-capabilities-all",
env: map[string]string{
envNVVisibleDevices: "all",
envNVDriverCapabilities: "all",
},
privileged: true,
hookConfig: &HookConfig{
SupportedDriverCapabilities: "video,display",
},
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: "video,display",
},
},
{
description: "Hook config set, envvar sets driver-capabilities",
env: map[string]string{
envNVVisibleDevices: "all",
envNVDriverCapabilities: "video,display",
},
privileged: true,
hookConfig: &HookConfig{
SupportedDriverCapabilities: "video,display,compute,utility",
},
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: "video,display",
},
},
{
description: "Hook config set, envvar unset sets default driver-capabilities",
env: map[string]string{
envNVVisibleDevices: "all",
},
privileged: true,
hookConfig: &HookConfig{
SupportedDriverCapabilities: "video,display,utility,compute",
},
expectedConfig: &nvidiaConfig{
Devices: "all",
DriverCapabilities: defaultDriverCapabilities.String(),
},
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
// Wrap the call to getNvidiaConfig() in a closure.
var config *nvidiaConfig
getConfig := func() {
hookConfig := getDefaultHookConfig()
config = getNvidiaConfig(&hookConfig, tc.env, nil, tc.privileged)
hookConfig := tc.hookConfig
if hookConfig == nil {
defaultConfig := getDefaultHookConfig()
hookConfig = &defaultConfig
}
config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged)
}
// For any tests that are expected to panic, make sure they do.
@@ -622,7 +672,7 @@ func TestDeviceListSourcePriority(t *testing.T) {
hookConfig := getDefaultHookConfig()
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged, false)
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
}
// For all other tests, just grab the devices and check the results
@@ -644,7 +694,6 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
description string
envSwarmGPU *string
env map[string]string
legacyImage bool
expectedDevices *string
}{
{
@@ -680,13 +729,15 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
env: map[string]string{
envNVVisibleDevices: gpuID,
envCUDAVersion: "legacy",
},
legacyImage: true,
expectedDevices: &gpuID,
},
{
description: "empty env returns all for legacy image",
legacyImage: true,
description: "empty env returns all for legacy image",
env: map[string]string{
envCUDAVersion: "legacy",
},
expectedDevices: &all,
},
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
@@ -732,16 +783,16 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
env: map[string]string{
envNVVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
envCUDAVersion: "legacy",
},
legacyImage: true,
expectedDevices: &gpuID,
},
{
description: "empty env returns all for legacy image",
env: map[string]string{
envDockerResourceGPUs: anotherGPUID,
envCUDAVersion: "legacy",
},
legacyImage: true,
expectedDevices: &all,
},
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
@@ -785,8 +836,8 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
envSwarmGPU: &envDockerResourceGPUs,
env: map[string]string{
envDockerResourceGPUs: gpuID,
envCUDAVersion: "legacy",
},
legacyImage: true,
expectedDevices: &gpuID,
},
{
@@ -811,7 +862,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
for i, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
envSwarmGPU = tc.envSwarmGPU
devices := getDevicesFromEnvvar(tc.env, tc.legacyImage)
devices := getDevicesFromEnvvar(image.CUDA(tc.env))
if tc.expectedDevices == nil {
require.Nil(t, devices, "%d: %v", i, tc)
return
@@ -822,3 +873,119 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
})
}
}
func TestGetDriverCapabilities(t *testing.T) {
supportedCapabilities := "compute,utility,display,video"
testCases := []struct {
description string
env map[string]string
legacyImage bool
supportedCapabilities string
expectedPanic bool
expectedCapabilities string
}{
{
description: "Env is set for legacy image",
env: map[string]string{
envNVDriverCapabilities: "display,video",
},
legacyImage: true,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: "display,video",
},
{
description: "Env is all for legacy image",
env: map[string]string{
envNVDriverCapabilities: "all",
},
legacyImage: true,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: supportedCapabilities,
},
{
description: "Env is empty for legacy image",
env: map[string]string{
envNVDriverCapabilities: "",
},
legacyImage: true,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: defaultDriverCapabilities.String(),
},
{
description: "Env unset for legacy image is 'all'",
env: map[string]string{},
legacyImage: true,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: supportedCapabilities,
},
{
description: "Env is set for modern image",
env: map[string]string{
envNVDriverCapabilities: "display,video",
},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: "display,video",
},
{
description: "Env unset for modern image is default",
env: map[string]string{},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: defaultDriverCapabilities.String(),
},
{
description: "Env is all for modern image",
env: map[string]string{
envNVDriverCapabilities: "all",
},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: supportedCapabilities,
},
{
description: "Env is empty for modern image",
env: map[string]string{
envNVDriverCapabilities: "",
},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
expectedCapabilities: defaultDriverCapabilities.String(),
},
{
description: "Invalid capabilities panic",
env: map[string]string{
envNVDriverCapabilities: "compute,utility",
},
supportedCapabilities: "not-compute,not-utility",
expectedPanic: true,
},
{
description: "Default is restricted for modern image",
legacyImage: false,
supportedCapabilities: "compute",
expectedCapabilities: "compute",
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
var capabilites DriverCapabilities
getDriverCapabilities := func() {
supportedCapabilities := DriverCapabilities(tc.supportedCapabilities)
capabilites = getDriverCapabilities(tc.env, supportedCapabilities, tc.legacyImage)
}
if tc.expectedPanic {
require.Panics(t, getDriverCapabilities)
return
}
getDriverCapabilities()
require.EqualValues(t, tc.expectedCapabilities, capabilites)
})
}
}

View File

@@ -7,6 +7,7 @@ import (
"reflect"
"github.com/BurntSushi/toml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
)
const (
@@ -33,22 +34,25 @@ type CLIConfig struct {
Ldconfig *string `toml:"ldconfig"`
}
// HookConfig : options for the nvidia-container-toolkit.
// HookConfig : options for the nvidia-container-runtime-hook.
type HookConfig struct {
DisableRequire bool `toml:"disable-require"`
SwarmResource *string `toml:"swarm-resource"`
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
DisableRequire bool `toml:"disable-require"`
SwarmResource *string `toml:"swarm-resource"`
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
SupportedDriverCapabilities DriverCapabilities `toml:"supported-driver-capabilities"`
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
NVIDIAContainerRuntime config.RuntimeConfig `toml:"nvidia-container-runtime"`
}
func getDefaultHookConfig() (config HookConfig) {
func getDefaultHookConfig() HookConfig {
return HookConfig{
DisableRequire: false,
SwarmResource: nil,
AcceptEnvvarUnprivileged: true,
AcceptDeviceListAsVolumeMounts: false,
SupportedDriverCapabilities: allDriverCapabilities,
NvidiaContainerCLI: CLIConfig{
Root: nil,
Path: nil,
@@ -61,6 +65,7 @@ func getDefaultHookConfig() (config HookConfig) {
User: nil,
Ldconfig: nil,
},
NVIDIAContainerRuntime: *config.GetDefaultRuntimeConfig(),
}
}
@@ -85,6 +90,15 @@ func getHookConfig() (config HookConfig) {
}
}
if config.SupportedDriverCapabilities == all {
config.SupportedDriverCapabilities = allDriverCapabilities
}
// We ensure that the supported-driver-capabilites option is a subset of allDriverCapabilities
if intersection := allDriverCapabilities.Intersection(config.SupportedDriverCapabilities); intersection != config.SupportedDriverCapabilities {
configName := config.getConfigOption("SupportedDriverCapabilities")
log.Panicf("Invalid value for config option '%v'; %v (supported: %v)\n", configName, config.SupportedDriverCapabilities, allDriverCapabilities)
}
return config
}

View File

@@ -0,0 +1,105 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"fmt"
"os"
"testing"
"github.com/stretchr/testify/require"
)
func TestGetHookConfig(t *testing.T) {
testCases := []struct {
lines []string
expectedPanic bool
expectedDriverCapabilities DriverCapabilities
}{
{
expectedDriverCapabilities: allDriverCapabilities,
},
{
lines: []string{
"supported-driver-capabilities = \"all\"",
},
expectedDriverCapabilities: allDriverCapabilities,
},
{
lines: []string{
"supported-driver-capabilities = \"compute,utility,not-compute\"",
},
expectedPanic: true,
},
{
lines: []string{},
expectedDriverCapabilities: allDriverCapabilities,
},
{
lines: []string{
"supported-driver-capabilities = \"\"",
},
expectedDriverCapabilities: none,
},
{
lines: []string{
"supported-driver-capabilities = \"utility,compute\"",
},
expectedDriverCapabilities: DriverCapabilities("utility,compute"),
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
var filename string
defer func() {
if len(filename) > 0 {
os.Remove(filename)
}
configflag = nil
}()
if tc.lines != nil {
configFile, err := os.CreateTemp("", "*.toml")
require.NoError(t, err)
defer configFile.Close()
filename = configFile.Name()
configflag = &filename
for _, line := range tc.lines {
_, err := configFile.WriteString(fmt.Sprintf("%s\n", line))
require.NoError(t, err)
}
}
var config HookConfig
getHookConfig := func() {
config = getHookConfig()
}
if tc.expectedPanic {
require.Panics(t, getHookConfig)
return
}
getHookConfig()
require.EqualValues(t, tc.expectedDriverCapabilities, config.SupportedDriverCapabilities)
})
}
}

View File

@@ -7,51 +7,6 @@ import (
"github.com/stretchr/testify/require"
)
func TestParseCudaVersionValid(t *testing.T) {
var tests = []struct {
version string
expected [3]uint32
}{
{"0", [3]uint32{0, 0, 0}},
{"8", [3]uint32{8, 0, 0}},
{"7.5", [3]uint32{7, 5, 0}},
{"9.0.116", [3]uint32{9, 0, 116}},
{"4294967295.4294967295.4294967295", [3]uint32{4294967295, 4294967295, 4294967295}},
}
for i, c := range tests {
vmaj, vmin, vpatch := parseCudaVersion(c.version)
version := [3]uint32{vmaj, vmin, vpatch}
require.Equal(t, c.expected, version, "%d: %v", i, c)
}
}
func TestParseCudaVersionInvalid(t *testing.T) {
var tests = []string{
"foo",
"foo.5.10",
"9.0.116.50",
"9.0.116foo",
"7.foo",
"9.0.bar",
"9.4294967296",
"9.0.116.",
"9..0",
"9.",
".5.10",
"-9",
"+9",
"-9.1.116",
"-9.-1.-116",
}
for _, c := range tests {
require.Panics(t, func() {
parseCudaVersion(c)
}, "parseCudaVersion(%v)", c)
}
}
func TestIsPrivileged(t *testing.T) {
var tests = []struct {
spec string

View File

@@ -6,20 +6,21 @@ import (
"log"
"os"
"os/exec"
"path"
"path/filepath"
"runtime"
"runtime/debug"
"strconv"
"strings"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
var (
debugflag = flag.Bool("debug", false, "enable debug output")
configflag = flag.String("config", "", "configuration file")
defaultPATH = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"}
debugflag = flag.Bool("debug", false, "enable debug output")
versionflag = flag.Bool("version", false, "enable version output")
configflag = flag.String("config", "", "configuration file")
)
func exit() {
@@ -35,28 +36,16 @@ func exit() {
os.Exit(0)
}
func getPATH(config CLIConfig) string {
dirs := filepath.SplitList(os.Getenv("PATH"))
// directories from the hook environment have higher precedence
dirs = append(dirs, defaultPATH...)
if config.Root != nil {
rootDirs := []string{}
for _, dir := range dirs {
rootDirs = append(rootDirs, path.Join(*config.Root, dir))
}
// directories with the root prefix have higher precedence
dirs = append(rootDirs, dirs...)
}
return strings.Join(dirs, ":")
}
func getCLIPath(config CLIConfig) string {
if config.Path != nil {
return *config.Path
}
if err := os.Setenv("PATH", getPATH(config)); err != nil {
var root string
if config.Root != nil {
root = *config.Root
}
if err := os.Setenv("PATH", lookup.GetPath(root)); err != nil {
log.Panicln("couldn't set PATH variable:", err)
}
@@ -85,6 +74,10 @@ func doPrestart() {
hook := getHookConfig()
cli := hook.NvidiaContainerCLI
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
}
container := getContainerConfig(hook)
nvidia := container.Nvidia
if nvidia == nil {
@@ -167,6 +160,11 @@ func main() {
flag.Usage = usage
flag.Parse()
if *versionflag {
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime Hook", info.GetVersionString())
return
}
args := flag.Args()
if len(args) == 0 {
flag.Usage()
@@ -186,3 +184,12 @@ func main() {
os.Exit(2)
}
}
// logInterceptor implements the info.Logger interface to allow for logging from this function.
type logInterceptor struct{}
func (l *logInterceptor) Infof(format string, args ...interface{}) {
log.Printf(format, args...)
}
func (l *logInterceptor) Debugf(format string, args ...interface{}) {}

View File

@@ -0,0 +1,87 @@
# The NVIDIA Container Runtime
The NVIDIA Container Runtime is a shim for OCI-compliant low-level runtimes such as [runc](https://github.com/opencontainers/runc). When a `create` command is detected, the incoming [OCI runtime specification](https://github.com/opencontainers/runtime-spec) is modified in place and the command is forwarded to the low-level runtime.
## Configuration
The NVIDIA Container Runtime uses file-based configuration, with the config stored in `/etc/nvidia-container-runtime/config.toml`. The `/etc` path can be overridden using the `XDG_CONFIG_HOME` environment variable with the `${XDG_CONFIG_HOME}/nvidia-container-runtime/config.toml` file used instead if this environment variable is set.
This config file may contain options for other components of the NVIDIA container stack and for the NVIDIA Container Runtime, the relevant config section is `nvidia-container-runtime`
### Logging
The `log-level` config option (default: `"info"`) specifies the log level to use and the `debug` option, if set, specifies a log file to which logs for the NVIDIA Container Runtime must be written.
In addition to this, the NVIDIA Container Runtime considers the value of `--log` and `--log-format` flags that may be passed to it by a container runtime such as docker or containerd. If the `--debug` flag is present the log-level specified in the config file is overridden as `"debug"`.
### Low-level Runtime Path
The `runtimes` config option allows for the low-level runtime to be specified. The first entry in this list that is an existing executable file is used as the low-level runtime. If the entry is not a path, the `PATH` is searched for a matching executable. If the entry is a path this is checked instead.
The default value for this setting is:
```toml
runtimes = [
"docker-runc",
"runc",
]
```
and if, for example, `crun` is to be used instead this can be changed to:
```toml
runtimes = [
"crun",
]
```
### Runtime Mode
The `mode` config option (default `"auto"`) controls the high-level behaviour of the runtime.
#### Auto Mode
When `mode` is set to `"auto"`, the runtime employs heuristics to determine which mode to use based on, for example, the platform where the runtime is being run.
#### Legacy Mode
When `mode` is set to `"legacy"`, the NVIDIA Container Runtime adds a [`prestart` hook](https://github.com/opencontainers/runtime-spec/blob/master/config.md#prestart) to the incomming OCI specification that invokes the NVIDIA Container Runtime Hook for all containers created. This hook checks whether NVIDIA devices are requested and ensures GPU access is configured using the `nvidia-container-cli` from the [libnvidia-container](https://github.com/NVIDIA/libnvidia-container) project.
#### CSV Mode
When `mode` is set to `"csv"`, CSV files at `/etc/nvidia-container-runtime/host-files-for-container.d` define the devices and mounts that are to be injected into a container when it is created. The search path for the files can be overridden by modifying the `nvidia-container-runtime.modes.csv.mount-spec-path` in the config as below:
```toml
[nvidia-container-runtime]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
```
This mode is primarily targeted at Tegra-based systems without NVML available.
### Notes on using the docker CLI
Note that only the `"legacy"` NVIDIA Container Runtime mode is directly compatible with the `--gpus` flag implemented by the `docker` CLI (assuming the NVIDIA Container Runtime is not used). The reason for this is that `docker` inserts the same NVIDIA Container Runtime Hook into the OCI runtime specification.
If a different mode is explicitly set or detected, the NVIDIA Container Runtime Hook will raise the following error when `--gpus` is set:
```
$ docker run --rm --gpus all ubuntu:18.04
docker: Error response from daemon: failed to create shim: OCI runtime create failed: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: Running hook #0:: error running hook: exit status 1, stdout: , stderr: Auto-detected mode as 'csv'
invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.: unknown.
```
Here NVIDIA Container Runtime must be used explicitly. The recommended way to do this is to specify the `--runtime=nvidia` command line argument as part of the `docker run` commmand as follows:
```
$ docker run --rm --gpus all --runtime=nvidia ubuntu:18.04
```
Alternatively the NVIDIA Container Runtime can be set as the default runtime for docker. This can be done by modifying the `/etc/docker/daemon.json` file as follows:
```json
{
"default-runtime": "nvidia",
"runtimes": {
"nvidia": {
"path": "nvidia-container-runtime",
"runtimeArgs": []
}
}
}
```

View File

@@ -20,60 +20,220 @@ import (
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"github.com/sirupsen/logrus"
"github.com/tsaikd/KDGoLib/logrusutil"
)
// Logger adds a way to manage output to a log file to a logrus.Logger
type Logger struct {
*logrus.Logger
previousOutput io.Writer
logFile *os.File
previousLogger *logrus.Logger
logFiles []*os.File
}
// NewLogger constructs a Logger with a preddefined formatter
// NewLogger creates an empty logger
func NewLogger() *Logger {
logrusLogger := logrus.New()
formatter := &logrusutil.ConsoleLogFormatter{
TimestampFormat: "2006/01/02 15:04:07",
Flag: logrusutil.Ltime,
return &Logger{
Logger: logrus.New(),
}
logger := &Logger{
Logger: logrusLogger,
}
logger.SetFormatter(formatter)
return logger
}
// LogToFile opens the specified file for appending and sets the logger to
// output to the opened file. A reference to the file pointer is stored to
// allow this to be closed.
func (l *Logger) LogToFile(filename string) error {
logFile, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("error opening debug log file: %v", err)
// UpdateLogger constructs a Logger with a preddefined formatter
func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, error) {
configFromArgs := parseArgs(argv)
level, logLevelError := configFromArgs.getLevel(logLevel)
var logFiles []*os.File
var argLogFileError error
// We don't create log files if the version argument is supplied
if !configFromArgs.version {
configLogFile, err := createLogFile(filename)
if err != nil {
return logger, fmt.Errorf("error opening debug log file: %v", err)
}
if configLogFile != nil {
logFiles = append(logFiles, configLogFile)
}
argLogFile, err := createLogFile(configFromArgs.file)
if argLogFile != nil {
logFiles = append(logFiles, argLogFile)
}
argLogFileError = err
}
l.logFile = logFile
l.previousOutput = l.Out
l.SetOutput(logFile)
return nil
}
// CloseFile closes the log file (if any) and resets the logger output to what it
// was before LogToFile was called.
func (l *Logger) CloseFile() error {
if l.logFile == nil {
return nil
l := &Logger{
Logger: logrus.New(),
previousLogger: logger.Logger,
logFiles: logFiles,
}
logFile := l.logFile
l.SetOutput(l.previousOutput)
l.logFile = nil
return logFile.Close()
l.SetLevel(level)
if level == logrus.DebugLevel {
logrus.SetReportCaller(true)
// Shorten function and file names reported by the logger, by
// trimming common "github.com/opencontainers/runc" prefix.
// This is only done for text formatter.
_, file, _, _ := runtime.Caller(0)
prefix := filepath.Dir(file) + "/"
logrus.SetFormatter(&logrus.TextFormatter{
CallerPrettyfier: func(f *runtime.Frame) (string, string) {
function := strings.TrimPrefix(f.Function, prefix) + "()"
fileLine := strings.TrimPrefix(f.File, prefix) + ":" + strconv.Itoa(f.Line)
return function, fileLine
},
})
}
if configFromArgs.format == "json" {
l.SetFormatter(new(logrus.JSONFormatter))
}
if len(logFiles) == 0 {
l.SetOutput(io.Discard)
} else if len(logFiles) == 1 {
l.SetOutput(logFiles[0])
} else if len(logFiles) > 1 {
var writers []io.Writer
for _, f := range logFiles {
writers = append(writers, f)
}
l.SetOutput(io.MultiWriter(writers...))
}
if logLevelError != nil {
l.Warn(logLevelError)
}
if argLogFileError != nil {
l.Warnf("Failed to open log file: %v", argLogFileError)
}
return l, nil
}
// Reset closes the log file (if any) and resets the logger output to what it
// was before UpdateLogger was called.
func (l *Logger) Reset() error {
defer func() {
previous := l.previousLogger
if previous == nil {
previous = logrus.New()
}
logger = &Logger{Logger: previous}
}()
var errs []error
for _, f := range l.logFiles {
err := f.Close()
if err != nil {
errs = append(errs, err)
}
}
var err error
for _, e := range errs {
if err == nil {
err = e
continue
}
return fmt.Errorf("%v; %w", e, err)
}
return err
}
func createLogFile(filename string) (*os.File, error) {
if filename != "" && filename != os.DevNull {
return os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
}
return nil, nil
}
type loggerConfig struct {
file string
format string
debug bool
version bool
}
func (c loggerConfig) getLevel(logLevel string) (logrus.Level, error) {
if c.debug {
return logrus.DebugLevel, nil
}
if logLevel, err := logrus.ParseLevel(logLevel); err == nil {
return logLevel, nil
}
return logrus.InfoLevel, fmt.Errorf("invalid log-level '%v'", logLevel)
}
// Informed by Taken from https://github.com/opencontainers/runc/blob/7fd8b57001f5bfa102e89cb434d96bf71f7c1d35/main.go#L182
func parseArgs(args []string) loggerConfig {
c := loggerConfig{}
expected := map[string]*string{
"log-format": &c.format,
"log": &c.file,
}
found := make(map[string]bool)
for i := 0; i < len(args); i++ {
if len(found) == 4 {
break
}
param := args[i]
parts := strings.SplitN(param, "=", 2)
trimmed := strings.TrimLeft(parts[0], "-")
// If this is not a flag we continue
if parts[0] == trimmed {
continue
}
// Check the version flag
if trimmed == "version" {
c.version = true
found["version"] = true
// For the version flag we don't process any other flags
continue
}
// Check the debug flag
if trimmed == "debug" {
c.debug = true
found["debug"] = true
continue
}
destination, exists := expected[trimmed]
if !exists {
continue
}
var value string
if len(parts) == 2 {
value = parts[2]
} else if i+1 < len(args) {
value = args[i+1]
i++
} else {
continue
}
*destination = value
found[trimmed] = true
}
return c
}

View File

@@ -3,87 +3,82 @@ package main
import (
"fmt"
"os"
"path"
"strings"
"github.com/pelletier/go-toml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/opencontainers/runtime-spec/specs-go"
)
const (
configOverride = "XDG_CONFIG_HOME"
configFilePath = "nvidia-container-runtime/config.toml"
// version must be set by go build's -X main.version= option in the Makefile.
var version = "unknown"
hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook"
)
var (
configDir = "/etc/"
)
// gitCommit will be the hash that the binary was built from
// and will be populated by the Makefile
var gitCommit = ""
var logger = NewLogger()
func main() {
err := run(os.Args)
if err != nil {
logger.Errorf("Error running %v: %v", os.Args, err)
logger.Errorf("%v", err)
os.Exit(1)
}
}
// run is an entry point that allows for idiomatic handling of errors
// when calling from the main function.
func run(argv []string) (err error) {
cfg, err := getConfig()
func run(argv []string) (rerr error) {
printVersion := hasVersionFlag(argv)
if printVersion {
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version)))
}
cfg, err := config.GetConfig()
if err != nil {
return fmt.Errorf("error loading config: %v", err)
}
err = logger.LogToFile(cfg.debugFilePath)
logger, err = UpdateLogger(
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath,
cfg.NVIDIAContainerRuntimeConfig.LogLevel,
argv,
)
if err != nil {
return fmt.Errorf("error opening debug log file: %v", err)
return fmt.Errorf("failed to set up logger: %v", err)
}
defer func() {
// We capture and log a returning error before closing the log file.
if err != nil {
logger.Errorf("Error running %v: %v", argv, err)
defer logger.Reset()
logger.Debugf("Command line arguments: %v", argv)
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
if err != nil {
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
}
if printVersion {
fmt.Print("\n")
}
return runtime.Exec(argv)
}
// TODO: This should be refactored / combined with parseArgs in logger.
func hasVersionFlag(args []string) bool {
for i := 0; i < len(args); i++ {
param := args[i]
parts := strings.SplitN(param, "=", 2)
trimmed := strings.TrimLeft(parts[0], "-")
// If this is not a flag we continue
if parts[0] == trimmed {
continue
}
logger.CloseFile()
}()
r, err := newRuntime(argv)
if err != nil {
return fmt.Errorf("error creating runtime: %v", err)
// Check the version flag
if trimmed == "version" {
return true
}
}
logger.Printf("Running %s\n", argv[0])
return r.Exec(argv)
}
type config struct {
debugFilePath string
}
// getConfig sets up the config struct. Values are read from a toml file
// or set via the environment.
func getConfig() (*config, error) {
cfg := &config{}
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
configDir = XDGConfigDir
}
configFilePath := path.Join(configDir, configFilePath)
tomlContent, err := os.ReadFile(configFilePath)
if err != nil {
return nil, err
}
toml, err := toml.Load(string(tomlContent))
if err != nil {
return nil, err
}
cfg.debugFilePath = toml.GetDefault("nvidia-container-runtime.debug", "/dev/null").(string)
return cfg, nil
return false
}

View File

@@ -3,15 +3,15 @@ package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/require"
)
@@ -24,6 +24,10 @@ const (
unmodifiedSpecFileSuffix = "test/input/test_spec.json"
)
const (
runcExecutableName = "runc"
)
type testConfig struct {
root string
binPath string
@@ -35,7 +39,7 @@ func TestMain(m *testing.M) {
// TEST SETUP
// Determine the module root and the test binary path
var err error
moduleRoot, err := getModuleRoot()
moduleRoot, err := test.GetModuleRoot()
if err != nil {
logger.Fatalf("error in test setup: could not get module root: %v", err)
}
@@ -43,7 +47,7 @@ func TestMain(m *testing.M) {
testInputPath := filepath.Join(moduleRoot, "test", "input")
// Set the environment variables for the test
os.Setenv("PATH", prependToPath(testBinPath, moduleRoot))
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
os.Setenv("XDG_CONFIG_HOME", testInputPath)
// Confirm that the environment is configured correctly
@@ -71,31 +75,6 @@ func TestMain(m *testing.M) {
os.Exit(exitCode)
}
func getModuleRoot() (string, error) {
_, filename, _, _ := runtime.Caller(0)
return hasGoMod(filename)
}
func hasGoMod(dir string) (string, error) {
if dir == "" || dir == "/" {
return "", fmt.Errorf("module root not found")
}
_, err := os.Stat(filepath.Join(dir, "go.mod"))
if err != nil {
return hasGoMod(filepath.Dir(dir))
}
return dir, nil
}
func prependToPath(additionalPaths ...string) string {
paths := strings.Split(os.Getenv("PATH"), ":")
paths = append(additionalPaths, paths...)
return strings.Join(paths, ":")
}
// case 1) nvidia-container-runtime run --bundle
// case 2) nvidia-container-runtime create --bundle
// - Confirm the runtime handles bad input correctly
@@ -105,11 +84,6 @@ func TestBadInput(t *testing.T) {
t.Fatal(err)
}
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle")
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
output, err := cmdRun.CombinedOutput()
require.Errorf(t, err, "runtime should return an error", "output=%v", string(output))
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
err = cmdCreate.Run()
@@ -193,11 +167,11 @@ func TestDuplicateHook(t *testing.T) {
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json")
}
// addNVIDIAHook is a basic wrapper for nvidiaContainerRunime.addNVIDIAHook that is used for
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
// testing.
func addNVIDIAHook(spec *specs.Spec) error {
r := nvidiaContainerRuntime{logger: logger.Logger}
return r.addNVIDIAHook(spec)
m := modifier.NewStableRuntimeModifier(logger.Logger)
return m.Modify(spec)
}
func (c testConfig) getRuntimeSpec() (specs.Spec, error) {
@@ -270,24 +244,3 @@ func nvidiaHookCount(hooks *specs.Hooks) int {
}
return count
}
func TestGetConfigWithCustomConfig(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
// By default debug is disabled
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
testDir := filepath.Join(wd, "test")
filename := filepath.Join(testDir, configFilePath)
os.Setenv(configOverride, testDir)
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
cfg, err := getConfig()
require.NoError(t, err)
require.Equal(t, cfg.debugFilePath, "/nvidia-container-toolkit.log")
}

View File

@@ -1,145 +0,0 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package main
import (
"fmt"
"os"
"os/exec"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
log "github.com/sirupsen/logrus"
)
// nvidiaContainerRuntime encapsulates the NVIDIA Container Runtime. It wraps the specified runtime, conditionally
// modifying the specified OCI specification before invoking the runtime.
type nvidiaContainerRuntime struct {
logger *log.Logger
runtime oci.Runtime
ociSpec oci.Spec
}
var _ oci.Runtime = (*nvidiaContainerRuntime)(nil)
// newNvidiaContainerRuntime is a constructor for a standard runtime shim.
func newNvidiaContainerRuntimeWithLogger(logger *log.Logger, runtime oci.Runtime, ociSpec oci.Spec) (oci.Runtime, error) {
r := nvidiaContainerRuntime{
logger: logger,
runtime: runtime,
ociSpec: ociSpec,
}
return &r, nil
}
// Exec defines the entrypoint for the NVIDIA Container Runtime. A check is performed to see whether modifications
// to the OCI spec are required -- and applicable modifcations applied. The supplied arguments are then
// forwarded to the underlying runtime's Exec method.
func (r nvidiaContainerRuntime) Exec(args []string) error {
if r.modificationRequired(args) {
err := r.modifyOCISpec()
if err != nil {
return fmt.Errorf("error modifying OCI spec: %v", err)
}
}
r.logger.Println("Forwarding command to runtime")
return r.runtime.Exec(args)
}
// modificationRequired checks the intput arguments to determine whether a modification
// to the OCI spec is required.
func (r nvidiaContainerRuntime) modificationRequired(args []string) bool {
var previousWasBundle bool
for _, a := range args {
// We check for '--bundle create' explicitly to ensure that we
// don't inadvertently trigger a modification if the bundle directory
// is specified as `create`
if !previousWasBundle && isBundleFlag(a) {
previousWasBundle = true
continue
}
if !previousWasBundle && a == "create" {
r.logger.Infof("'create' command detected; modification required")
return true
}
previousWasBundle = false
}
r.logger.Infof("No modification required")
return false
}
// modifyOCISpec loads and modifies the OCI spec specified in the nvidiaContainerRuntime
// struct. The spec is modified in-place and written to the same file as the input after
// modifcationas are applied.
func (r nvidiaContainerRuntime) modifyOCISpec() error {
err := r.ociSpec.Load()
if err != nil {
return fmt.Errorf("error loading OCI specification for modification: %v", err)
}
err = r.ociSpec.Modify(r.addNVIDIAHook)
if err != nil {
return fmt.Errorf("error injecting NVIDIA Container Runtime hook: %v", err)
}
err = r.ociSpec.Flush()
if err != nil {
return fmt.Errorf("error writing modified OCI specification: %v", err)
}
return nil
}
// addNVIDIAHook modifies the specified OCI specification in-place, inserting a
// prestart hook.
func (r nvidiaContainerRuntime) addNVIDIAHook(spec *specs.Spec) error {
path, err := exec.LookPath("nvidia-container-runtime-hook")
if err != nil {
path = hookDefaultFilePath
_, err = os.Stat(path)
if err != nil {
return err
}
}
r.logger.Printf("prestart hook path: %s\n", path)
args := []string{path}
if spec.Hooks == nil {
spec.Hooks = &specs.Hooks{}
} else if len(spec.Hooks.Prestart) != 0 {
for _, hook := range spec.Hooks.Prestart {
if !strings.Contains(hook.Path, "nvidia-container-runtime-hook") {
continue
}
r.logger.Println("existing nvidia prestart hook in OCI spec file")
return nil
}
}
spec.Hooks.Prestart = append(spec.Hooks.Prestart, specs.Hook{
Path: path,
Args: append(args, "prestart"),
})
return nil
}

View File

@@ -1,226 +0,0 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package main
import (
"fmt"
"strings"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestArgsGetConfigFilePath(t *testing.T) {
testCases := []struct {
bundleDir string
ociSpecPath string
}{
{
ociSpecPath: "config.json",
},
{
bundleDir: "/foo/bar",
ociSpecPath: "/foo/bar/config.json",
},
{
bundleDir: "/foo/bar/",
ociSpecPath: "/foo/bar/config.json",
},
}
for i, tc := range testCases {
cp, err := getOCISpecFilePath(tc.bundleDir)
require.NoErrorf(t, err, "%d: %v", i, tc)
require.Equalf(t, tc.ociSpecPath, cp, "%d: %v", i, tc)
}
}
func TestAddNvidiaHook(t *testing.T) {
logger, logHook := testlog.NewNullLogger()
shim := nvidiaContainerRuntime{
logger: logger,
}
testCases := []struct {
spec *specs.Spec
errorPrefix string
shouldNotAdd bool
}{
{
spec: &specs.Spec{},
},
{
spec: &specs.Spec{
Hooks: &specs.Hooks{},
},
},
{
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{{
Path: "some-hook",
}},
},
},
},
{
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{{
Path: "nvidia-container-runtime-hook",
}},
},
},
shouldNotAdd: true,
},
}
for i, tc := range testCases {
logHook.Reset()
var numPrestartHooks int
if tc.spec.Hooks != nil {
numPrestartHooks = len(tc.spec.Hooks.Prestart)
}
err := shim.addNVIDIAHook(tc.spec)
if tc.errorPrefix == "" {
require.NoErrorf(t, err, "%d: %v", i, tc)
} else {
require.Truef(t, strings.HasPrefix(err.Error(), tc.errorPrefix), "%d: %v", i, tc)
require.NotNilf(t, tc.spec.Hooks, "%d: %v", i, tc)
require.Equalf(t, 1, nvidiaHookCount(tc.spec.Hooks), "%d: %v", i, tc)
if tc.shouldNotAdd {
require.Equal(t, numPrestartHooks+1, len(tc.spec.Hooks.Poststart), "%d: %v", i, tc)
} else {
require.Equal(t, numPrestartHooks+1, len(tc.spec.Hooks.Poststart), "%d: %v", i, tc)
nvidiaHook := tc.spec.Hooks.Poststart[len(tc.spec.Hooks.Poststart)-1]
// TODO: This assumes that the hook has been set up in the makefile
expectedPath := "/usr/bin/nvidia-container-runtime-hook"
require.Equalf(t, expectedPath, nvidiaHook.Path, "%d: %v", i, tc)
require.Equalf(t, []string{expectedPath, "prestart"}, nvidiaHook.Args, "%d: %v", i, tc)
require.Emptyf(t, nvidiaHook.Env, "%d: %v", i, tc)
require.Nilf(t, nvidiaHook.Timeout, "%d: %v", i, tc)
}
}
}
}
func TestNvidiaContainerRuntime(t *testing.T) {
logger, hook := testlog.NewNullLogger()
testCases := []struct {
shim nvidiaContainerRuntime
shouldModify bool
args []string
modifyError error
writeError error
}{
{
shim: nvidiaContainerRuntime{},
shouldModify: false,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"create"},
shouldModify: true,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"--bundle=create"},
shouldModify: false,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"--bundle", "create"},
shouldModify: false,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"create"},
shouldModify: true,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"create"},
modifyError: fmt.Errorf("error modifying"),
shouldModify: true,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"create"},
writeError: fmt.Errorf("error writing"),
shouldModify: true,
},
}
for i, tc := range testCases {
tc.shim.logger = logger
hook.Reset()
spec := &specs.Spec{}
ociMock := oci.NewMockSpec(spec, tc.writeError, tc.modifyError)
require.Equal(t, tc.shouldModify, tc.shim.modificationRequired(tc.args), "%d: %v", i, tc)
tc.shim.ociSpec = ociMock
tc.shim.runtime = &MockShim{}
err := tc.shim.Exec(tc.args)
if tc.modifyError != nil || tc.writeError != nil {
require.Error(t, err, "%d: %v", i, tc)
} else {
require.NoError(t, err, "%d: %v", i, tc)
}
if tc.shouldModify {
require.Equal(t, 1, ociMock.MockModify.Callcount, "%d: %v", i, tc)
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "%d: %v", i, tc)
} else {
require.Equal(t, 0, ociMock.MockModify.Callcount, "%d: %v", i, tc)
require.Nil(t, spec.Hooks, "%d: %v", i, tc)
}
writeExpected := tc.shouldModify && tc.modifyError == nil
if writeExpected {
require.Equal(t, 1, ociMock.MockFlush.Callcount, "%d: %v", i, tc)
} else {
require.Equal(t, 0, ociMock.MockFlush.Callcount, "%d: %v", i, tc)
}
}
}
type MockShim struct {
called bool
args []string
returnError error
}
func (m *MockShim) Exec(args []string) error {
m.called = true
m.args = args
return m.returnError
}

View File

@@ -1,5 +1,5 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,149 +18,88 @@ package main
import (
"fmt"
"os/exec"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
"github.com/sirupsen/logrus"
)
const (
ociSpecFileName = "config.json"
dockerRuncExecutableName = "docker-runc"
runcExecutableName = "runc"
)
// newNVIDIAContainerRuntime is a factory method that constructs a runtime based on the selected configuration and specified logger
func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv []string) (oci.Runtime, error) {
lowLevelRuntime, err := oci.NewLowLevelRuntime(logger, cfg.NVIDIAContainerRuntimeConfig.Runtimes)
if err != nil {
return nil, fmt.Errorf("error constructing low-level runtime: %v", err)
}
// newRuntime is a factory method that constructs a runtime based on the selected configuration.
func newRuntime(argv []string) (oci.Runtime, error) {
ociSpec, err := newOCISpec(argv)
if !oci.HasCreateSubcommand(argv) {
logger.Debugf("Skipping modifier for non-create subcommand")
return lowLevelRuntime, nil
}
ociSpec, err := oci.NewSpec(logger, argv)
if err != nil {
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
}
runc, err := newRuncRuntime()
specModifier, err := newSpecModifier(logger, cfg, ociSpec, argv)
if err != nil {
return nil, fmt.Errorf("error constructing runc runtime: %v", err)
return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err)
}
r, err := newNvidiaContainerRuntimeWithLogger(logger.Logger, runc, ociSpec)
if err != nil {
return nil, fmt.Errorf("error constructing NVIDIA Container Runtime: %v", err)
}
// Create the wrapping runtime with the specified modifier
r := runtime.NewModifyingRuntimeWrapper(
logger,
lowLevelRuntime,
ociSpec,
specModifier,
)
return r, nil
}
// newOCISpec constructs an OCI spec for the provided arguments
func newOCISpec(argv []string) (oci.Spec, error) {
bundlePath, err := getBundlePath(argv)
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
modeModifier, err := newModeModifier(logger, cfg, ociSpec, argv)
if err != nil {
return nil, fmt.Errorf("error parsing command line arguments: %v", err)
return nil, err
}
ociSpecPath, err := getOCISpecFilePath(bundlePath)
gdsModifier, err := modifier.NewGDSModifier(logger, cfg, ociSpec)
if err != nil {
return nil, fmt.Errorf("error getting OCI specification file path: %v", err)
return nil, err
}
ociSpec := oci.NewSpecFromFile(ociSpecPath)
return ociSpec, nil
}
// newRuncRuntime locates the runc binary and wraps it in a SyscallExecRuntime
func newRuncRuntime() (oci.Runtime, error) {
runtimePath, err := findRunc()
mofedModifier, err := modifier.NewMOFEDModifier(logger, cfg, ociSpec)
if err != nil {
return nil, fmt.Errorf("error locating runtime: %v", err)
return nil, err
}
runc, err := oci.NewSyscallExecRuntimeWithLogger(logger.Logger, runtimePath)
tegraModifier, err := modifier.NewTegraPlatformFiles(logger)
if err != nil {
return nil, fmt.Errorf("error constructing runtime: %v", err)
return nil, err
}
return runc, nil
modifiers := modifier.Merge(
modeModifier,
gdsModifier,
mofedModifier,
tegraModifier,
)
return modifiers, nil
}
// getBundlePath checks the specified slice of strings (argv) for a 'bundle' flag as allowed by runc.
// The following are supported:
// --bundle{{SEP}}BUNDLE_PATH
// -bundle{{SEP}}BUNDLE_PATH
// -b{{SEP}}BUNDLE_PATH
// where {{SEP}} is either ' ' or '='
func getBundlePath(argv []string) (string, error) {
var bundlePath string
for i := 0; i < len(argv); i++ {
param := argv[i]
parts := strings.SplitN(param, "=", 2)
if !isBundleFlag(parts[0]) {
continue
}
// The flag has the format --bundle=/path
if len(parts) == 2 {
bundlePath = parts[1]
continue
}
// The flag has the format --bundle /path
if i+1 < len(argv) {
bundlePath = argv[i+1]
i++
continue
}
// --bundle / -b was the last element of argv
return "", fmt.Errorf("bundle option requires an argument")
func newModeModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
case "legacy":
return modifier.NewStableRuntimeModifier(logger), nil
case "csv":
return modifier.NewCSVModifier(logger, cfg, ociSpec)
case "cdi":
return modifier.NewCDIModifier(logger, cfg, ociSpec)
}
return bundlePath, nil
}
// findRunc locates runc in the path, returning the full path to the
// binary or an error.
func findRunc() (string, error) {
runtimeCandidates := []string{
dockerRuncExecutableName,
runcExecutableName,
}
return findRuntime(runtimeCandidates)
}
func findRuntime(runtimeCandidates []string) (string, error) {
for _, candidate := range runtimeCandidates {
logger.Infof("Looking for runtime binary '%v'", candidate)
runcPath, err := exec.LookPath(candidate)
if err == nil {
logger.Infof("Found runtime binary '%v'", runcPath)
return runcPath, nil
}
logger.Warnf("Runtime binary '%v' not found: %v", candidate, err)
}
return "", fmt.Errorf("no runtime binary found from candidate list: %v", runtimeCandidates)
}
func isBundleFlag(arg string) bool {
if !strings.HasPrefix(arg, "-") {
return false
}
trimmed := strings.TrimLeft(arg, "-")
return trimmed == "b" || trimmed == "bundle"
}
// getOCISpecFilePath returns the expected path to the OCI specification file for the given
// bundle directory. If the bundle directory is empty, only `config.json` is returned.
func getOCISpecFilePath(bundleDir string) (string, error) {
logger.Infof("Using bundle directory: %v", bundleDir)
OCISpecFilePath := filepath.Join(bundleDir, ociSpecFileName)
logger.Infof("Using OCI specification file path: %v", OCISpecFilePath)
return OCISpecFilePath, nil
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
}

View File

@@ -17,176 +17,113 @@
package main
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestConstructor(t *testing.T) {
shim, err := newRuntime([]string{})
require.NoError(t, err)
require.NotNil(t, shim)
}
func TestGetBundlePath(t *testing.T) {
type expected struct {
bundle string
isError bool
}
testCases := []struct {
argv []string
expected expected
}{
{
argv: []string{},
},
{
argv: []string{"create"},
},
{
argv: []string{"--bundle"},
expected: expected{
isError: true,
},
},
{
argv: []string{"-b"},
expected: expected{
isError: true,
},
},
{
argv: []string{"--bundle", "/foo/bar"},
expected: expected{
bundle: "/foo/bar",
},
},
{
argv: []string{"--not-bundle", "/foo/bar"},
},
{
argv: []string{"--"},
},
{
argv: []string{"-bundle", "/foo/bar"},
expected: expected{
bundle: "/foo/bar",
},
},
{
argv: []string{"--bundle=/foo/bar"},
expected: expected{
bundle: "/foo/bar",
},
},
{
argv: []string{"-b=/foo/bar"},
expected: expected{
bundle: "/foo/bar",
},
},
{
argv: []string{"-b=/foo/=bar"},
expected: expected{
bundle: "/foo/=bar",
},
},
{
argv: []string{"-b", "/foo/bar"},
expected: expected{
bundle: "/foo/bar",
},
},
{
argv: []string{"create", "-b", "/foo/bar"},
expected: expected{
bundle: "/foo/bar",
},
},
{
argv: []string{"-b", "create", "create"},
expected: expected{
bundle: "create",
},
},
{
argv: []string{"-b=create", "create"},
expected: expected{
bundle: "create",
},
},
{
argv: []string{"-b", "create"},
expected: expected{
bundle: "create",
},
},
}
for i, tc := range testCases {
bundle, err := getBundlePath(tc.argv)
if tc.expected.isError {
require.Errorf(t, err, "%d: %v", i, tc)
} else {
require.NoErrorf(t, err, "%d: %v", i, tc)
}
require.Equalf(t, tc.expected.bundle, bundle, "%d: %v", i, tc)
}
}
func TestFindRunc(t *testing.T) {
testLogger, _ := testlog.NewNullLogger()
logger.Logger = testLogger
runcPath, err := findRunc()
require.NoError(t, err)
require.Equal(t, filepath.Join(cfg.binPath, runcExecutableName), runcPath)
}
func TestFindRuntime(t *testing.T) {
testLogger, _ := testlog.NewNullLogger()
logger.Logger = testLogger
func TestFactoryMethod(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
candidates []string
expectedPath string
description string
cfg *config.Config
spec *specs.Spec
expectedError bool
}{
{
candidates: []string{},
description: "empty config raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
},
expectedError: true,
},
{
candidates: []string{"not-runc"},
description: "config with runtime raises no error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "legacy",
},
},
},
{
candidates: []string{"not-runc", "also-not-runc"},
description: "csv mode is supported",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "csv",
},
},
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{
"NVIDIA_VISIBLE_DEVICES=all",
},
},
},
},
{
candidates: []string{runcExecutableName},
expectedPath: filepath.Join(cfg.binPath, runcExecutableName),
description: "non-legacy discover mode raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "non-legacy",
},
},
expectedError: true,
},
{
candidates: []string{runcExecutableName, "not-runc"},
expectedPath: filepath.Join(cfg.binPath, runcExecutableName),
description: "legacy discover mode returns modifier",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "legacy",
},
},
},
{
candidates: []string{"not-runc", runcExecutableName},
expectedPath: filepath.Join(cfg.binPath, runcExecutableName),
description: "csv discover mode returns modifier",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "csv",
},
},
},
{
description: "empty mode raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
},
},
expectedError: true,
},
}
for i, tc := range testCases {
runcPath, err := findRuntime(tc.candidates)
if tc.expectedPath == "" {
require.Error(t, err, "%d: %v", i, tc)
} else {
require.NoError(t, err, "%d: %v", i, tc)
}
require.Equal(t, tc.expectedPath, runcPath, "%d: %v", i, tc)
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
bundleDir := t.TempDir()
specFile, err := os.Create(filepath.Join(bundleDir, "config.json"))
require.NoError(t, err)
require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec))
argv := []string{"--bundle", bundleDir, "create"}
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
if tc.expectedError {
require.Error(t, err)
} else {
require.NoError(t, err)
}
})
}
}

View File

@@ -1,27 +0,0 @@
package main
import (
"log"
)
func capabilityToCLI(cap string) string {
switch cap {
case "compute":
return "--compute"
case "compat32":
return "--compat32"
case "graphics":
return "--graphics"
case "utility":
return "--utility"
case "video":
return "--video"
case "display":
return "--display"
case "ngx":
return "--ngx"
default:
log.Panicln("unknown driver capability:", cap)
}
return ""
}

17
cmd/nvidia-ctk/README.md Normal file
View File

@@ -0,0 +1,17 @@
# NVIDIA Container Toolkit CLI
The NVIDIA Container Toolkit CLI `nvidia-ctk` provides a number of utilities that are useful for working with the NVIDIA Container Toolkit.
## Functionality
### Configure runtimes
The `runtime` command of the `nvidia-ctk` CLI provides a set of utilities to related to the configuration
and management of supported container engines.
For example, running the following command:
```bash
nvidia-ctk runtime configure --set-as-default
```
will ensure that the NVIDIA Container Runtime is added as the default runtime to the default container
engine.

View File

@@ -0,0 +1,229 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package symlinks
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
}
type config struct {
hostRoot string
filenames cli.StringSlice
links cli.StringSlice
containerSpec string
}
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build
func (m command) build() *cli.Command {
cfg := config{}
// Create the '' command
c := cli.Command{
Name: "create-symlinks",
Usage: "A hook to create symlinks in the container. This can be used to proces CSV mount specs",
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "host-root",
Usage: "The root on the host filesystem to use to resolve symlinks",
Destination: &cfg.hostRoot,
},
&cli.StringSliceFlag{
Name: "csv-filename",
Usage: "Specify a (CSV) filename to process",
Destination: &cfg.filenames,
},
&cli.StringSliceFlag{
Name: "link",
Usage: "Specify a specific link to create. The link is specified as source:target",
Destination: &cfg.links,
},
&cli.StringFlag{
Name: "container-spec",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func (m command) run(c *cli.Context, cfg *config) error {
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %v", err)
}
containerRoot, err := s.GetContainerRoot()
if err != nil {
return fmt.Errorf("failed to determined container root: %v", err)
}
csvFiles := cfg.filenames.Value()
chainLocator := lookup.NewSymlinkChainLocator(m.logger, cfg.hostRoot)
var candidates []string
for _, file := range csvFiles {
mountSpecs, err := csv.NewCSVFileParser(m.logger, file).Parse()
if err != nil {
m.logger.Debugf("Skipping CSV file %v: %v", file, err)
continue
}
for _, ms := range mountSpecs {
if ms.Type != csv.MountSpecSym {
continue
}
targets, err := chainLocator.Locate(ms.Path)
if err != nil {
m.logger.Warnf("Failed to locate symlink %v", ms.Path)
}
candidates = append(candidates, targets...)
}
}
created := make(map[string]bool)
// candidates is a list of absolute paths to symlinks in a chain, or the final target of the chain.
for _, candidate := range candidates {
targets, err := m.Locate(candidate)
if err != nil {
m.logger.Debugf("Skipping invalid link: %v", err)
continue
} else if len(targets) != 1 {
m.logger.Debugf("Unexepected number of targets: %v", targets)
continue
} else if targets[0] == candidate {
m.logger.Debugf("%v is not a symlink", candidate)
continue
}
err = m.createLink(created, cfg.hostRoot, containerRoot, targets[0], candidate)
if err != nil {
m.logger.Warnf("Failed to create link %v: %v", []string{targets[0], candidate}, err)
}
}
links := cfg.links.Value()
for _, l := range links {
parts := strings.Split(l, ":")
if len(parts) != 2 {
m.logger.Warnf("Invalid link specification %v", l)
continue
}
err := m.createLink(created, cfg.hostRoot, containerRoot, parts[0], parts[1])
if err != nil {
m.logger.Warnf("Failed to create link %v: %v", parts, err)
}
}
return nil
}
func (m command) createLink(created map[string]bool, hostRoot string, containerRoot string, target string, link string) error {
linkPath, err := changeRoot(hostRoot, containerRoot, link)
if err != nil {
m.logger.Warnf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
}
if created[linkPath] {
m.logger.Debugf("Link %v already created", linkPath)
return nil
}
targetPath, err := changeRoot(hostRoot, "/", target)
if err != nil {
m.logger.Warnf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
}
m.logger.Infof("Symlinking %v to %v", linkPath, targetPath)
err = os.MkdirAll(filepath.Dir(linkPath), 0755)
if err != nil {
return fmt.Errorf("failed to create directory: %v", err)
}
err = os.Symlink(target, linkPath)
if err != nil {
return fmt.Errorf("failed to create symlink: %v", err)
}
return nil
}
func changeRoot(current string, new string, path string) (string, error) {
if !filepath.IsAbs(path) {
return path, nil
}
relative := path
if current != "" {
r, err := filepath.Rel(current, path)
if err != nil {
return "", err
}
relative = r
}
return filepath.Join(new, relative), nil
}
// Locate returns the link target of the specified filename or an empty slice if the
// specified filename is not a symlink.
func (m command) Locate(filename string) ([]string, error) {
info, err := os.Lstat(filename)
if err != nil {
return nil, fmt.Errorf("failed to get file info: %v", info)
}
if info.Mode()&os.ModeSymlink == 0 {
m.logger.Debugf("%v is not a symlink", filename)
return nil, nil
}
target, err := os.Readlink(filename)
if err != nil {
return nil, fmt.Errorf("error checking symlink: %v", err)
}
m.logger.Debugf("Resolved link: '%v' => '%v'", filename, target)
return []string{target}, nil
}

View File

@@ -0,0 +1,52 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package hook
import (
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type hookCommand struct {
logger *logrus.Logger
}
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := hookCommand{
logger: logger,
}
return c.build()
}
// build
func (m hookCommand) build() *cli.Command {
// Create the 'hook' command
hook := cli.Command{
Name: "hook",
Usage: "A collection of hooks that may be injected into an OCI spec",
}
hook.Subcommands = []*cli.Command{
ldcache.NewCommand(m.logger),
symlinks.NewCommand(m.logger),
}
return &hook
}

View File

@@ -0,0 +1,129 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
}
type config struct {
folders cli.StringSlice
containerSpec string
}
// NewCommand constructs an update-ldcache command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build the update-ldcache command
func (m command) build() *cli.Command {
cfg := config{}
// Create the 'update-ldcache' command
c := cli.Command{
Name: "update-ldcache",
Usage: "Update ldcache in a container by running ldconfig",
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "folder",
Usage: "Specifiy a folder to add to /etc/ld.so.conf before updating the ld cache",
Destination: &cfg.folders,
},
&cli.StringFlag{
Name: "container-spec",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func (m command) run(c *cli.Context, cfg *config) error {
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %v", err)
}
containerRoot, err := s.GetContainerRoot()
if err != nil {
return fmt.Errorf("failed to determined container root: %v", err)
}
err = m.createConfig(containerRoot, cfg.folders.Value())
if err != nil {
return fmt.Errorf("failed to update ld.so.conf: %v", err)
}
args := []string{"/sbin/ldconfig"}
if containerRoot != "" {
args = append(args, "-r", containerRoot)
}
return syscall.Exec(args[0], args, nil)
}
// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf in the container
// to include the required paths.
func (m command) createConfig(root string, folders []string) error {
if len(folders) == 0 {
m.logger.Debugf("No folders to add to /etc/ld.so.conf")
return nil
}
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf")
if err != nil {
return fmt.Errorf("failed to create config file: %v", err)
}
defer configFile.Close()
m.logger.Debugf("Adding folders %v to %v", folders, configFile.Name())
configured := make(map[string]bool)
for _, folder := range folders {
if configured[folder] {
continue
}
_, err = configFile.WriteString(fmt.Sprintf("%s\n", folder))
if err != nil {
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
}
configured[folder] = true
}
return nil
}

83
cmd/nvidia-ctk/main.go Normal file
View File

@@ -0,0 +1,83 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
)
var logger = log.New()
// config defines the options that can be set for the CLI through config files,
// environment variables, or command line flags
type config struct {
// Debug indicates whether the CLI is started in "debug" mode
Debug bool
}
func main() {
// Create a config struct to hold the parsed environment variables or command line flags
config := config{}
// Create the top-level CLI
c := cli.NewApp()
c.Name = "NVIDIA Container Toolkit CLI"
c.UseShortOptionHandling = true
c.EnableBashCompletion = true
c.Usage = "Tools to configure the NVIDIA Container Toolkit"
c.Version = info.GetVersionString()
// Setup the flags for this command
c.Flags = []cli.Flag{
&cli.BoolFlag{
Name: "debug",
Aliases: []string{"d"},
Usage: "Enable debug-level logging",
Destination: &config.Debug,
EnvVars: []string{"NVIDIA_CTK_DEBUG"},
},
}
// Set log-level for all subcommands
c.Before = func(c *cli.Context) error {
logLevel := log.InfoLevel
if config.Debug {
logLevel = log.DebugLevel
}
logger.SetLevel(logLevel)
return nil
}
// Define the subcommands
c.Commands = []*cli.Command{
hook.NewCommand(logger),
runtime.NewCommand(logger),
}
// Run the CLI
err := c.Run(os.Args)
if err != nil {
log.Errorf("%v", err)
log.Exit(1)
}
}

View File

@@ -0,0 +1,154 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package configure
import (
"encoding/json"
"fmt"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/nvidia"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/docker"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
const (
defaultRuntime = "docker"
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
)
type command struct {
logger *logrus.Logger
}
// NewCommand constructs an configure command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// config defines the options that can be set for the CLI through config files,
// environment variables, or command line config
type config struct {
dryRun bool
runtime string
configFilePath string
nvidiaOptions nvidia.Options
}
func (m command) build() *cli.Command {
// Create a config struct to hold the parsed environment variables or command line flags
config := config{}
// Create the 'configure' command
configure := cli.Command{
Name: "configure",
Usage: "Add a runtime to the specified container engine",
Action: func(c *cli.Context) error {
return m.configureWrapper(c, &config)
},
}
configure.Flags = []cli.Flag{
&cli.BoolFlag{
Name: "dry-run",
Usage: "update the runtime configuration as required but don't write changes to disk",
Destination: &config.dryRun,
},
&cli.StringFlag{
Name: "runtime",
Usage: "the target runtime engine. One of [docker]",
Value: defaultRuntime,
Destination: &config.runtime,
},
&cli.StringFlag{
Name: "config",
Usage: "path to the config file for the target runtime",
Destination: &config.configFilePath,
},
&cli.StringFlag{
Name: "nvidia-runtime-name",
Usage: "specify the name of the NVIDIA runtime that will be added",
Value: nvidia.RuntimeName,
Destination: &config.nvidiaOptions.RuntimeName,
},
&cli.StringFlag{
Name: "runtime-path",
Usage: "specify the path to the NVIDIA runtime executable",
Value: nvidia.RuntimeExecutable,
Destination: &config.nvidiaOptions.RuntimePath,
},
&cli.BoolFlag{
Name: "set-as-default",
Usage: "set the specified runtime as the default runtime",
Destination: &config.nvidiaOptions.SetAsDefault,
},
}
return &configure
}
func (m command) configureWrapper(c *cli.Context, config *config) error {
switch config.runtime {
case "docker":
return m.configureDocker(c, config)
}
return fmt.Errorf("unrecognized runtime '%v'", config.runtime)
}
// configureDocker updates the docker config to enable the NVIDIA Container Runtime
func (m command) configureDocker(c *cli.Context, config *config) error {
configFilePath := config.configFilePath
if configFilePath == "" {
configFilePath = defaultDockerConfigFilePath
}
cfg, err := docker.LoadConfig(configFilePath)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
defaultRuntime := config.nvidiaOptions.DefaultRuntime()
runtimeConfig := config.nvidiaOptions.Runtime().DockerRuntimesConfig()
err = docker.UpdateConfig(cfg, defaultRuntime, runtimeConfig)
if err != nil {
return fmt.Errorf("unable to update config: %v", err)
}
if config.dryRun {
output, err := json.MarshalIndent(cfg, "", " ")
if err != nil {
return fmt.Errorf("unable to convert to JSON: %v", err)
}
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
return nil
}
err = docker.FlushConfig(cfg, configFilePath)
if err != nil {
return fmt.Errorf("unable to flush config: %v", err)
}
m.logger.Infof("Wrote updated config to %v", configFilePath)
m.logger.Infof("It is recommended that the docker daemon be restarted.")
return nil
}

View File

@@ -0,0 +1,75 @@
/*
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package nvidia
const (
// RuntimeName is the default name to use in configs for the NVIDIA Container Runtime
RuntimeName = "nvidia"
// RuntimeExecutable is the default NVIDIA Container Runtime executable file name
RuntimeExecutable = "nvidia-container-runtime"
)
// Options specifies the options for the NVIDIA Container Runtime w.r.t a container engine such as docker.
type Options struct {
SetAsDefault bool
RuntimeName string
RuntimePath string
}
// Runtime defines an NVIDIA runtime with a name and a executable
type Runtime struct {
Name string
Path string
}
// DefaultRuntime returns the default runtime for the configured options.
// If the configuration is invalid or the default runtimes should not be set
// the empty string is returned.
func (o Options) DefaultRuntime() string {
if !o.SetAsDefault {
return ""
}
return o.RuntimeName
}
// Runtime creates a runtime struct based on the options.
func (o Options) Runtime() Runtime {
path := o.RuntimePath
if o.RuntimePath == "" {
path = RuntimeExecutable
}
r := Runtime{
Name: o.RuntimeName,
Path: path,
}
return r
}
// DockerRuntimesConfig generatest the expected docker config for the specified runtime
func (r Runtime) DockerRuntimesConfig() map[string]interface{} {
runtimes := make(map[string]interface{})
runtimes[r.Name] = map[string]interface{}{
"path": r.Path,
"args": []string{},
}
return runtimes
}

View File

@@ -0,0 +1,49 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package runtime
import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type runtimeCommand struct {
logger *logrus.Logger
}
// NewCommand constructs a runtime command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := runtimeCommand{
logger: logger,
}
return c.build()
}
func (m runtimeCommand) build() *cli.Command {
// Create the 'runtime' command
runtime := cli.Command{
Name: "runtime",
Usage: "A collection of runtime-related utilities for the NVIDIA Container Toolkit",
}
runtime.Subcommands = []*cli.Command{
configure.NewCommand(m.logger),
}
return &runtime
}

View File

@@ -1,18 +0,0 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@@ -16,3 +16,17 @@ ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -16,3 +16,17 @@ ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -16,3 +16,17 @@ ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -16,3 +16,17 @@ ldconfig = "@/sbin/ldconfig.real"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]
mode = "auto"
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"

View File

@@ -1,65 +0,0 @@
ARG BASEIMAGE
FROM ${BASEIMAGE}
RUN yum install -y \
ca-certificates \
wget \
git \
rpm-build \
make && \
rm -rf /var/cache/yum/*
ARG GOLANG_VERSION=0.0.0
RUN set -eux; \
\
arch="$(uname -m)"; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture"; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz
ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
# packaging
ARG PKG_VERS
ARG PKG_REV
ENV VERSION $PKG_VERS
ENV RELEASE $PKG_REV
# output directory
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
RUN mkdir -p $DIST_DIR /dist
# nvidia-container-toolkit
WORKDIR $GOPATH/src/nvidia-container-toolkit
COPY . .
RUN make PREFIX=${DIST_DIR} cmds
COPY config/config.toml.amzn $DIST_DIR/config.toml
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
# This might not be useful on Amazon Linux, but it's simpler to keep the RHEL
# and Amazon Linux packages identical.
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
WORKDIR $DIST_DIR/..
COPY packaging/rpm .
CMD arch=$(uname -m) && \
rpmbuild --clean --target=$arch -bb \
-D "_topdir $PWD" \
-D "version $VERSION" \
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
-D "release $RELEASE" \
SPECS/nvidia-container-toolkit.spec && \
mv RPMS/$arch/*.rpm /dist

View File

@@ -32,6 +32,7 @@ ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
# packaging
ARG PKG_NAME
ARG PKG_VERS
ARG PKG_REV
@@ -48,9 +49,13 @@ RUN mkdir -p $DIST_DIR /dist
WORKDIR $GOPATH/src/nvidia-container-toolkit
COPY . .
ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
COPY config/config.toml.debian $DIST_DIR/config.toml
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
# Debian Jessie still had ldconfig.real
RUN if [ "$(lsb_release -cs)" = "jessie" ]; then \
@@ -60,12 +65,17 @@ RUN if [ "$(lsb_release -cs)" = "jessie" ]; then \
WORKDIR $DIST_DIR
COPY packaging/debian ./debian
RUN sed -i "s;@VERSION@;${REVISION};" debian/changelog && \
dch --changelog debian/changelog --append "Bump libnvidia-container dependency to ${REVISION}}" && \
dch --changelog debian/changelog -r "" && \
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
RUN dch --create --package="${PKG_NAME}" \
--newversion "${REVISION}" \
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
dch --append "Bump libnvidia-container dependency to ${LIBNVIDIA_CONTAINER1_VERSION}" && \
dch -r "" && \
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
CMD export DISTRIB="$(lsb_release -cs)" && \
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_VERSION="${REVISION}" \
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
mv /tmp/nvidia-container-toolkit_*.deb /dist
mv /tmp/*.deb /dist

View File

@@ -14,7 +14,8 @@
ARG GOLANG_VERSION=x.x.x
FROM golang:${GOLANG_VERSION}
RUN go get -u golang.org/x/lint/golint
RUN go get -u github.com/matryer/moq
RUN go get -u github.com/gordonklaus/ineffassign
RUN go get -u github.com/client9/misspell/cmd/misspell
RUN go install golang.org/x/lint/golint@latest
RUN go install github.com/matryer/moq@latest
RUN go install github.com/gordonklaus/ineffassign@latest
RUN go install github.com/client9/misspell/cmd/misspell@latest
RUN go install github.com/google/go-licenses@latest

View File

@@ -25,11 +25,12 @@ ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
# packaging
ARG PKG_NAME
ARG PKG_VERS
ARG PKG_REV
ENV VERSION $PKG_VERS
ENV RELEASE $PKG_REV
ENV PKG_NAME ${PKG_NAME}
ENV PKG_VERS ${PKG_VERS}
ENV PKG_REV ${PKG_REV}
# output directory
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
@@ -39,6 +40,8 @@ RUN mkdir -p $DIST_DIR /dist
WORKDIR $GOPATH/src/nvidia-container-toolkit
COPY . .
ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
@@ -47,16 +50,23 @@ COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
COPY config/config.toml.opensuse-leap $DIST_DIR/config.toml
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
WORKDIR $DIST_DIR/..
COPY packaging/rpm .
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
CMD arch=$(uname -m) && \
rpmbuild --clean --target=$arch -bb \
-D "_topdir $PWD" \
-D "version $VERSION" \
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
-D "release $RELEASE" \
-D "release_date $(date +'%a %b %d %Y')" \
-D "git_commit ${GIT_COMMIT}" \
-D "version ${PKG_VERS}" \
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
-D "release ${PKG_REV}" \
SPECS/nvidia-container-toolkit.spec && \
mv RPMS/$arch/*.rpm /dist

View File

@@ -1,8 +1,25 @@
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is the dockerfile for building packages on yum-based RPM systems.
ARG BASEIMAGE
FROM ${BASEIMAGE}
RUN yum install -y \
ca-certificates \
gcc \
wget \
git \
make \
@@ -26,11 +43,12 @@ ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
# packaging
ARG PKG_NAME
ARG PKG_VERS
ARG PKG_REV
ENV VERSION $PKG_VERS
ENV RELEASE $PKG_REV
ENV PKG_NAME ${PKG_NAME}
ENV PKG_VERS ${PKG_VERS}
ENV PKG_REV ${PKG_REV}
# output directory
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
@@ -40,9 +58,13 @@ RUN mkdir -p $DIST_DIR /dist
WORKDIR $GOPATH/src/nvidia-container-toolkit
COPY . .
ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
COPY config/config.toml.centos $DIST_DIR/config.toml
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
@@ -53,11 +75,16 @@ COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
WORKDIR $DIST_DIR/..
COPY packaging/rpm .
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
CMD arch=$(uname -m) && \
rpmbuild --clean --target=$arch -bb \
-D "_topdir $PWD" \
-D "version $VERSION" \
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
-D "release $RELEASE" \
-D "release_date $(date +'%a %b %d %Y')" \
-D "git_commit ${GIT_COMMIT}" \
-D "version ${PKG_VERS}" \
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
-D "release ${PKG_REV}" \
SPECS/nvidia-container-toolkit.spec && \
mv RPMS/$arch/*.rpm /dist

View File

@@ -30,6 +30,7 @@ ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
# packaging
ARG PKG_NAME
ARG PKG_VERS
ARG PKG_REV
@@ -46,19 +47,28 @@ RUN mkdir -p $DIST_DIR /dist
WORKDIR $GOPATH/src/nvidia-container-toolkit
COPY . .
ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
COPY config/config.toml.ubuntu $DIST_DIR/config.toml
ARG CONFIG_TOML_SUFFIX
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
WORKDIR $DIST_DIR
COPY packaging/debian ./debian
RUN sed -i "s;@VERSION@;${REVISION};" debian/changelog && \
dch --changelog debian/changelog --append "Bump libnvidia-container dependency to ${REVISION}}" && \
dch --changelog debian/changelog -r "" && \
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
RUN dch --create --package="${PKG_NAME}" \
--newversion "${REVISION}" \
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
dch --append "Bump libnvidia-container dependency to ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" && \
dch -r "" && \
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
CMD export DISTRIB="$(lsb_release -cs)" && \
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_VERSION="${REVISION}" \
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
mv /tmp/*.deb /dist

View File

@@ -14,10 +14,10 @@
# Supported OSs by architecture
AMD64_TARGETS := ubuntu20.04 ubuntu18.04 ubuntu16.04 debian10 debian9
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux1 amazonlinux2 opensuse-leap15.1
X86_64_TARGETS := fedora35 centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
PPC64LE_TARGETS := ubuntu18.04 ubuntu16.04 centos7 centos8 rhel7 rhel8
ARM64_TARGETS := ubuntu20.04 ubuntu18.04
AARCH64_TARGETS := centos8 rhel8 amazonlinux2
AARCH64_TARGETS := fedora35 centos8 rhel8 amazonlinux2
# Define top-level build targets
docker%: SHELL:=/bin/bash
@@ -85,48 +85,85 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
--%: docker-build-%
@
LIBNVIDIA_CONTAINER_VERSION ?= $(LIB_VERSION)
LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
# private ubuntu target
--ubuntu%: OS := ubuntu
--ubuntu%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
--ubuntu%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
--ubuntu%: PKG_REV := 1
# private debian target
--debian%: OS := debian
--debian%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
--debian%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
--debian%: PKG_REV := 1
# private centos target
--centos%: OS := centos
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--centos%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--centos%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--centos%: CONFIG_TOML_SUFFIX := rpm-yum
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
# private fedora target
--fedora%: OS := fedora
--fedora%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--fedora%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--fedora%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--fedora%: CONFIG_TOML_SUFFIX := rpm-yum
# The fedora(35) base image has very slow performance when building aarch64 packages.
# Since our primary concern here is glibc versions, we use the older glibc version available in centos8.
--fedora35%: BASEIMAGE = quay.io/centos/centos:stream8
# private amazonlinux target
--amazonlinux%: OS := amazonlinux
--amazonlinux%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--amazonlinux%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--amazonlinux%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--amazonlinux%: CONFIG_TOML_SUFFIX := rpm-yum
# private opensuse-leap target
--opensuse-leap%: OS = opensuse-leap
--opensuse-leap%: BASEIMAGE = opensuse/leap:$(VERSION)
--opensuse-leap%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--opensuse-leap%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
# private rhel target (actually built on centos)
--rhel%: OS := centos
--rhel%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
--rhel%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
--rhel%: CONFIG_TOML_SUFFIX := rpm-yum
--rhel8%: BASEIMAGE = quay.io/centos/centos:stream8
# We allow the CONFIG_TOML_SUFFIX to be overridden.
CONFIG_TOML_SUFFIX ?= $(OS)
docker-build-%:
@echo "Building for $(TARGET_PLATFORM)"
docker pull --platform=linux/$(ARCH) $(BASEIMAGE)
DOCKER_BUILDKIT=1 \
$(DOCKER) build \
--platform=linux/$(ARCH) \
--progress=plain \
--build-arg BASEIMAGE="$(BASEIMAGE)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg PKG_NAME="$(LIB_NAME)" \
--build-arg PKG_VERS="$(LIB_VERSION)" \
--build-arg PKG_REV="$(PKG_REV)" \
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
--tag $(BUILDIMAGE) \
--file $(DOCKERFILE) .
$(DOCKER) run \
--platform=linux/$(ARCH) \
-e DISTRIB \
-e SECTION \
-v $(ARTIFACTS_DIR):/dist \

23
go.mod
View File

@@ -3,15 +3,22 @@ module github.com/NVIDIA/nvidia-container-toolkit
go 1.14
require (
github.com/BurntSushi/toml v0.3.1
github.com/containerd/containerd v1.5.7
github.com/containers/podman/v2 v2.2.1
github.com/opencontainers/runtime-spec v1.0.3-0.20211101234015-a3c33d663ebc
github.com/pelletier/go-toml v1.9.3
github.com/BurntSushi/toml v1.0.0
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220715143214-a79f46f2a6f7
github.com/container-orchestrated-devices/container-device-interface v0.4.1-0.20220614144320-dc973e22f674
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/opencontainers/runc v1.1.3
github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab
github.com/opencontainers/runtime-tools v0.9.1-0.20220110225228-7e2d60f1e41f // indirect
github.com/pelletier/go-toml v1.9.4
github.com/sirupsen/logrus v1.8.1
github.com/stretchr/testify v1.7.0
github.com/tsaikd/KDGoLib v0.0.0-20191001134900-7f3cf518e07d
github.com/urfave/cli v1.22.4 // indirect
github.com/urfave/cli/v2 v2.3.0
golang.org/x/mod v0.3.0
golang.org/x/sys v0.0.0-20210426230700-d19ff857e887
github.com/xeipuuv/gojsonpointer v0.0.0-20190809123943-df4f5c81cb3b // indirect
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220725232003-c7f47cb02a33
golang.org/x/mod v0.5.0
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
)

993
go.sum

File diff suppressed because it is too large Load Diff

48
internal/config/cli.go Normal file
View File

@@ -0,0 +1,48 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"github.com/pelletier/go-toml"
)
// ContainerCLIConfig stores the options for the nvidia-container-cli
type ContainerCLIConfig struct {
Root string
}
// getContainerCLIConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getContainerCLIConfigFrom(toml *toml.Tree) *ContainerCLIConfig {
cfg := getDefaultContainerCLIConfig()
if toml == nil {
return cfg
}
cfg.Root = toml.GetDefault("nvidia-container-cli.root", cfg.Root).(string)
return cfg
}
// getDefaultContainerCLIConfig defines the default values for the config
func getDefaultContainerCLIConfig() *ContainerCLIConfig {
c := ContainerCLIConfig{
Root: "",
}
return &c
}

114
internal/config/config.go Normal file
View File

@@ -0,0 +1,114 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"fmt"
"io"
"os"
"path"
"github.com/pelletier/go-toml"
)
const (
configOverride = "XDG_CONFIG_HOME"
configFilePath = "nvidia-container-runtime/config.toml"
)
var (
// DefaultExecutableDir specifies the default path to use for executables if they cannot be located in the path.
DefaultExecutableDir = "/usr/bin"
// NVIDIAContainerRuntimeHookExecutable is the executable name for the NVIDIA Container Runtime Hook
NVIDIAContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
// NVIDIAContainerToolkitExecutable is the executable name for the NVIDIA Container Toolkit (an alias for the NVIDIA Container Runtime Hook)
NVIDIAContainerToolkitExecutable = "nvidia-container-toolkit"
configDir = "/etc/"
)
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
type Config struct {
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
}
// GetConfig sets up the config struct. Values are read from a toml file
// or set via the environment.
func GetConfig() (*Config, error) {
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
configDir = XDGConfigDir
}
configFilePath := path.Join(configDir, configFilePath)
tomlFile, err := os.Open(configFilePath)
if err != nil {
return getDefaultConfig(), nil
}
defer tomlFile.Close()
cfg, err := loadConfigFrom(tomlFile)
if err != nil {
return nil, fmt.Errorf("failed to read config values: %v", err)
}
return cfg, nil
}
// loadRuntimeConfigFrom reads the config from the specified Reader
func loadConfigFrom(reader io.Reader) (*Config, error) {
toml, err := toml.LoadReader(reader)
if err != nil {
return nil, err
}
return getConfigFrom(toml)
}
// getConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getConfigFrom(toml *toml.Tree) (*Config, error) {
cfg := getDefaultConfig()
if toml == nil {
return cfg, nil
}
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
runtimeConfig, err := getRuntimeConfigFrom(toml)
if err != nil {
return nil, fmt.Errorf("failed to load nvidia-container-runtime config: %v", err)
}
cfg.NVIDIAContainerRuntimeConfig = *runtimeConfig
return cfg, nil
}
// getDefaultConfig defines the default values for the config
func getDefaultConfig() *Config {
c := Config{
NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(),
NVIDIACTKConfig: *getDefaultCTKConfig(),
NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(),
}
return &c
}

View File

@@ -0,0 +1,165 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"io/ioutil"
"os"
"path/filepath"
"strings"
"testing"
"github.com/stretchr/testify/require"
)
func TestGetConfigWithCustomConfig(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
// By default debug is disabled
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
testDir := filepath.Join(wd, "test")
filename := filepath.Join(testDir, configFilePath)
os.Setenv(configOverride, testDir)
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
cfg, err := GetConfig()
require.NoError(t, err)
require.Equal(t, cfg.NVIDIAContainerRuntimeConfig.DebugFilePath, "/nvidia-container-toolkit.log")
}
func TestGetConfig(t *testing.T) {
testCases := []struct {
description string
contents []string
expectedError error
expectedConfig *Config
}{
{
description: "empty config is default",
expectedConfig: &Config{
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
},
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
},
},
{
description: "config options set inline",
contents: []string{
"nvidia-container-cli.root = \"/bar/baz\"",
"nvidia-container-runtime.debug = \"/foo/bar\"",
"nvidia-container-runtime.experimental = true",
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
"nvidia-container-runtime.log-level = \"debug\"",
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
"nvidia-container-runtime.mode = \"not-auto\"",
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
},
expectedConfig: &Config{
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
LogLevel: "debug",
Runtimes: []string{"/some/runtime"},
Mode: "not-auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
},
},
},
NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk",
},
},
},
{
description: "config options set in section",
contents: []string{
"[nvidia-container-cli]",
"root = \"/bar/baz\"",
"[nvidia-container-runtime]",
"debug = \"/foo/bar\"",
"experimental = true",
"discover-mode = \"not-legacy\"",
"log-level = \"debug\"",
"runtimes = [\"/some/runtime\",]",
"mode = \"not-auto\"",
"[nvidia-container-runtime.modes.csv]",
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"[nvidia-ctk]",
"path = \"/foo/bar/nvidia-ctk\"",
},
expectedConfig: &Config{
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
LogLevel: "debug",
Runtimes: []string{"/some/runtime"},
Mode: "not-auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
},
},
},
NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk",
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
reader := strings.NewReader(strings.Join(tc.contents, "\n"))
cfg, err := loadConfigFrom(reader)
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.EqualValues(t, tc.expectedConfig, cfg)
})
}
}

View File

@@ -0,0 +1,115 @@
/**
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package docker
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
log "github.com/sirupsen/logrus"
)
// LoadConfig loads the docker config from disk
func LoadConfig(configFilePath string) (map[string]interface{}, error) {
log.Infof("Loading docker config from %v", configFilePath)
info, err := os.Stat(configFilePath)
if os.IsExist(err) && info.IsDir() {
return nil, fmt.Errorf("config file is a directory")
}
cfg := make(map[string]interface{})
if os.IsNotExist(err) {
log.Infof("Config file does not exist, creating new one")
return cfg, nil
}
readBytes, err := ioutil.ReadFile(configFilePath)
if err != nil {
return nil, fmt.Errorf("unable to read config: %v", err)
}
reader := bytes.NewReader(readBytes)
if err := json.NewDecoder(reader).Decode(&cfg); err != nil {
return nil, err
}
log.Infof("Successfully loaded config")
return cfg, nil
}
// UpdateConfig updates the docker config to include the nvidia runtimes
func UpdateConfig(config map[string]interface{}, defaultRuntime string, newRuntimes map[string]interface{}) error {
if defaultRuntime != "" {
config["default-runtime"] = defaultRuntime
}
// Read the existing runtimes
runtimes := make(map[string]interface{})
if _, exists := config["runtimes"]; exists {
runtimes = config["runtimes"].(map[string]interface{})
}
// Add / update the runtime definitions
for name, rt := range newRuntimes {
runtimes[name] = rt
}
// Update the runtimes definition
if len(runtimes) > 0 {
config["runtimes"] = runtimes
}
return nil
}
// FlushConfig flushes the updated/reverted config out to disk
func FlushConfig(cfg map[string]interface{}, configFilePath string) error {
log.Infof("Flushing docker config to %v", configFilePath)
output, err := json.MarshalIndent(cfg, "", " ")
if err != nil {
return fmt.Errorf("unable to convert to JSON: %v", err)
}
switch len(output) {
case 0:
err := os.Remove(configFilePath)
if err != nil {
return fmt.Errorf("unable to remove empty file: %v", err)
}
log.Infof("Config empty, removing file")
default:
f, err := os.Create(configFilePath)
if err != nil {
return fmt.Errorf("unable to open %v for writing: %v", configFilePath, err)
}
defer f.Close()
_, err = f.WriteString(string(output))
if err != nil {
return fmt.Errorf("unable to write output: %v", err)
}
}
log.Infof("Successfully flushed config")
return nil
}

View File

@@ -0,0 +1,228 @@
/**
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package docker
import (
"encoding/json"
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
func TestUpdateConfigDefaultRuntime(t *testing.T) {
testCases := []struct {
config map[string]interface{}
defaultRuntime string
runtimeName string
expectedDefaultRuntimeName interface{}
}{
{
defaultRuntime: "",
expectedDefaultRuntimeName: nil,
},
{
defaultRuntime: "NAME",
expectedDefaultRuntimeName: "NAME",
},
{
config: map[string]interface{}{
"default-runtime": "ALREADY_SET",
},
defaultRuntime: "",
expectedDefaultRuntimeName: "ALREADY_SET",
},
{
config: map[string]interface{}{
"default-runtime": "ALREADY_SET",
},
defaultRuntime: "NAME",
expectedDefaultRuntimeName: "NAME",
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
if tc.config == nil {
tc.config = make(map[string]interface{})
}
err := UpdateConfig(tc.config, tc.defaultRuntime, nil)
require.NoError(t, err)
defaultRuntimeName := tc.config["default-runtime"]
require.EqualValues(t, tc.expectedDefaultRuntimeName, defaultRuntimeName)
})
}
}
func TestUpdateConfigRuntimes(t *testing.T) {
testCases := []struct {
config map[string]interface{}
runtimes map[string]interface{}
expectedConfig map[string]interface{}
}{
{
config: map[string]interface{}{},
runtimes: map[string]interface{}{
"runtime1": map[string]interface{}{
"path": "/test/runtime/dir/runtime1",
"args": []string{},
},
"runtime2": map[string]interface{}{
"path": "/test/runtime/dir/runtime2",
"args": []string{},
},
},
expectedConfig: map[string]interface{}{
"runtimes": map[string]interface{}{
"runtime1": map[string]interface{}{
"path": "/test/runtime/dir/runtime1",
"args": []string{},
},
"runtime2": map[string]interface{}{
"path": "/test/runtime/dir/runtime2",
"args": []string{},
},
},
},
},
{
config: map[string]interface{}{
"runtimes": map[string]interface{}{
"runtime1": map[string]interface{}{
"path": "runtime1",
"args": []string{},
},
},
},
runtimes: map[string]interface{}{
"runtime1": map[string]interface{}{
"path": "/test/runtime/dir/runtime1",
"args": []string{},
},
"runtime2": map[string]interface{}{
"path": "/test/runtime/dir/runtime2",
"args": []string{},
},
},
expectedConfig: map[string]interface{}{
"runtimes": map[string]interface{}{
"runtime1": map[string]interface{}{
"path": "/test/runtime/dir/runtime1",
"args": []string{},
},
"runtime2": map[string]interface{}{
"path": "/test/runtime/dir/runtime2",
"args": []string{},
},
},
},
},
{
config: map[string]interface{}{
"runtimes": map[string]interface{}{
"not-nvidia": map[string]interface{}{
"path": "some-other-path",
"args": []string{},
},
},
},
runtimes: map[string]interface{}{
"runtime1": map[string]interface{}{
"path": "/test/runtime/dir/runtime1",
"args": []string{},
},
},
expectedConfig: map[string]interface{}{
"runtimes": map[string]interface{}{
"not-nvidia": map[string]interface{}{
"path": "some-other-path",
"args": []string{},
},
"runtime1": map[string]interface{}{
"path": "/test/runtime/dir/runtime1",
"args": []string{},
},
},
},
},
{
config: map[string]interface{}{
"exec-opts": []string{"native.cgroupdriver=systemd"},
"log-driver": "json-file",
"log-opts": map[string]string{
"max-size": "100m",
},
"storage-driver": "overlay2",
},
runtimes: map[string]interface{}{
"runtime1": map[string]interface{}{
"path": "/test/runtime/dir/runtime1",
"args": []string{},
},
},
expectedConfig: map[string]interface{}{
"exec-opts": []string{"native.cgroupdriver=systemd"},
"log-driver": "json-file",
"log-opts": map[string]string{
"max-size": "100m",
},
"storage-driver": "overlay2",
"runtimes": map[string]interface{}{
"runtime1": map[string]interface{}{
"path": "/test/runtime/dir/runtime1",
"args": []string{},
},
},
},
},
{
config: map[string]interface{}{
"exec-opts": []string{"native.cgroupdriver=systemd"},
"log-driver": "json-file",
"log-opts": map[string]string{
"max-size": "100m",
},
"storage-driver": "overlay2",
},
expectedConfig: map[string]interface{}{
"exec-opts": []string{"native.cgroupdriver=systemd"},
"log-driver": "json-file",
"log-opts": map[string]string{
"max-size": "100m",
},
"storage-driver": "overlay2",
},
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
err := UpdateConfig(tc.config, "", tc.runtimes)
require.NoError(t, err)
configContent, err := json.MarshalIndent(tc.config, "", " ")
require.NoError(t, err)
expectedContent, err := json.MarshalIndent(tc.expectedConfig, "", " ")
require.NoError(t, err)
require.EqualValues(t, string(expectedContent), string(configContent))
})
}
}

View File

@@ -0,0 +1,174 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"fmt"
"strconv"
"strings"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
)
const (
envCUDAVersion = "CUDA_VERSION"
envNVRequirePrefix = "NVIDIA_REQUIRE_"
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
envNVRequireJetpack = envNVRequirePrefix + "JETPACK"
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
)
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
// a map of environment variable to values that can be used to perform lookups
// such as requirements.
type CUDA map[string]string
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
// The process environment is read (if present) to construc the CUDA Image.
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
if spec == nil || spec.Process == nil {
return NewCUDAImageFromEnv(nil)
}
return NewCUDAImageFromEnv(spec.Process.Env)
}
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
// is a list of strings of the form ENVAR=VALUE.
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
c := make(CUDA)
for _, e := range env {
parts := strings.SplitN(e, "=", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("invalid environment variable: %v", e)
}
c[parts[0]] = parts[1]
}
return c, nil
}
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
// image is considered legacy if it has a CUDA_VERSION environment variable defined
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
func (i CUDA) IsLegacy() bool {
legacyCudaVersion := i[envCUDAVersion]
cudaRequire := i[envNVRequireCUDA]
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
}
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
// variables.
func (i CUDA) GetRequirements() ([]string, error) {
// TODO: We need not process this if disable require is set, but this will be done
// in a single follow-up to ensure that the behavioural change is accurately captured.
// if i.HasDisableRequire() {
// return nil, nil
// }
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
var requirements []string
for name, value := range i {
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
requirements = append(requirements, value)
}
}
if i.IsLegacy() {
v, err := i.legacyVersion()
if err != nil {
return nil, fmt.Errorf("failed to get version: %v", err)
}
cudaRequire := fmt.Sprintf("cuda>=%s", v)
requirements = append(requirements, cudaRequire)
}
return requirements, nil
}
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
// to a valid (true) boolean value this can be used to disable the requirement checks
func (i CUDA) HasDisableRequire() bool {
if disable, exists := i[envNVDisableRequire]; exists {
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
d, _ := strconv.ParseBool(disable)
return d
}
return false
}
// DevicesFromEnvvars returns the devices requested by the image through environment variables
func (i CUDA) DevicesFromEnvvars(envVars ...string) []string {
// Grab a reference to devices from the first envvar
// in the list that actually exists in the environment.
var devices *string
for _, envVar := range envVars {
if devs, ok := i[envVar]; ok {
devices = &devs
break
}
}
// Environment variable unset with legacy image: default to "all".
if devices == nil && i.IsLegacy() {
return []string{"all"}
}
// Environment variable unset or empty or "void": return nil
if devices == nil || len(*devices) == 0 || *devices == "void" {
return nil
}
// Environment variable set to "none": reset to "".
if *devices == "none" {
return []string{""}
}
return strings.Split(*devices, ",")
}
func (i CUDA) legacyVersion() (string, error) {
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
if err != nil {
return "", fmt.Errorf("invalid CUDA version: %v", err)
}
return majorMinor, nil
}
func parseMajorMinorVersion(version string) (string, error) {
vVersion := "v" + strings.TrimPrefix(version, "v")
if !semver.IsValid(vVersion) {
return "", fmt.Errorf("invalid version string")
}
majorMinor := strings.TrimPrefix(semver.MajorMinor(vVersion), "v")
parts := strings.Split(majorMinor, ".")
var err error
_, err = strconv.ParseUint(parts[0], 10, 32)
if err != nil {
return "", fmt.Errorf("invalid major version")
}
_, err = strconv.ParseUint(parts[1], 10, 32)
if err != nil {
return "", fmt.Errorf("invalid minor version")
}
return majorMinor, nil
}

View File

@@ -0,0 +1,123 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestParseMajorMinorVersionValid(t *testing.T) {
var tests = []struct {
version string
expected string
}{
{"0", "0.0"},
{"8", "8.0"},
{"7.5", "7.5"},
{"9.0.116", "9.0"},
{"4294967295.4294967295.4294967295", "4294967295.4294967295"},
{"v11.6", "11.6"},
}
for _, c := range tests {
t.Run(c.version, func(t *testing.T) {
version, err := parseMajorMinorVersion(c.version)
require.NoError(t, err)
require.Equal(t, c.expected, version)
})
}
}
func TestParseMajorMinorVersionInvalid(t *testing.T) {
var tests = []string{
"foo",
"foo.5.10",
"9.0.116.50",
"9.0.116foo",
"7.foo",
"9.0.bar",
"9.4294967296",
"9.0.116.",
"9..0",
"9.",
".5.10",
"-9",
"+9",
"-9.1.116",
"-9.-1.-116",
}
for _, c := range tests {
t.Run(c, func(t *testing.T) {
_, err := parseMajorMinorVersion(c)
require.Error(t, err)
})
}
}
func TestGetRequirements(t *testing.T) {
testCases := []struct {
description string
env []string
requirements []string
}{
{
description: "NVIDIA_REQUIRE_JETPACK is ignored",
env: []string{"NVIDIA_REQUIRE_JETPACK=csv-mounts=all"},
requirements: nil,
},
{
description: "NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS is ignored",
env: []string{"NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS=base-only"},
requirements: nil,
},
{
description: "single requirement set",
env: []string{"NVIDIA_REQUIRE_CUDA=cuda>=11.6"},
requirements: []string{"cuda>=11.6"},
},
{
description: "requirements are concatenated requirement set",
env: []string{"NVIDIA_REQUIRE_CUDA=cuda>=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla"},
requirements: []string{"cuda>=11.6", "brand=tesla"},
},
{
description: "legacy image",
env: []string{"CUDA_VERSION=11.6"},
requirements: []string{"cuda>=11.6"},
},
{
description: "legacy image with additional requirement",
env: []string{"CUDA_VERSION=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla"},
requirements: []string{"cuda>=11.6", "brand=tesla"},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
image, err := NewCUDAImageFromEnv(tc.env)
require.NoError(t, err)
requirements, err := image.GetRequirements()
require.NoError(t, err)
require.ElementsMatch(t, tc.requirements, requirements)
})
}
}

101
internal/config/runtime.go Normal file
View File

@@ -0,0 +1,101 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"fmt"
"github.com/pelletier/go-toml"
"github.com/sirupsen/logrus"
)
const (
dockerRuncExecutableName = "docker-runc"
runcExecutableName = "runc"
auto = "auto"
)
// RuntimeConfig stores the config options for the NVIDIA Container Runtime
type RuntimeConfig struct {
DebugFilePath string `toml:"debug"`
// LogLevel defines the logging level for the application
LogLevel string `toml:"log-level"`
// Runtimes defines the candidates for the low-level runtime
Runtimes []string `toml:"runtimes"`
Mode string `toml:"mode"`
Modes modesConfig `toml:"modes"`
}
// modesConfig defines (optional) per-mode configs
type modesConfig struct {
CSV csvModeConfig `toml:"csv"`
CDI cdiModeConfig `toml:"cdi"`
}
type cdiModeConfig struct {
// SpecDirs allows for the default spec dirs for CDI to be overridden
SpecDirs []string `toml:"spec-dirs"`
}
type csvModeConfig struct {
MountSpecPath string `toml:"mount-spec-path"`
}
// dummy allows us to unmarshal only a RuntimeConfig from a *toml.Tree
type dummy struct {
Runtime RuntimeConfig `toml:"nvidia-container-runtime"`
}
// getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) {
cfg := GetDefaultRuntimeConfig()
if toml == nil {
return cfg, nil
}
d := dummy{
Runtime: *cfg,
}
if err := toml.Unmarshal(&d); err != nil {
return nil, fmt.Errorf("failed to unmarshal runtime config: %v", err)
}
return &d.Runtime, nil
}
// GetDefaultRuntimeConfig defines the default values for the config
func GetDefaultRuntimeConfig() *RuntimeConfig {
c := RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: logrus.InfoLevel.String(),
Runtimes: []string{
dockerRuncExecutableName,
runcExecutableName,
},
Mode: auto,
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
},
}
return &c
}

View File

@@ -0,0 +1,46 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import "github.com/pelletier/go-toml"
// CTKConfig stores the config options for the NVIDIA Container Toolkit CLI (nvidia-ctk)
type CTKConfig struct {
Path string `toml:"path"`
}
// getCTKConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getCTKConfigFrom(toml *toml.Tree) *CTKConfig {
cfg := getDefaultCTKConfig()
if toml == nil {
return cfg
}
cfg.Path = toml.GetDefault("nvidia-ctk.path", cfg.Path).(string)
return cfg
}
// getDefaultCTKConfig defines the default values for the config
func getDefaultCTKConfig() *CTKConfig {
c := CTKConfig{
Path: "nvidia-ctk",
}
return &c
}

137
internal/cuda/cuda.go Normal file
View File

@@ -0,0 +1,137 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package cuda
import (
"fmt"
"github.com/NVIDIA/go-nvml/pkg/dl"
)
/*
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#ifdef _WIN32
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif
typedef int CUdevice;
typedef enum CUdevice_attribute_enum {
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
} CUdevice_attribute;
typedef enum cudaError_enum {
CUDA_SUCCESS = 0
} CUresult;
CUresult CUDAAPI cuInit(unsigned int Flags);
CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);
CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
*/
import "C"
const (
libraryName = "libcuda.so.1"
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
)
// cuda stores a reference the cuda dynamic library
var lib *dl.DynamicLibrary
// Version returns the CUDA version of the driver as a string or an error if this
// cannot be determined.
func Version() (string, error) {
lib, err := load()
if err != nil {
return "", err
}
defer lib.Close()
if err := lib.Lookup("cuDriverGetVersion"); err != nil {
return "", fmt.Errorf("failed to lookup symbol: %v", err)
}
var version C.int
if result := C.cuDriverGetVersion(&version); result != C.CUDA_SUCCESS {
return "", fmt.Errorf("failed to get CUDA version: result=%v", result)
}
major := version / 1000
minor := version % 100 / 10
return fmt.Sprintf("%d.%d", major, minor), nil
}
// ComputeCapability returns the CUDA compute capability of a device with the specified index as a string
// or an error if this cannot be determined.
func ComputeCapability(index int) (string, error) {
lib, err := load()
if err != nil {
return "", err
}
defer lib.Close()
if err := lib.Lookup("cuInit"); err != nil {
return "", fmt.Errorf("failed to lookup symbol: %v", err)
}
if err := lib.Lookup("cuDeviceGet"); err != nil {
return "", fmt.Errorf("failed to lookup symbol: %v", err)
}
if err := lib.Lookup("cuDeviceGetAttribute"); err != nil {
return "", fmt.Errorf("failed to lookup symbol: %v", err)
}
if result := C.cuInit(C.uint(0)); result != C.CUDA_SUCCESS {
return "", fmt.Errorf("failed to initialize CUDA: result=%v", result)
}
var device C.CUdevice
// NOTE: We only query the first device
if result := C.cuDeviceGet(&device, C.int(index)); result != C.CUDA_SUCCESS {
return "", fmt.Errorf("failed to get CUDA device %v: result=%v", 0, result)
}
var major C.int
if result := C.cuDeviceGetAttribute(&major, C.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device); result != C.CUDA_SUCCESS {
return "", fmt.Errorf("failed to get CUDA compute capability major for device %v : result=%v", 0, result)
}
var minor C.int
if result := C.cuDeviceGetAttribute(&minor, C.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device); result != C.CUDA_SUCCESS {
return "", fmt.Errorf("failed to get CUDA compute capability minor for device %v: result=%v", 0, result)
}
return fmt.Sprintf("%d.%d", major, minor), nil
}
func load() (*dl.DynamicLibrary, error) {
lib := dl.New(libraryName, libraryLoadFlags)
if lib == nil {
return nil, fmt.Errorf("error instantiating DynamicLibrary for CUDA")
}
err := lib.Open()
if err != nil {
return nil, fmt.Errorf("error opening DynamicLibrary for CUDA: %v", err)
}
return lib, nil
}

View File

@@ -0,0 +1,62 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
// charDevices is a discover for a list of character devices
type charDevices mounts
var _ Discover = (*charDevices)(nil)
// NewCharDeviceDiscoverer creates a discoverer which locates the specified set of device nodes.
func NewCharDeviceDiscoverer(logger *logrus.Logger, devices []string, root string) Discover {
locator := lookup.NewCharDeviceLocator(logger, root)
return NewDeviceDiscoverer(logger, locator, root, devices)
}
// NewDeviceDiscoverer creates a discoverer which locates the specified set of device nodes using the specified locator.
func NewDeviceDiscoverer(logger *logrus.Logger, locator lookup.Locator, root string, devices []string) Discover {
m := NewMounts(logger, locator, root, devices).(*mounts)
return (*charDevices)(m)
}
// Mounts returns the discovered mounts for the charDevices.
// Since this explicitly specifies a device list, the mounts are nil.
func (d *charDevices) Mounts() ([]Mount, error) {
return nil, nil
}
// Devices returns the discovered devices for the charDevices.
// Here the device nodes are first discovered as mounts and these are converted to devices.
func (d *charDevices) Devices() ([]Device, error) {
devicesAsMounts, err := (*mounts)(d).Mounts()
if err != nil {
return nil, err
}
var devices []Device
for _, mount := range devicesAsMounts {
devices = append(devices, Device(mount))
}
return devices, nil
}

View File

@@ -0,0 +1,83 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestCharDevices(t *testing.T) {
logger, logHook := testlog.NewNullLogger()
testCases := []struct {
description string
input *charDevices
expectedMounts []Mount
expectedMountsError error
expectedDevicesError error
expectedDevices []Device
}{
{
description: "dev mounts are empty",
input: (*charDevices)(
&mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(string) ([]string, error) {
return []string{"located"}, nil
},
},
required: []string{"required"},
},
),
expectedDevices: []Device{{Path: "located", HostPath: "located"}},
},
{
description: "dev devices returns error for nil lookup",
input: &charDevices{},
expectedDevicesError: fmt.Errorf("no lookup defined"),
},
}
for _, tc := range testCases {
logHook.Reset()
t.Run(tc.description, func(t *testing.T) {
tc.input.logger = logger
mounts, err := tc.input.Mounts()
if tc.expectedMountsError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.ElementsMatch(t, tc.expectedMounts, mounts)
devices, err := tc.input.Devices()
if tc.expectedDevicesError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.ElementsMatch(t, tc.expectedDevices, devices)
})
}
}

107
internal/discover/csv.go Normal file
View File

@@ -0,0 +1,107 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
// NewFromCSVFiles creates a discoverer for the specified CSV files. A logger is also supplied.
// The constructed discoverer is comprised of a list, with each element in the list being associated with a
// single CSV files.
func NewFromCSVFiles(logger *logrus.Logger, files []string, root string) (Discover, error) {
if len(files) == 0 {
logger.Warnf("No CSV files specified")
return None{}, nil
}
symlinkLocator := lookup.NewSymlinkLocator(logger, root)
locators := map[csv.MountSpecType]lookup.Locator{
csv.MountSpecDev: lookup.NewCharDeviceLocator(logger, root),
csv.MountSpecDir: lookup.NewDirectoryLocator(logger, root),
// Libraries and symlinks are handled in the same way
csv.MountSpecLib: symlinkLocator,
csv.MountSpecSym: symlinkLocator,
}
var mountSpecs []*csv.MountSpec
for _, filename := range files {
targets, err := loadCSVFile(logger, filename)
if err != nil {
logger.Warnf("Skipping CSV file %v: %v", filename, err)
continue
}
mountSpecs = append(mountSpecs, targets...)
}
return newFromMountSpecs(logger, locators, root, mountSpecs)
}
// loadCSVFile loads the specified CSV file and returns the list of mount specs
func loadCSVFile(logger *logrus.Logger, filename string) ([]*csv.MountSpec, error) {
// Create a discoverer for each file-kind combination
targets, err := csv.NewCSVFileParser(logger, filename).Parse()
if err != nil {
return nil, fmt.Errorf("failed to parse CSV file: %v", err)
}
if len(targets) == 0 {
return nil, fmt.Errorf("CSV file is empty")
}
return targets, nil
}
// newFromMountSpecs creates a discoverer for the CSV file. A logger is also supplied.
// A list of csvDiscoverers is returned, with each being associated with a single MountSpecType.
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, root string, targets []*csv.MountSpec) (Discover, error) {
if len(targets) == 0 {
return &None{}, nil
}
var discoverers []Discover
var mountSpecTypes []csv.MountSpecType
candidatesByType := make(map[csv.MountSpecType][]string)
for _, t := range targets {
if _, exists := candidatesByType[t.Type]; !exists {
mountSpecTypes = append(mountSpecTypes, t.Type)
}
candidatesByType[t.Type] = append(candidatesByType[t.Type], t.Path)
}
for _, t := range mountSpecTypes {
locator, exists := locators[t]
if !exists {
return nil, fmt.Errorf("no locator defined for '%v'", t)
}
var m Discover
switch t {
case csv.MountSpecDev:
m = NewDeviceDiscoverer(logger, locator, root, candidatesByType[t])
default:
m = NewMounts(logger, locator, root, candidatesByType[t])
}
discoverers = append(discoverers, m)
}
return &list{discoverers: discoverers}, nil
}

View File

@@ -0,0 +1,131 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package csv
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/sirupsen/logrus"
)
const (
// DefaultMountSpecPath is default location of CSV files that define the modifications required to the OCI spec
DefaultMountSpecPath = "/etc/nvidia-container-runtime/host-files-for-container.d"
)
// GetFileList returns the (non-recursive) list of CSV files in the specified
// folder
func GetFileList(root string) ([]string, error) {
contents, err := os.ReadDir(root)
if err != nil && errors.Is(err, os.ErrNotExist) {
return nil, nil
} else if err != nil {
return nil, fmt.Errorf("failed to read the contents of %v: %v", root, err)
}
var csvFilePaths []string
for _, c := range contents {
if c.IsDir() {
continue
}
if c.Name() == ".csv" {
continue
}
ext := strings.ToLower(filepath.Ext(c.Name()))
if ext != ".csv" {
continue
}
csvFilePaths = append(csvFilePaths, filepath.Join(root, c.Name()))
}
return csvFilePaths, nil
}
// BaseFilesOnly filters out non-base CSV files from the list of CSV files.
func BaseFilesOnly(filenames []string) []string {
filter := map[string]bool{
"l4t.csv": true,
"drivers.csv": true,
"devices.csv": true,
}
var selected []string
for _, file := range filenames {
base := filepath.Base(file)
if filter[base] {
selected = append(selected, file)
}
}
return selected
}
// Parser specifies an interface for parsing MountSpecs
type Parser interface {
Parse() ([]*MountSpec, error)
}
type csv struct {
logger *logrus.Logger
filename string
}
// NewCSVFileParser creates a new parser for reading MountSpecs from the specified CSV file
func NewCSVFileParser(logger *logrus.Logger, filename string) Parser {
p := csv{
logger: logger,
filename: filename,
}
return &p
}
// Parse parses the csv file and returns a list of MountSpecs in the file
func (p csv) Parse() ([]*MountSpec, error) {
reader, err := os.Open(p.filename)
if err != nil {
return nil, fmt.Errorf("failed to open %v for reading: %v", p.filename, err)
}
defer reader.Close()
return p.parseFromReader(reader), nil
}
// parseFromReader parses the specified file and returns a list of required jetson mounts
func (p csv) parseFromReader(reader io.Reader) []*MountSpec {
var targets []*MountSpec
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
line := scanner.Text()
target, err := NewMountSpecFromLine(line)
if err != nil {
p.logger.Debugf("Skipping invalid mount spec '%v': %v", line, err)
continue
}
targets = append(targets, target)
}
return targets
}

View File

@@ -0,0 +1,83 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package csv
import (
"path/filepath"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/stretchr/testify/require"
)
func TestGetFileList(t *testing.T) {
moduleRoot, _ := test.GetModuleRoot()
testCases := []struct {
description string
root string
files []string
expectedError error
}{
{
description: "returns list of CSV files",
root: "test/input/csv_samples/",
files: []string{
"jetson.csv",
"simple_wrong.csv",
"simple.csv",
"spaced.csv",
},
},
{
description: "handles empty folder",
root: "test/input/csv_samples/empty",
},
{
description: "handles non-existent folder",
root: "test/input/csv_samples/NONEXISTENT",
},
{
description: "handles non-existent folder root",
root: "/NONEXISTENT/test/input/csv_samples/",
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
root := filepath.Join(moduleRoot, tc.root)
files, err := GetFileList(root)
if tc.expectedError != nil {
require.Error(t, err)
require.Empty(t, files)
return
}
require.NoError(t, err)
var foundFiles []string
for _, f := range files {
require.Equal(t, root, filepath.Dir(f))
require.Equal(t, ".csv", filepath.Ext(f))
foundFiles = append(foundFiles, filepath.Base(f))
}
require.ElementsMatch(t, tc.files, foundFiles)
})
}
}

View File

@@ -0,0 +1,74 @@
/**
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package csv
import (
"fmt"
"strings"
)
// MountSpecType defines the mount types allowed in a CSV file
type MountSpecType string
const (
// MountSpecDev is used for character devices
MountSpecDev = MountSpecType("dev")
// MountSpecDir is used for directories
MountSpecDir = MountSpecType("dir")
// MountSpecLib is used for libraries or regular files
MountSpecLib = MountSpecType("lib")
// MountSpecSym is used for symlinks.
MountSpecSym = MountSpecType("sym")
)
// MountSpec represents a Jetson mount consisting of a type and a path.
type MountSpec struct {
Type MountSpecType
Path string
}
// NewMountSpecFromLine parses the specified line and returns the MountSpec or an error if the line is malformed
func NewMountSpecFromLine(line string) (*MountSpec, error) {
parts := strings.SplitN(strings.TrimSpace(line), ",", 2)
if len(parts) < 2 {
return nil, fmt.Errorf("failed to parse line: %v", line)
}
mountType := strings.TrimSpace(parts[0])
path := strings.TrimSpace(parts[1])
return NewMountSpec(mountType, path)
}
// NewMountSpec creates a MountSpec with the specified type and path. An error is returned if the type is invalid.
func NewMountSpec(mountType string, path string) (*MountSpec, error) {
mt := MountSpecType(mountType)
switch mt {
case MountSpecDev, MountSpecLib, MountSpecSym, MountSpecDir:
default:
return nil, fmt.Errorf("unexpected mount type: %v", mt)
}
if path == "" {
return nil, fmt.Errorf("invalid path: %v", path)
}
mount := MountSpec{
Type: mt,
Path: path,
}
return &mount, nil
}

View File

@@ -0,0 +1,82 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package csv
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
func TestNewMountSpecFromLine(t *testing.T) {
parseError := fmt.Errorf("failed to parse line")
unexpectedError := fmt.Errorf("unexpected mount type")
testCases := []struct {
line string
expectedError error
expectedValue MountSpec
}{
{
line: "",
expectedError: parseError,
},
{
line: "\t",
expectedError: parseError,
},
{
line: ",",
expectedError: parseError,
},
{
line: "dev,",
expectedError: parseError,
},
{
line: "dev ,/a/path",
expectedValue: MountSpec{
Path: "/a/path",
Type: "dev",
},
},
{
line: "dev ,/a/path,with,commas",
expectedValue: MountSpec{
Path: "/a/path,with,commas",
Type: "dev",
},
},
{
line: "not-dev ,/a/path",
expectedError: unexpectedError,
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
target, err := NewMountSpecFromLine(tc.line)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
require.EqualValues(t, &tc.expectedValue, target)
})
}
}

View File

@@ -0,0 +1,142 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestNewFromMountSpec(t *testing.T) {
logger, _ := testlog.NewNullLogger()
locators := map[csv.MountSpecType]lookup.Locator{
"dev": &lookup.LocatorMock{},
"lib": &lookup.LocatorMock{},
}
testCases := []struct {
description string
root string
targets []*csv.MountSpec
expectedError error
expectedDiscoverer Discover
}{
{
description: "empty targets returns None discoverer list",
expectedDiscoverer: &None{},
},
{
description: "unexpected locator returns error",
targets: []*csv.MountSpec{
{
Type: "foo",
Path: "bar",
},
},
expectedError: fmt.Errorf("no locator defined for foo"),
},
{
description: "creates discoverers based on type",
targets: []*csv.MountSpec{
{
Type: "dev",
Path: "dev0",
},
{
Type: "lib",
Path: "lib0",
},
{
Type: "dev",
Path: "dev1",
},
},
expectedDiscoverer: &list{
discoverers: []Discover{
(*charDevices)(
&mounts{
logger: logger,
lookup: locators["dev"],
root: "/",
required: []string{"dev0", "dev1"},
},
),
&mounts{
logger: logger,
lookup: locators["lib"],
root: "/",
required: []string{"lib0"},
},
},
},
},
{
description: "sets root",
targets: []*csv.MountSpec{
{
Type: "dev",
Path: "dev0",
},
{
Type: "lib",
Path: "lib0",
},
{
Type: "dev",
Path: "dev1",
},
},
root: "/some/root",
expectedDiscoverer: &list{
discoverers: []Discover{
(*charDevices)(
&mounts{
logger: logger,
lookup: locators["dev"],
root: "/some/root",
required: []string{"dev0", "dev1"},
},
),
&mounts{
logger: logger,
lookup: locators["lib"],
root: "/some/root",
required: []string{"lib0"},
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
discoverer, err := newFromMountSpecs(logger, locators, tc.root, tc.targets)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
require.EqualValues(t, tc.expectedDiscoverer, discoverer)
})
}
}

View File

@@ -0,0 +1,50 @@
/*
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
// Config represents the configuration options for discovery
type Config struct {
Root string
NVIDIAContainerToolkitCLIExecutablePath string
}
// Device represents a discovered character device.
type Device struct {
HostPath string
Path string
}
// Mount represents a discovered mount.
type Mount struct {
HostPath string
Path string
}
// Hook represents a discovered hook.
type Hook struct {
Lifecycle string
Path string
Args []string
}
//go:generate moq -stub -out discover_mock.go . Discover
// Discover defines an interface for discovering the devices, mounts, and hooks available on a system
type Discover interface {
Devices() ([]Device, error)
Mounts() ([]Mount, error)
Hooks() ([]Hook, error)
}

View File

@@ -0,0 +1,150 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package discover
import (
"sync"
)
// Ensure, that DiscoverMock does implement Discover.
// If this is not the case, regenerate this file with moq.
var _ Discover = &DiscoverMock{}
// DiscoverMock is a mock implementation of Discover.
//
// func TestSomethingThatUsesDiscover(t *testing.T) {
//
// // make and configure a mocked Discover
// mockedDiscover := &DiscoverMock{
// DevicesFunc: func() ([]Device, error) {
// panic("mock out the Devices method")
// },
// HooksFunc: func() ([]Hook, error) {
// panic("mock out the Hooks method")
// },
// MountsFunc: func() ([]Mount, error) {
// panic("mock out the Mounts method")
// },
// }
//
// // use mockedDiscover in code that requires Discover
// // and then make assertions.
//
// }
type DiscoverMock struct {
// DevicesFunc mocks the Devices method.
DevicesFunc func() ([]Device, error)
// HooksFunc mocks the Hooks method.
HooksFunc func() ([]Hook, error)
// MountsFunc mocks the Mounts method.
MountsFunc func() ([]Mount, error)
// calls tracks calls to the methods.
calls struct {
// Devices holds details about calls to the Devices method.
Devices []struct {
}
// Hooks holds details about calls to the Hooks method.
Hooks []struct {
}
// Mounts holds details about calls to the Mounts method.
Mounts []struct {
}
}
lockDevices sync.RWMutex
lockHooks sync.RWMutex
lockMounts sync.RWMutex
}
// Devices calls DevicesFunc.
func (mock *DiscoverMock) Devices() ([]Device, error) {
callInfo := struct {
}{}
mock.lockDevices.Lock()
mock.calls.Devices = append(mock.calls.Devices, callInfo)
mock.lockDevices.Unlock()
if mock.DevicesFunc == nil {
var (
devicesOut []Device
errOut error
)
return devicesOut, errOut
}
return mock.DevicesFunc()
}
// DevicesCalls gets all the calls that were made to Devices.
// Check the length with:
// len(mockedDiscover.DevicesCalls())
func (mock *DiscoverMock) DevicesCalls() []struct {
} {
var calls []struct {
}
mock.lockDevices.RLock()
calls = mock.calls.Devices
mock.lockDevices.RUnlock()
return calls
}
// Hooks calls HooksFunc.
func (mock *DiscoverMock) Hooks() ([]Hook, error) {
callInfo := struct {
}{}
mock.lockHooks.Lock()
mock.calls.Hooks = append(mock.calls.Hooks, callInfo)
mock.lockHooks.Unlock()
if mock.HooksFunc == nil {
var (
hooksOut []Hook
errOut error
)
return hooksOut, errOut
}
return mock.HooksFunc()
}
// HooksCalls gets all the calls that were made to Hooks.
// Check the length with:
// len(mockedDiscover.HooksCalls())
func (mock *DiscoverMock) HooksCalls() []struct {
} {
var calls []struct {
}
mock.lockHooks.RLock()
calls = mock.calls.Hooks
mock.lockHooks.RUnlock()
return calls
}
// Mounts calls MountsFunc.
func (mock *DiscoverMock) Mounts() ([]Mount, error) {
callInfo := struct {
}{}
mock.lockMounts.Lock()
mock.calls.Mounts = append(mock.calls.Mounts, callInfo)
mock.lockMounts.Unlock()
if mock.MountsFunc == nil {
var (
mountsOut []Mount
errOut error
)
return mountsOut, errOut
}
return mock.MountsFunc()
}
// MountsCalls gets all the calls that were made to Mounts.
// Check the length with:
// len(mockedDiscover.MountsCalls())
func (mock *DiscoverMock) MountsCalls() []struct {
} {
var calls []struct {
}
mock.lockMounts.RLock()
calls = mock.calls.Mounts
mock.lockMounts.RUnlock()
return calls
}

77
internal/discover/gds.go Normal file
View File

@@ -0,0 +1,77 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
type gdsDeviceDiscoverer struct {
None
logger *logrus.Logger
devices Discover
mounts Discover
}
// NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
func NewGDSDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
devices := NewCharDeviceDiscoverer(
logger,
[]string{"/dev/nvidia-fs*"},
root,
)
udev := NewMounts(
logger,
lookup.NewDirectoryLocator(logger, root),
root,
[]string{"/run/udev"},
)
cufile := NewMounts(
logger,
lookup.NewFileLocator(logger, root),
root,
[]string{"/etc/cufile.json"},
)
d := gdsDeviceDiscoverer{
logger: logger,
devices: devices,
mounts: Merge(udev, cufile),
}
return &d, nil
}
// Devices discovers the nvidia-fs device nodes for use with GPUDirect Storage
func (d *gdsDeviceDiscoverer) Devices() ([]Device, error) {
return d.devices.Devices()
}
// Mounts discovers the required mounts for GPUDirect Storage.
// If no devices are discovered the discovered mounts are empty
func (d *gdsDeviceDiscoverer) Mounts() ([]Mount, error) {
devices, err := d.Devices()
if err != nil || len(devices) == 0 {
d.logger.Debugf("No nvidia-fs devices detected; skipping detection of mounts")
return nil, nil
}
return d.mounts.Mounts()
}

View File

@@ -0,0 +1,131 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"path/filepath"
"sort"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/sirupsen/logrus"
)
// NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. A logger can also be specified
func NewLDCacheUpdateHook(logger *logrus.Logger, mounts Discover, cfg *Config) (Discover, error) {
d := ldconfig{
logger: logger,
mountsFrom: mounts,
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
nvidiaCTKExecutablePath: cfg.NVIDIAContainerToolkitCLIExecutablePath,
}
return &d, nil
}
const (
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
)
type ldconfig struct {
None
logger *logrus.Logger
mountsFrom Discover
lookup lookup.Locator
nvidiaCTKExecutablePath string
}
// Hooks checks the required mounts for libraries and returns a hook to update the LDcache for the discovered paths.
func (d ldconfig) Hooks() ([]Hook, error) {
mounts, err := d.mountsFrom.Mounts()
if err != nil {
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
}
libDirs := getLibDirs(mounts)
hookPath := nvidiaCTKDefaultFilePath
targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath)
if err != nil {
d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err)
} else if len(targets) == 0 {
d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath)
} else {
d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets)
hookPath = targets[0]
}
d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
args := []string{hookPath, "hook", "update-ldcache"}
for _, f := range libDirs {
args = append(args, "--folder", f)
}
h := Hook{
Lifecycle: cdi.CreateContainerHook,
Path: hookPath,
Args: args,
}
return []Hook{h}, nil
}
// getLibDirs extracts the library dirs from the specified mounts
func getLibDirs(mounts []Mount) []string {
var paths []string
checked := make(map[string]bool)
for _, m := range mounts {
dir := filepath.Dir(m.Path)
if dir == "" {
continue
}
_, exists := checked[dir]
if exists {
continue
}
checked[dir] = isLibName(m.Path)
if checked[dir] {
paths = append(paths, dir)
}
}
sort.Strings(paths)
return paths
}
// isLibName checks if the specified filename is a library (i.e. ends in `.so*`)
func isLibName(filename string) bool {
base := filepath.Base(filename)
isLib, err := filepath.Match("lib?*.so*", base)
if !isLib || err != nil {
return false
}
parts := strings.Split(base, ".so")
if len(parts) == 1 {
return true
}
return parts[len(parts)-1] == "" || strings.HasPrefix(parts[len(parts)-1], ".")
}

View File

@@ -0,0 +1,65 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestIsLibName(t *testing.T) {
testCases := []struct {
name string
isLib bool
}{
{
name: "",
isLib: false,
},
{
name: "lib/not/.so",
isLib: false,
},
{
name: "lib.so",
isLib: false,
},
{
name: "notlibcuda.so",
isLib: false,
},
{
name: "libcuda.so",
isLib: true,
},
{
name: "libcuda.so.1",
isLib: true,
},
{
name: "libcuda.soNOT",
isLib: false,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
require.Equal(t, tc.isLib, isLibName(tc.name))
})
}
}

82
internal/discover/list.go Normal file
View File

@@ -0,0 +1,82 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import "fmt"
// list is a discoverer that contains a list of Discoverers. The output of the
// Mounts functions is the concatenation of the output for each of the
// elements in the list.
type list struct {
discoverers []Discover
}
var _ Discover = (*list)(nil)
// Merge creates a discoverer that is the composite of a list of discoveres.
func Merge(d ...Discover) Discover {
l := list{
discoverers: d,
}
return &l
}
// Devices returns all devices from the included discoverers
func (d list) Devices() ([]Device, error) {
var allDevices []Device
for i, di := range d.discoverers {
devices, err := di.Devices()
if err != nil {
return nil, fmt.Errorf("error discovering devices for discoverer %v: %v", i, err)
}
allDevices = append(allDevices, devices...)
}
return allDevices, nil
}
// Mounts returns all mounts from the included discoverers
func (d list) Mounts() ([]Mount, error) {
var allMounts []Mount
for i, di := range d.discoverers {
mounts, err := di.Mounts()
if err != nil {
return nil, fmt.Errorf("error discovering mounts for discoverer %v: %v", i, err)
}
allMounts = append(allMounts, mounts...)
}
return allMounts, nil
}
// Hooks returns all Hooks from the included discoverers
func (d list) Hooks() ([]Hook, error) {
var allHooks []Hook
for i, di := range d.discoverers {
hooks, err := di.Hooks()
if err != nil {
return nil, fmt.Errorf("error discovering hooks for discoverer %v: %v", i, err)
}
allHooks = append(allHooks, hooks...)
}
return allHooks, nil
}

View File

@@ -0,0 +1,35 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"github.com/sirupsen/logrus"
)
// NewMOFEDDiscoverer creates a discoverer for MOFED devices.
func NewMOFEDDiscoverer(logger *logrus.Logger, root string) (Discover, error) {
devices := NewCharDeviceDiscoverer(
logger,
[]string{
"/dev/infiniband/uverbs*",
"/dev/infiniband/rdma_cm",
},
root,
)
return devices, nil
}

117
internal/discover/mounts.go Normal file
View File

@@ -0,0 +1,117 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"fmt"
"path/filepath"
"strings"
"sync"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
// mounts is a generic discoverer for Mounts. It is customized by specifying the
// required entities as a list and a Locator that is used to find the target mounts
// based on the entry in the list.
type mounts struct {
None
logger *logrus.Logger
lookup lookup.Locator
root string
required []string
sync.Mutex
cache []Mount
}
var _ Discover = (*mounts)(nil)
// NewMounts creates a discoverer for the required mounts using the specified locator.
func NewMounts(logger *logrus.Logger, lookup lookup.Locator, root string, required []string) Discover {
return &mounts{
logger: logger,
lookup: lookup,
root: filepath.Join("/", root),
required: required,
}
}
func (d *mounts) Mounts() ([]Mount, error) {
if d.lookup == nil {
return nil, fmt.Errorf("no lookup defined")
}
if d.cache != nil {
d.logger.Debugf("returning cached mounts")
return d.cache, nil
}
d.Lock()
defer d.Unlock()
uniqueMounts := make(map[string]Mount)
for _, candidate := range d.required {
d.logger.Debugf("Locating %v", candidate)
located, err := d.lookup.Locate(candidate)
if err != nil {
d.logger.Warnf("Could not locate %v: %v", candidate, err)
continue
}
if len(located) == 0 {
d.logger.Warnf("Missing %v", candidate)
continue
}
d.logger.Debugf("Located %v as %v", candidate, located)
for _, p := range located {
if _, ok := uniqueMounts[p]; ok {
d.logger.Debugf("Skipping duplicate mount %v", p)
continue
}
r := d.relativeTo(p)
if r == "" {
r = p
}
d.logger.Infof("Selecting %v as %v", p, r)
uniqueMounts[p] = Mount{
HostPath: p,
Path: r,
}
}
}
var mounts []Mount
for _, m := range uniqueMounts {
mounts = append(mounts, m)
}
d.cache = mounts
return d.cache, nil
}
// relativeTo returns the path relative to the root for the file locator
func (d *mounts) relativeTo(path string) string {
if d.root == "/" {
return path
}
return strings.TrimPrefix(path, d.root)
}

View File

@@ -0,0 +1,180 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"fmt"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/stretchr/testify/require"
testlog "github.com/sirupsen/logrus/hooks/test"
)
func TestMountsReturnsEmptyDevices(t *testing.T) {
d := mounts{}
devices, err := d.Devices()
require.NoError(t, err)
require.Empty(t, devices)
}
func TestMounts(t *testing.T) {
logger, logHook := testlog.NewNullLogger()
testCases := []struct {
description string
expectedError error
expectedMounts []Mount
input *mounts
}{
{
description: "nill lookup returns error",
expectedError: fmt.Errorf("no lookup defined"),
input: &mounts{},
},
{
description: "empty required returns no mounts",
expectedError: nil,
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(string) ([]string, error) {
return []string{"located"}, nil
},
},
},
},
{
description: "required returns located",
expectedError: nil,
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(string) ([]string, error) {
return []string{"located"}, nil
},
},
required: []string{"required"},
},
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
},
{
description: "mounts removes located duplicates",
expectedError: nil,
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(string) ([]string, error) {
return []string{"located"}, nil
},
},
required: []string{"required0", "required1"},
},
expectedMounts: []Mount{{Path: "located", HostPath: "located"}},
},
{
description: "mounts skips located errors",
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
if s == "error" {
return nil, fmt.Errorf(s)
}
return []string{s}, nil
},
},
required: []string{"required0", "error", "required1"},
},
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
},
{
description: "mounts skips unlocated",
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
if s == "empty" {
return nil, nil
}
return []string{s}, nil
},
},
required: []string{"required0", "empty", "required1"},
},
expectedMounts: []Mount{{Path: "required0", HostPath: "required0"}, {Path: "required1", HostPath: "required1"}},
},
{
description: "mounts adds multiple",
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
if s == "multiple" {
return []string{"multiple0", "multiple1"}, nil
}
return []string{s}, nil
},
},
required: []string{"required0", "multiple", "required1"},
},
expectedMounts: []Mount{
{Path: "required0", HostPath: "required0"},
{Path: "multiple0", HostPath: "multiple0"},
{Path: "multiple1", HostPath: "multiple1"},
{Path: "required1", HostPath: "required1"},
},
},
{
description: "mounts uses relative path",
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
return []string{"/some/root/located"}, nil
},
},
root: "/some/root",
required: []string{"required0", "multiple", "required1"},
},
expectedMounts: []Mount{
{Path: "/located", HostPath: "/some/root/located"},
},
},
}
for _, tc := range testCases {
logHook.Reset()
t.Run(tc.description, func(t *testing.T) {
tc.input.logger = logger
mounts, err := tc.input.Mounts()
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.ElementsMatch(t, tc.expectedMounts, mounts)
// We check that the mock is called for each element of required
if tc.input.lookup != nil {
mock := tc.input.lookup.(*lookup.LocatorMock)
require.Len(t, mock.LocateCalls(), len(tc.input.required))
var args []string
for _, c := range mock.LocateCalls() {
args = append(args, c.S)
}
require.EqualValues(t, args, tc.input.required)
}
})
}
}

38
internal/discover/none.go Normal file
View File

@@ -0,0 +1,38 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
// None is a null discoverer that returns an empty list of devices and
// mounts.
type None struct{}
var _ Discover = (*None)(nil)
// Devices returns an empty list of devices
func (e None) Devices() ([]Device, error) {
return []Device{}, nil
}
// Mounts returns an empty list of mounts
func (e None) Mounts() ([]Mount, error) {
return []Mount{}, nil
}
// Hooks returns an empty list of hooks
func (e None) Hooks() ([]Hook, error) {
return []Hook{}, nil
}

View File

@@ -0,0 +1,31 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestNone(t *testing.T) {
d := None{}
mounts, err := d.Mounts()
require.NoError(t, err)
require.Empty(t, mounts)
}

View File

@@ -0,0 +1,125 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/sirupsen/logrus"
)
type symlinks struct {
None
logger *logrus.Logger
lookup lookup.Locator
nvidiaCTKExecutablePath string
csvFiles []string
mountsFrom Discover
}
// NewCreateSymlinksHook creates a discoverer for a hook that creates required symlinks in the container
func NewCreateSymlinksHook(logger *logrus.Logger, csvFiles []string, mounts Discover, cfg *Config) (Discover, error) {
d := symlinks{
logger: logger,
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
nvidiaCTKExecutablePath: cfg.NVIDIAContainerToolkitCLIExecutablePath,
csvFiles: csvFiles,
mountsFrom: mounts,
}
return &d, nil
}
// Hooks returns a hook to create the symlinks from the required CSV files
func (d symlinks) Hooks() ([]Hook, error) {
hookPath := nvidiaCTKDefaultFilePath
targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath)
if err != nil {
d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err)
} else if len(targets) == 0 {
d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath)
} else {
d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets)
hookPath = targets[0]
}
d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
args := []string{hookPath, "hook", "create-symlinks"}
for _, f := range d.csvFiles {
args = append(args, "--csv-filename", f)
}
links, err := d.getSpecificLinkArgs()
if err != nil {
return nil, fmt.Errorf("failed to determine specific links: %v", err)
}
args = append(args, links...)
h := Hook{
Lifecycle: cdi.CreateContainerHook,
Path: hookPath,
Args: args,
}
return []Hook{h}, nil
}
// getSpecificLinkArgs returns the required specic links that need to be created
func (d symlinks) getSpecificLinkArgs() ([]string, error) {
mounts, err := d.mountsFrom.Mounts()
if err != nil {
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
}
linkProcessed := make(map[string]bool)
var links []string
for _, m := range mounts {
var target string
var link string
lib := filepath.Base(m.Path)
if strings.HasPrefix(lib, "libcuda.so") {
// XXX Many applications wrongly assume that libcuda.so exists (e.g. with dlopen).
target = "libcuda.so.1"
link = "libcuda.so"
} else if strings.HasPrefix(lib, "libGLX_nvidia.so") {
// XXX GLVND requires this symlink for indirect GLX support.
target = lib
link = "libGLX_indirect.so.0"
} else if strings.HasPrefix(lib, "libnvidia-opticalflow.so") {
// XXX Fix missing symlink for libnvidia-opticalflow.so.
target = "libnvidia-opticalflow.so.1"
link = "libnvidia-opticalflow.so"
} else {
continue
}
if linkProcessed[link] {
continue
}
linkPath := filepath.Join(filepath.Dir(m.Path), link)
links = append(links, "--link", fmt.Sprintf("%v:%v", target, linkPath))
linkProcessed[link] = true
}
return links, nil
}

67
internal/edits/device.go Normal file
View File

@@ -0,0 +1,67 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package edits
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/opencontainers/runc/libcontainer/devices"
)
type device discover.Device
// toEdits converts a discovered device to CDI Container Edits.
func (d device) toEdits() (*cdi.ContainerEdits, error) {
deviceNode, err := d.toSpec()
if err != nil {
return nil, err
}
e := cdi.ContainerEdits{
ContainerEdits: &specs.ContainerEdits{
DeviceNodes: []*specs.DeviceNode{deviceNode},
},
}
return &e, nil
}
// toSpec converts a discovered Device to a CDI Spec Device. Note
// that missing info is filled in when edits are applied by querying the Device node.
func (d device) toSpec() (*specs.DeviceNode, error) {
// NOTE: This mirrors what cri-o does.
// https://github.com/cri-o/cri-o/blob/ca3bb80a3dda0440659fcf8da8ed6f23211de94e/internal/config/device/device.go#L93
// This can be removed once https://github.com/container-orchestrated-devices/container-device-interface/issues/72 is addressed
dev, err := devices.DeviceFromPath(d.HostPath, "rwm")
if err != nil {
return nil, fmt.Errorf("failed to query device node %v: %v", d.HostPath, err)
}
s := specs.DeviceNode{
Path: d.Path,
Type: string(dev.Type),
Major: dev.Major,
Minor: dev.Minor,
FileMode: &dev.FileMode,
UID: &dev.Uid,
GID: &dev.Gid,
}
return &s, nil
}

97
internal/edits/edits.go Normal file
View File

@@ -0,0 +1,97 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package edits
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
ociSpecs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
type edits struct {
cdi.ContainerEdits
logger *logrus.Logger
}
// NewSpecEdits creates a SpecModifier that defines the required OCI spec edits (as CDI ContainerEdits) from the specified
// discoverer.
func NewSpecEdits(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
devices, err := d.Devices()
if err != nil {
return nil, fmt.Errorf("failed to discover devices: %v", err)
}
mounts, err := d.Mounts()
if err != nil {
return nil, fmt.Errorf("failed to discover mounts: %v", err)
}
hooks, err := d.Hooks()
if err != nil {
return nil, fmt.Errorf("failed to discover hooks: %v", err)
}
c := cdi.ContainerEdits{}
for _, d := range devices {
edits, err := device(d).toEdits()
if err != nil {
return nil, fmt.Errorf("failed to created container edits for device: %v", err)
}
c.Append(edits)
}
for _, m := range mounts {
c.Append(mount(m).toEdits())
}
for _, h := range hooks {
c.Append(hook(h).toEdits())
}
e := edits{
ContainerEdits: c,
logger: logger,
}
return &e, nil
}
// Modify applies the defined edits to the incoming OCI spec
func (e *edits) Modify(spec *ociSpecs.Spec) error {
if e == nil || e.ContainerEdits.ContainerEdits == nil {
return nil
}
e.logger.Info("Mounts:")
for _, mount := range e.Mounts {
e.logger.Infof("Mounting %v at %v", mount.HostPath, mount.ContainerPath)
}
e.logger.Infof("Devices:")
for _, device := range e.DeviceNodes {
e.logger.Infof("Injecting %v", device.Path)
}
e.logger.Infof("Hooks:")
for _, hook := range e.Hooks {
e.logger.Infof("Injecting %v", hook.Args)
}
return e.Apply(spec)
}

47
internal/edits/hook.go Normal file
View File

@@ -0,0 +1,47 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package edits
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type hook discover.Hook
// toEdits converts a discovered hook to CDI Container Edits.
func (d hook) toEdits() *cdi.ContainerEdits {
e := cdi.ContainerEdits{
ContainerEdits: &specs.ContainerEdits{
Hooks: []*specs.Hook{d.toSpec()},
},
}
return &e
}
// toSpec converts a discovered Hook to a CDI Spec Hook. Note
// that missing info is filled in when edits are applied by querying the Hook node.
func (d hook) toSpec() *specs.Hook {
s := specs.Hook{
HookName: d.Lifecycle,
Path: d.Path,
Args: d.Args,
}
return &s
}

52
internal/edits/mount.go Normal file
View File

@@ -0,0 +1,52 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package edits
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type mount discover.Mount
// toEdits converts a discovered mount to CDI Container Edits.
func (d mount) toEdits() *cdi.ContainerEdits {
e := cdi.ContainerEdits{
ContainerEdits: &specs.ContainerEdits{
Mounts: []*specs.Mount{d.toSpec()},
},
}
return &e
}
// toSpec converts a discovered Mount to a CDI Spec Mount. Note
// that missing info is filled in when edits are applied by querying the Mount node.
func (d mount) toSpec() *specs.Mount {
s := specs.Mount{
HostPath: d.HostPath,
ContainerPath: d.Path,
Options: []string{
"ro",
"nosuid",
"nodev",
"bind",
},
}
return &s
}

48
internal/info/auto.go Normal file
View File

@@ -0,0 +1,48 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package info
import "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvinfo"
// Logger is a basic interface for logging to allow these functions to be called
// from code where logrus is not used.
type Logger interface {
Infof(string, ...interface{})
Debugf(string, ...interface{})
}
// ResolveAutoMode determines the correct mode for the platform if set to "auto"
func ResolveAutoMode(logger Logger, mode string) (rmode string) {
if mode != "auto" {
return mode
}
defer func() {
logger.Infof("Auto-detected mode as '%v'", rmode)
}()
isTegra, reason := nvinfo.IsTegraSystem()
logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
hasNVML, reason := nvinfo.HasNVML()
logger.Debugf("Has NVML? %v: %v", hasNVML, reason)
if isTegra && !hasNVML {
return "csv"
}
return "legacy"
}

View File

@@ -0,0 +1,53 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package info
import (
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestResolveAutoMode(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
mode string
expectedMode string
}{
{
description: "non-auto resolves to input",
mode: "not-auto",
expectedMode: "not-auto",
},
// TODO: The following test is brittle in that it will break on Tegra-based systems.
// {
// description: "auto resolves to legacy",
// mode: "auto",
// expectedMode: "legacy",
// },
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
mode := ResolveAutoMode(logger, tc.mode)
require.EqualValues(t, tc.expectedMode, mode)
})
}
}

43
internal/info/version.go Normal file
View File

@@ -0,0 +1,43 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package info
import "strings"
// version must be set by go build's -X main.version= option in the Makefile.
var version = "unknown"
// gitCommit will be the hash that the binary was built from
// and will be populated by the Makefile
var gitCommit = ""
// GetVersionParts returns the different version components
func GetVersionParts() []string {
v := []string{version}
if gitCommit != "" {
v = append(v, "commit: "+gitCommit)
}
return v
}
// GetVersionString returns the string representation of the version
func GetVersionString(more ...string) string {
v := append(GetVersionParts(), more...)
return strings.Join(v, "\n")
}

53
internal/lookup/device.go Normal file
View File

@@ -0,0 +1,53 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package lookup
import (
"fmt"
"os"
"path/filepath"
"github.com/sirupsen/logrus"
)
const (
devRoot = "/dev"
)
// NewCharDeviceLocator creates a Locator that can be used to find char devices at the specified root. A logger is
// also specified.
func NewCharDeviceLocator(logger *logrus.Logger, root string) Locator {
l := file{
logger: logger,
prefixes: []string{root, filepath.Join(root, devRoot)},
filter: assertCharDevice,
}
return &l
}
// assertCharDevice checks whether the specified path is a char device and returns an error if this is not the case.
func assertCharDevice(filename string) error {
info, err := os.Lstat(filename)
if err != nil {
return fmt.Errorf("error getting info: %v", err)
}
if info.Mode()&os.ModeCharDevice == 0 {
return fmt.Errorf("%v is not a char device", filename)
}
return nil
}

50
internal/lookup/dir.go Normal file
View File

@@ -0,0 +1,50 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package lookup
import (
"fmt"
"os"
log "github.com/sirupsen/logrus"
)
// NewDirectoryLocator creates a Locator that can be used to find directories at the specified root. A logger
// is also specified.
func NewDirectoryLocator(logger *log.Logger, root string) Locator {
l := file{
logger: logger,
prefixes: []string{root},
filter: assertDirectory,
}
return &l
}
// assertDirectory checks wither the specified path is a directory.
func assertDirectory(filename string) error {
info, err := os.Stat(filename)
if err != nil {
return fmt.Errorf("error getting info for %v: %v", filename, err)
}
if !info.IsDir() {
return fmt.Errorf("specified path '%v' is not a directory", filename)
}
return nil
}

View File

@@ -0,0 +1,83 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package lookup
import (
"fmt"
"os"
"path/filepath"
"strings"
log "github.com/sirupsen/logrus"
)
type executable struct {
file
}
// NewExecutableLocator creates a locator to fine executable files in the path. A logger can also be specified.
func NewExecutableLocator(logger *log.Logger, root string) Locator {
paths := GetPaths(root)
var prefixes []string
for _, dir := range paths {
prefixes = append(prefixes, filepath.Join(root, dir))
}
l := executable{
file: file{
logger: logger,
prefixes: prefixes,
filter: assertExecutable,
},
}
return &l
}
var _ Locator = (*executable)(nil)
// Locate finds executable files with the specified pattern in the path.
// If a relative or absolute path is specified, the prefix paths are not considered.
func (p executable) Locate(pattern string) ([]string, error) {
// For absolute paths we ensure that it is executable
if strings.Contains(pattern, "/") {
err := assertExecutable(pattern)
if err != nil {
return nil, fmt.Errorf("absolute path %v is not an executable file: %v", pattern, err)
}
return []string{pattern}, nil
}
return p.file.Locate(pattern)
}
// assertExecutable checks whether the specified path is an execuable file.
func assertExecutable(filename string) error {
err := assertFile(filename)
if err != nil {
return err
}
info, err := os.Stat(filename)
if err != nil {
return err
}
if info.Mode()&0111 == 0 {
return fmt.Errorf("specified file '%v' is not executable", filename)
}
return nil
}

92
internal/lookup/file.go Normal file
View File

@@ -0,0 +1,92 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package lookup
import (
"fmt"
"os"
"path/filepath"
log "github.com/sirupsen/logrus"
)
// file can be used to locate file (or file-like elements) at a specified set of
// prefixes. The validity of a file is determined by a filter function.
type file struct {
logger *log.Logger
prefixes []string
filter func(string) error
}
// NewFileLocator creates a Locator that can be used to find files at the specified root. A logger
// can also be specified.
func NewFileLocator(logger *log.Logger, root string) Locator {
l := newFileLocator(logger, root)
return &l
}
func newFileLocator(logger *log.Logger, root string) file {
return file{
logger: logger,
prefixes: []string{root},
filter: assertFile,
}
}
var _ Locator = (*file)(nil)
// Locate attempts to find files with names matching the specified pattern.
// All prefixes are searched and any matching candidates are returned. If no matches are found, an error is returned.
func (p file) Locate(pattern string) ([]string, error) {
var filenames []string
for _, prefix := range p.prefixes {
pathPattern := filepath.Join(prefix, pattern)
candidates, err := filepath.Glob(pathPattern)
if err != nil {
p.logger.Debugf("Checking pattern '%v' failed: %v", pathPattern, err)
}
for _, candidate := range candidates {
p.logger.Debugf("Checking candidate '%v'", candidate)
err := p.filter(candidate)
if err != nil {
p.logger.Debugf("Candidate '%v' does not meet requirements: %v", candidate, err)
continue
}
filenames = append(filenames, candidate)
}
}
if len(filenames) == 0 {
return nil, fmt.Errorf("pattern %v not found", pattern)
}
return filenames, nil
}
// assertFile checks whether the specified path is a regular file
func assertFile(filename string) error {
info, err := os.Stat(filename)
if err != nil {
return fmt.Errorf("error getting info for %v: %v", filename, err)
}
if info.IsDir() {
return fmt.Errorf("specified path '%v' is a directory", filename)
}
return nil
}

View File

@@ -0,0 +1,24 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package lookup
//go:generate moq -stub -out locator_mock.go . Locator
// Locator defines the interface for locating files on a system.
type Locator interface {
Locate(string) ([]string, error)
}

View File

@@ -0,0 +1,124 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package lookup
import (
"sync"
)
// Ensure, that LocatorMock does implement Locator.
// If this is not the case, regenerate this file with moq.
var _ Locator = &LocatorMock{}
// LocatorMock is a mock implementation of Locator.
//
// func TestSomethingThatUsesLocator(t *testing.T) {
//
// // make and configure a mocked Locator
// mockedLocator := &LocatorMock{
// LocateFunc: func(s string) ([]string, error) {
// panic("mock out the Locate method")
// },
// RelativeFunc: func(s string) (string, error) {
// panic("mock out the Relative method")
// },
// }
//
// // use mockedLocator in code that requires Locator
// // and then make assertions.
//
// }
type LocatorMock struct {
// LocateFunc mocks the Locate method.
LocateFunc func(s string) ([]string, error)
// RelativeFunc mocks the Relative method.
RelativeFunc func(s string) (string, error)
// calls tracks calls to the methods.
calls struct {
// Locate holds details about calls to the Locate method.
Locate []struct {
// S is the s argument value.
S string
}
// Relative holds details about calls to the Relative method.
Relative []struct {
// S is the s argument value.
S string
}
}
lockLocate sync.RWMutex
lockRelative sync.RWMutex
}
// Locate calls LocateFunc.
func (mock *LocatorMock) Locate(s string) ([]string, error) {
callInfo := struct {
S string
}{
S: s,
}
mock.lockLocate.Lock()
mock.calls.Locate = append(mock.calls.Locate, callInfo)
mock.lockLocate.Unlock()
if mock.LocateFunc == nil {
var (
stringsOut []string
errOut error
)
return stringsOut, errOut
}
return mock.LocateFunc(s)
}
// LocateCalls gets all the calls that were made to Locate.
// Check the length with:
// len(mockedLocator.LocateCalls())
func (mock *LocatorMock) LocateCalls() []struct {
S string
} {
var calls []struct {
S string
}
mock.lockLocate.RLock()
calls = mock.calls.Locate
mock.lockLocate.RUnlock()
return calls
}
// Relative calls RelativeFunc.
func (mock *LocatorMock) Relative(s string) (string, error) {
callInfo := struct {
S string
}{
S: s,
}
mock.lockRelative.Lock()
mock.calls.Relative = append(mock.calls.Relative, callInfo)
mock.lockRelative.Unlock()
if mock.RelativeFunc == nil {
var (
sOut string
errOut error
)
return sOut, errOut
}
return mock.RelativeFunc(s)
}
// RelativeCalls gets all the calls that were made to Relative.
// Check the length with:
// len(mockedLocator.RelativeCalls())
func (mock *LocatorMock) RelativeCalls() []struct {
S string
} {
var calls []struct {
S string
}
mock.lockRelative.RLock()
calls = mock.calls.Relative
mock.lockRelative.RUnlock()
return calls
}

69
internal/lookup/path.go Normal file
View File

@@ -0,0 +1,69 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package lookup
import (
"os"
"path"
"path/filepath"
"strings"
)
const (
envPath = "PATH"
)
var (
defaultPATH = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"}
)
// GetPaths returns a list of paths for a specified root. These are constructed from the
// PATH environment variable, a default path list, and the supplied root.
func GetPaths(root string) []string {
dirs := filepath.SplitList(os.Getenv(envPath))
inDirs := make(map[string]bool)
for _, d := range dirs {
inDirs[d] = true
}
// directories from the environment have higher precedence
for _, d := range defaultPATH {
if inDirs[d] {
// We don't add paths that are already included
continue
}
dirs = append(dirs, d)
}
if root != "" && root != "/" {
rootDirs := []string{}
for _, dir := range dirs {
rootDirs = append(rootDirs, path.Join(root, dir))
}
// directories with the root prefix have higher precedence
dirs = append(rootDirs, dirs...)
}
return dirs
}
// GetPath returns a colon-separated path value that can be used to set the PATH
// environment variable
func GetPath(root string) string {
return strings.Join(GetPaths(root), ":")
}

124
internal/lookup/symlinks.go Normal file
View File

@@ -0,0 +1,124 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package lookup
import (
"fmt"
"os"
"path/filepath"
"github.com/sirupsen/logrus"
)
type symlinkChain struct {
file
}
type symlink struct {
file
}
// NewSymlinkChainLocator creats a locator that can be used for locating files through symlinks.
// A logger can also be specified.
func NewSymlinkChainLocator(logger *logrus.Logger, root string) Locator {
l := symlinkChain{
file: newFileLocator(logger, root),
}
return &l
}
// NewSymlinkLocator creats a locator that can be used for locating files through symlinks.
// A logger can also be specified.
func NewSymlinkLocator(logger *logrus.Logger, root string) Locator {
l := symlink{
file: newFileLocator(logger, root),
}
return &l
}
// Locate finds the specified pattern at the specified root.
// If the file is a symlink, the link is followed and all candidates to the final target are returned.
func (p symlinkChain) Locate(pattern string) ([]string, error) {
candidates, err := p.file.Locate(pattern)
if err != nil {
return nil, err
}
if len(candidates) == 0 {
return candidates, nil
}
found := make(map[string]bool)
for len(candidates) > 0 {
candidate := candidates[0]
candidates = candidates[:len(candidates)-1]
if found[candidate] {
continue
}
found[candidate] = true
info, err := os.Lstat(candidate)
if err != nil {
return nil, fmt.Errorf("failed to get file info: %v", info)
}
if info.Mode()&os.ModeSymlink == 0 {
continue
}
target, err := os.Readlink(candidate)
if err != nil {
return nil, fmt.Errorf("error checking symlink: %v", err)
}
if !filepath.IsAbs(target) {
target, err = filepath.Abs(filepath.Join(filepath.Dir(candidate), target))
if err != nil {
return nil, fmt.Errorf("failed to construct absolute path: %v", err)
}
}
p.logger.Debugf("Resolved link: '%v' => '%v'", candidate, target)
if !found[target] {
candidates = append(candidates, target)
}
}
var filenames []string
for f := range found {
filenames = append(filenames, f)
}
return filenames, nil
}
// Locate finds the specified pattern at the specified root.
// If the file is a symlink, the link is resolved and the target returned.
func (p symlink) Locate(pattern string) ([]string, error) {
candidates, err := p.file.Locate(pattern)
if err != nil {
return nil, err
}
if len(candidates) != 1 {
return nil, fmt.Errorf("failed to uniquely resolve symlink %v: %v", pattern, candidates)
}
target, err := filepath.EvalSymlinks(candidates[0])
if err != nil {
return nil, fmt.Errorf("failed to resolve link: %v", err)
}
return []string{target}, err
}

128
internal/modifier/cdi.go Normal file
View File

@@ -0,0 +1,128 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package modifier
import (
"fmt"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
type cdiModifier struct {
logger *logrus.Logger
specDirs []string
devices []string
}
// NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES enviroment variable is
// used to select the devices to include.
func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
devices, err := getDevicesFromSpec(ociSpec)
if err != nil {
return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err)
}
if len(devices) == 0 {
logger.Debugf("No devices requested; no modification required.")
return nil, nil
}
specDirs := cdi.DefaultSpecDirs
if len(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.SpecDirs) > 0 {
specDirs = cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.SpecDirs
}
m := cdiModifier{
logger: logger,
specDirs: specDirs,
devices: devices,
}
return m, nil
}
func getDevicesFromSpec(ociSpec oci.Spec) ([]string, error) {
rawSpec, err := ociSpec.Load()
if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
}
image, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil {
return nil, err
}
envDevices := image.DevicesFromEnvvars(visibleDevicesEnvvar)
_, annotationDevices, err := cdi.ParseAnnotations(rawSpec.Annotations)
if err != nil {
return nil, fmt.Errorf("failed to parse container annotations: %v", err)
}
uniqueDevices := make(map[string]struct{})
for _, name := range append(envDevices, annotationDevices...) {
if !cdi.IsQualifiedName(name) {
name = cdi.QualifiedName("nvidia.com", "gpu", name)
}
uniqueDevices[name] = struct{}{}
}
var devices []string
for name := range uniqueDevices {
devices = append(devices, name)
}
return devices, nil
}
// Modify loads the CDI registry and injects the specified CDI devices into the OCI runtime specification.
func (m cdiModifier) Modify(spec *specs.Spec) error {
registry := cdi.GetRegistry(
cdi.WithSpecDirs(m.specDirs...),
cdi.WithAutoRefresh(false),
)
if err := registry.Refresh(); err != nil {
m.logger.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
}
devices := m.devices
for _, d := range devices {
if d == "nvidia.com/gpu=all" {
devices = []string{}
for _, candidate := range registry.DeviceDB().ListDevices() {
if strings.HasPrefix(candidate, "nvidia.com/gpu=") {
devices = append(devices, candidate)
}
}
break
}
}
m.logger.Debugf("Injecting devices using CDI: %v", devices)
_, err := registry.InjectDevices(spec, devices...)
if err != nil {
return fmt.Errorf("failed to inject CDI devices: %v", err)
}
return nil
}

Some files were not shown because too many files have changed in this diff Show More