Compare commits

...

67 Commits

Author SHA1 Message Date
Evan Lezar
cdfb232d4c Merge pull request #445 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/sys-0.19.0
Bump golang.org/x/sys from 0.18.0 to 0.19.0
2024-04-16 11:38:26 +02:00
dependabot[bot]
fa259354ac Bump golang.org/x/sys from 0.18.0 to 0.19.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.18.0 to 0.19.0.
- [Commits](https://github.com/golang/sys/compare/v0.18.0...v0.19.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-15 09:51:28 +00:00
Evan Lezar
43a3861463 Merge pull request #442 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/mod-0.17.0
Bump golang.org/x/mod from 0.16.0 to 0.17.0
2024-04-15 11:50:43 +02:00
dependabot[bot]
9274829517 Bump golang.org/x/mod from 0.16.0 to 0.17.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.16.0 to 0.17.0.
- [Commits](https://github.com/golang/mod/compare/v0.16.0...v0.17.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-07 08:14:08 +00:00
Evan Lezar
4668c511de Merge pull request #415 from NVIDIA/dependabot/go_modules/release-1.14/github.com/NVIDIA/go-nvlib-0.2.0
Bump github.com/NVIDIA/go-nvlib from 0.0.0-20231115170030-b21432a353e1 to 0.2.0
2024-03-18 14:20:18 +02:00
dependabot[bot]
76ecce5e8f Bump github.com/NVIDIA/go-nvlib
Bumps [github.com/NVIDIA/go-nvlib](https://github.com/NVIDIA/go-nvlib) from 0.0.0-20231115170030-b21432a353e1 to 0.2.0.
- [Release notes](https://github.com/NVIDIA/go-nvlib/releases)
- [Commits](https://github.com/NVIDIA/go-nvlib/commits/v0.2.0)

---
updated-dependencies:
- dependency-name: github.com/NVIDIA/go-nvlib
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-17 08:06:07 +00:00
Evan Lezar
cbb66c1a30 Merge pull request #399 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/sys-0.18.0
Bump golang.org/x/sys from 0.17.0 to 0.18.0
2024-03-12 14:11:14 +02:00
dependabot[bot]
a714bf2d83 Bump golang.org/x/sys from 0.17.0 to 0.18.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.17.0 to 0.18.0.
- [Commits](https://github.com/golang/sys/compare/v0.17.0...v0.18.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-12 11:15:38 +00:00
Evan Lezar
de2ed16f6c Merge pull request #398 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/mod-0.16.0
Bump golang.org/x/mod from 0.15.0 to 0.16.0
2024-03-12 13:14:29 +02:00
dependabot[bot]
99c8370f25 Bump golang.org/x/mod from 0.15.0 to 0.16.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.15.0 to 0.16.0.
- [Commits](https://github.com/golang/mod/compare/v0.15.0...v0.16.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-10 08:16:57 +00:00
Evan Lezar
d20b00e360 Merge pull request #390 from NVIDIA/dependabot/go_modules/release-1.14/github.com/stretchr/testify-1.9.0
Bump github.com/stretchr/testify from 1.8.4 to 1.9.0
2024-03-01 22:16:46 +02:00
dependabot[bot]
a724207fbd Bump github.com/stretchr/testify from 1.8.4 to 1.9.0
Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.8.4 to 1.9.0.
- [Release notes](https://github.com/stretchr/testify/releases)
- [Commits](https://github.com/stretchr/testify/compare/v1.8.4...v1.9.0)

---
updated-dependencies:
- dependency-name: github.com/stretchr/testify
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-01 15:45:38 +00:00
Evan Lezar
37b1e37c8f Merge pull request #385 from elezar/migrate-github-go-nvlib
Use github.com/NVIDIA/go-nvlib
2024-02-29 10:33:38 +02:00
Evan Lezar
9019dd1d02 Use github.com/NVIDIA/go-nvlib
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-29 10:29:20 +02:00
Christopher Desiniotis
5605d19133 Merge pull request #380 from tariq1890/cpick-exists-fallback
[R550 driver support] add fallback logic to device.Exists(name)
2024-02-27 12:41:04 -08:00
Tariq Ibrahim
3a0c989066 [R550 driver support] add fallback logic to device.Exists(name)
Signed-off-by: Tariq Ibrahim <tibrahim@nvidia.com>
(cherry picked from commit f80f4c485d)
2024-02-27 12:30:45 -08:00
Evan Lezar
5d246adf3d Merge pull request #376 from elezar/cherry-pick-v1.14.6
Cherry pick v1.14.6
2024-02-27 16:55:22 +02:00
Evan Lezar
8281e7d341 Update libnvidia-container to d2eb0afe
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-27 14:35:41 +02:00
Evan Lezar
1d046b4a9b Bump version to v1.14.6
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-27 13:34:23 +02:00
Evan Lezar
888ad62c98 Update changelog
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-27 13:33:53 +02:00
Kevin Klues
2fa37973e0 Add support for an NVIDIA_IMEX_CHANNELS envvar
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2024-02-27 13:33:33 +02:00
Evan Lezar
d57d83405d Merge pull request #352 from elezar/cherry-pick-release-1.14
Support both nvidia and nvidia-frontend when extracting major
2024-02-19 13:59:34 +01:00
Evan Lezar
6b077a2f1c Bump libnvidia-container
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-19 13:36:51 +01:00
Evan Lezar
4e2861fe77 Support both nvidia and nvidia-frontend when extracting major
With newer driver versions the nvidia-frontend name in /proc/devices
was replaced with nvidia. This change allows both nvidia and nvidia-frontend
to be used to extract the expected device major (195).

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-19 13:36:01 +01:00
Evan Lezar
b51d51369d Merge pull request #368 from NVIDIA/dependabot/go_modules/release-1.14/github.com/opencontainers/runtime-spec-1.2.0
Bump github.com/opencontainers/runtime-spec from 1.1.0 to 1.2.0
2024-02-19 11:36:08 +01:00
dependabot[bot]
cc15f77f9a Bump github.com/opencontainers/runtime-spec from 1.1.0 to 1.2.0
Bumps [github.com/opencontainers/runtime-spec](https://github.com/opencontainers/runtime-spec) from 1.1.0 to 1.2.0.
- [Release notes](https://github.com/opencontainers/runtime-spec/releases)
- [Changelog](https://github.com/opencontainers/runtime-spec/blob/main/ChangeLog)
- [Commits](https://github.com/opencontainers/runtime-spec/compare/v1.1.0...v1.2.0)

---
updated-dependencies:
- dependency-name: github.com/opencontainers/runtime-spec
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-19 10:20:33 +00:00
Evan Lezar
10e86367f9 Merge pull request #365 from NVIDIA/dependabot/go_modules/release-1.14/github.com/NVIDIA/go-nvml-0.12.0-2
Bump github.com/NVIDIA/go-nvml from 0.12.0-1 to 0.12.0-2
2024-02-19 11:19:24 +01:00
dependabot[bot]
7c6c5e6104 Bump github.com/NVIDIA/go-nvml from 0.12.0-1 to 0.12.0-2
Bumps [github.com/NVIDIA/go-nvml](https://github.com/NVIDIA/go-nvml) from 0.12.0-1 to 0.12.0-2.
- [Commits](https://github.com/NVIDIA/go-nvml/compare/v0.12.0-1...v0.12.0-2)

---
updated-dependencies:
- dependency-name: github.com/NVIDIA/go-nvml
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-19 10:17:03 +00:00
Evan Lezar
323580c2fa Merge pull request #366 from NVIDIA/dependabot/go_modules/release-1.14/github.com/stretchr/testify-1.8.4
Bump github.com/stretchr/testify from 1.8.1 to 1.8.4
2024-02-19 11:15:21 +01:00
dependabot[bot]
ae329e3a94 Bump github.com/stretchr/testify from 1.8.1 to 1.8.4
Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.8.1 to 1.8.4.
- [Release notes](https://github.com/stretchr/testify/releases)
- [Commits](https://github.com/stretchr/testify/compare/v1.8.1...v1.8.4)

---
updated-dependencies:
- dependency-name: github.com/stretchr/testify
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-19 10:11:40 +00:00
Evan Lezar
99c955a3f4 Merge pull request #367 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/mod-0.15.0
Bump golang.org/x/mod from 0.5.0 to 0.15.0
2024-02-19 11:10:59 +01:00
dependabot[bot]
24a48582ca Bump golang.org/x/mod from 0.5.0 to 0.15.0
Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.5.0 to 0.15.0.
- [Commits](https://github.com/golang/mod/compare/v0.5.0...v0.15.0)

---
updated-dependencies:
- dependency-name: golang.org/x/mod
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-18 08:17:27 +00:00
Evan Lezar
a3e0a72fd0 Merge pull request #356 from NVIDIA/dependabot/go_modules/release-1.14/github.com/urfave/cli/v2-2.27.1
Bump github.com/urfave/cli/v2 from 2.3.0 to 2.27.1
2024-02-13 21:10:07 +01:00
Evan Lezar
2bc874376f Merge pull request #358 from NVIDIA/dependabot/go_modules/release-1.14/github.com/fsnotify/fsnotify-1.7.0
Bump github.com/fsnotify/fsnotify from 1.5.4 to 1.7.0
2024-02-13 21:09:53 +01:00
dependabot[bot]
bd1084d1a1 Bump github.com/urfave/cli/v2 from 2.3.0 to 2.27.1
Bumps [github.com/urfave/cli/v2](https://github.com/urfave/cli) from 2.3.0 to 2.27.1.
- [Release notes](https://github.com/urfave/cli/releases)
- [Changelog](https://github.com/urfave/cli/blob/main/docs/CHANGELOG.md)
- [Commits](https://github.com/urfave/cli/compare/v2.3.0...v2.27.1)

---
updated-dependencies:
- dependency-name: github.com/urfave/cli/v2
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 16:51:28 +00:00
dependabot[bot]
98e5ad0a10 Bump github.com/fsnotify/fsnotify from 1.5.4 to 1.7.0
Bumps [github.com/fsnotify/fsnotify](https://github.com/fsnotify/fsnotify) from 1.5.4 to 1.7.0.
- [Release notes](https://github.com/fsnotify/fsnotify/releases)
- [Changelog](https://github.com/fsnotify/fsnotify/blob/main/CHANGELOG.md)
- [Commits](https://github.com/fsnotify/fsnotify/compare/v1.5.4...v1.7.0)

---
updated-dependencies:
- dependency-name: github.com/fsnotify/fsnotify
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 16:50:54 +00:00
Evan Lezar
3c710a0596 Merge pull request #359 from NVIDIA/dependabot/go_modules/release-1.14/github.com/pelletier/go-toml-1.9.5
Bump github.com/pelletier/go-toml from 1.9.4 to 1.9.5
2024-02-12 17:46:44 +01:00
dependabot[bot]
38a8bb183a Bump github.com/pelletier/go-toml from 1.9.4 to 1.9.5
Bumps [github.com/pelletier/go-toml](https://github.com/pelletier/go-toml) from 1.9.4 to 1.9.5.
- [Release notes](https://github.com/pelletier/go-toml/releases)
- [Changelog](https://github.com/pelletier/go-toml/blob/v2/.goreleaser.yaml)
- [Commits](https://github.com/pelletier/go-toml/compare/v1.9.4...v1.9.5)

---
updated-dependencies:
- dependency-name: github.com/pelletier/go-toml
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 16:41:39 +00:00
Evan Lezar
7038e7f003 Merge pull request #357 from NVIDIA/dependabot/go_modules/release-1.14/golang.org/x/sys-0.17.0
Bump golang.org/x/sys from 0.7.0 to 0.17.0
2024-02-12 17:41:20 +01:00
Evan Lezar
539033af43 Merge pull request #355 from NVIDIA/dependabot/go_modules/release-1.14/github.com/sirupsen/logrus-1.9.3
Bump github.com/sirupsen/logrus from 1.9.0 to 1.9.3
2024-02-12 17:40:56 +01:00
dependabot[bot]
365e9e03b9 Bump golang.org/x/sys from 0.7.0 to 0.17.0
Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.7.0 to 0.17.0.
- [Commits](https://github.com/golang/sys/compare/v0.7.0...v0.17.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sys
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 16:35:26 +00:00
dependabot[bot]
c1894a0760 Bump github.com/sirupsen/logrus from 1.9.0 to 1.9.3
Bumps [github.com/sirupsen/logrus](https://github.com/sirupsen/logrus) from 1.9.0 to 1.9.3.
- [Release notes](https://github.com/sirupsen/logrus/releases)
- [Changelog](https://github.com/sirupsen/logrus/blob/master/CHANGELOG.md)
- [Commits](https://github.com/sirupsen/logrus/compare/v1.9.0...v1.9.3)

---
updated-dependencies:
- dependency-name: github.com/sirupsen/logrus
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-12 16:35:05 +00:00
Evan Lezar
9ea3360701 Merge pull request #340 from elezar/cherry-pick-v1.14.5
Update dependencies for v1.14.5 release
2024-02-07 12:35:09 +01:00
Evan Lezar
b6987c526a Fix docstring
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 12:33:02 +01:00
Evan Lezar
3604927034 Replace github.com/container-orchestrated-devices/container-device-interface with tags.cncf.io
This also removes the indirect dependency on runc which has a CVE.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 12:29:01 +01:00
Evan Lezar
640bd6ee3f Remove blossom-ci
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 12:29:01 +01:00
Evan Lezar
6593f3c2e4 Merge pull request #320 from elezar/remove-centos8-jobs
Remove centos8 jobs

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 12:28:54 +01:00
Evan Lezar
d52a237c12 Merge pull request #339 from elezar/cherry-pick-1.14.5
Cherry-pick changes for v1.14.5
2024-02-07 10:20:27 +01:00
Evan Lezar
9d9260db8c Merge branch 'fix-cdi-enable-docker' into 'main'
Fix --cdi.enabled for Docker

See merge request nvidia/container-toolkit/container-toolkit!541

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 10:14:23 +01:00
Evan Lezar
888fe458ae Bump version to v1.14.5
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-02-07 10:09:48 +01:00
Evan Lezar
d167812ce3 Merge branch 'cherry-pick-enable-cdi' into 'release-1.14'
Add cdi.enabled option to runtime configure

See merge request nvidia/container-toolkit/container-toolkit!539
2024-01-19 14:40:29 +00:00
Christopher Desiniotis
7ff23999e8 Add option to nvidia-ctk to enable CDI in docker
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-19 15:36:15 +01:00
Evan Lezar
a9b01a43bc Add cdi.enabled option to runtime configure
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-19 15:35:14 +01:00
Evan Lezar
ccff00bc30 Merge branch 'bump-version-v1.14.4' into 'release-1.14'
Bump version to v1.14.4

See merge request nvidia/container-toolkit/container-toolkit!537
2024-01-18 12:17:55 +00:00
Evan Lezar
f7d54200c6 Bump version to v1.14.4
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-18 13:17:17 +01:00
Evan Lezar
29fd206f3a Merge branch 'cherry-pick-1.14.4' into 'release-1.14'
Cherry pick changes for v1.14.4 release

See merge request nvidia/container-toolkit/container-toolkit!534
2024-01-17 22:51:28 +00:00
Evan Lezar
cfe0d5d07e Skip component updates
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-17 23:06:49 +01:00
Evan Lezar
9ab640b2be Set libnvidia-container branch
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-17 22:46:13 +01:00
Evan Lezar
9d2e4b48bc Update libnvidia-container to 1.14.4
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-01-17 22:33:44 +01:00
Evan Lezar
c050bcf081 Merge branch 'add-crun-as-configured-runtime' into 'main'
Set default low-level runtimes to runc, crun

See merge request nvidia/container-toolkit/container-toolkit!536
2024-01-17 22:29:58 +01:00
Evan Lezar
27d0fa4ee2 Merge branch 'bump-cuda-12.3.1' into 'main'
Bump CUDA base image to 12.3.1

See merge request nvidia/container-toolkit/container-toolkit!535
2024-01-11 15:27:27 +01:00
Evan Lezar
e0e22fdceb Merge branch 'fix-user-group' into 'main'
Fix bug in determining CLI user on SUSE systems

See merge request nvidia/container-toolkit/container-toolkit!532
2024-01-11 15:27:25 +01:00
Evan Lezar
c1eae0deda Merge branch 'libnvdxgdmal' into 'main'
Add libnvdxgdmal library

See merge request nvidia/container-toolkit/container-toolkit!529
2024-01-11 15:27:01 +01:00
Evan Lezar
68f0203a49 Merge branch 'remove-libseccomp-dependency' into 'main'
Remove libseccomp package dependency

See merge request nvidia/container-toolkit/container-toolkit!531
2024-01-11 15:26:37 +01:00
Evan Lezar
cc688f7c75 Merge branch 'log-requested-mode' into 'main'
Log explicitly requested runtime mode

See merge request nvidia/container-toolkit/container-toolkit!527
2024-01-11 15:26:03 +01:00
Evan Lezar
7566eb124a Merge branch 'fix-config-update-command' into 'main'
Switch to reflect package for config updates

See merge request nvidia/container-toolkit/container-toolkit!500
2024-01-11 15:25:33 +01:00
Evan Lezar
eb5d50abc4 Merge branch 'include-nvoptix' into 'main'
Update list of graphics mounts

See merge request nvidia/container-toolkit/container-toolkit!501
2024-01-11 15:25:09 +01:00
597 changed files with 22014 additions and 9113 deletions

View File

@@ -1,113 +0,0 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A workflow to trigger ci on hybrid infra (github + self hosted runner)
name: Blossom-CI
on:
issue_comment:
types: [created]
workflow_dispatch:
inputs:
platform:
description: 'runs-on argument'
required: false
args:
description: 'argument'
required: false
jobs:
Authorization:
name: Authorization
runs-on: blossom
outputs:
args: ${{ env.args }}
# This job only runs for pull request comments
if: |
contains( '\
anstockatnv,\
rorajani,\
cdesiniotis,\
shivamerla,\
ArangoGutierrez,\
elezar,\
klueska,\
zvonkok,\
', format('{0},', github.actor)) &&
github.event.comment.body == '/blossom-ci'
steps:
- name: Check if comment is issued by authorized person
run: blossom-ci
env:
OPERATION: 'AUTH'
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
Vulnerability-scan:
name: Vulnerability scan
needs: [Authorization]
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
repository: ${{ fromJson(needs.Authorization.outputs.args).repo }}
ref: ${{ fromJson(needs.Authorization.outputs.args).ref }}
lfs: 'true'
# repo specific steps
#- name: Setup java
# uses: actions/setup-java@v1
# with:
# java-version: 1.8
# add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file
#- name: Setup blackduck properties
# run: |
# PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g')
# echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties
# echo detect.maven.included.scopes=compile >> application.properties
- name: Run blossom action
uses: NVIDIA/blossom-action@main
env:
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
with:
args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }}
args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }}
args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }}
Job-trigger:
name: Start ci job
needs: [Vulnerability-scan]
runs-on: blossom
steps:
- name: Start ci job
run: blossom-ci
env:
OPERATION: 'START-CI-JOB'
CI_SERVER: ${{ secrets.CI_SERVER }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Upload-Log:
name: Upload log
runs-on: blossom
if : github.event_name == 'workflow_dispatch'
steps:
- name: Jenkins log for pull request ${{ fromJson(github.event.inputs.args).pr }} (click here)
run: blossom-ci
env:
OPERATION: 'POST-PROCESSING'
CI_SERVER: ${{ secrets.CI_SERVER }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -138,24 +138,12 @@ package-centos7-x86_64:
- .dist-centos7
- .arch-x86_64
package-centos8-aarch64:
extends:
- .package-build
- .dist-centos8
- .arch-aarch64
package-centos8-ppc64le:
extends:
- .package-build
- .dist-centos8
- .arch-ppc64le
package-centos8-x86_64:
extends:
- .package-build
- .dist-centos8
- .arch-x86_64
package-ubuntu18.04-amd64:
extends:
- .package-build
@@ -238,8 +226,6 @@ image-packaging:
- .package-artifacts
- .dist-packaging
needs:
- job: package-centos8-aarch64
- job: package-centos8-x86_64
- job: package-ubuntu18.04-amd64
- job: package-ubuntu18.04-arm64
- job: package-amazonlinux2-aarch64

2
.gitmodules vendored
View File

@@ -1,4 +1,4 @@
[submodule "third_party/libnvidia-container"]
path = third_party/libnvidia-container
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
branch = main
branch = release-1.14

View File

@@ -1,5 +1,28 @@
# NVIDIA Container Toolkit Changelog
## v1.14.6
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
* Add support for selecting IMEX channels using the NVIDIA_IMEX_CHANNELS environement variable.
## v1.14.5
* Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker. This was incorrectly setting `experimental = true` instead
of setting `features.cdi = true`.
## v1.14.4
* Include `nvidia/nvoptix.bin` in list of graphics mounts.
* Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts.
* Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly.
* Log explicitly requested runtime mode.
* Remove package dependency on libseccomp.
* Added detection of libnvdxgdmal.so.1 on WSL2.
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
* Add `crun` to the list of configured low-level runtimes.
* Add `--cdi.enabled` option to `nvidia-ctk runtime configure` command to enable CDI in containerd.
* Added support for `nvidia-ctk runtime configure --enable-cdi` for the `docker` runtime. Note that this requires Docker >= 25.
* [toolkit-container] Bump CUDA base image version to 12.3.1.
* [libnvidia-container] Added detection of libnvdxgdmal.so.1 on WSL2.
## v1.14.3
* [toolkit-container] Bump CUDA base image version to 12.2.2.

View File

@@ -19,7 +19,7 @@ where `TARGET` is a make target that is valid for each of the sub-components.
These include:
* `ubuntu18.04-amd64`
* `centos8-x86_64`
* `centos7-x86_64`
If no `TARGET` is specified, all valid release targets are built.

View File

@@ -145,9 +145,7 @@ test-packaging:
@echo "Testing package image contents"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/aarch64" || echo "Missing centos7/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos7/x86_64" || echo "Missing centos7/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/aarch64" || echo "Missing centos8/aarch64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/ppc64le" || echo "Missing centos8/ppc64le"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/centos8/x86_64" || echo "Missing centos8/x86_64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/amd64" || echo "Missing ubuntu18.04/amd64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/arm64" || echo "Missing ubuntu18.04/arm64"
@$(DOCKER) run --rm $(IMAGE) test -d "/artifacts/packages/ubuntu18.04/ppc64le" || echo "Missing ubuntu18.04/ppc64le"

View File

@@ -9,9 +9,10 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
)
const (
@@ -22,6 +23,7 @@ const (
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)
@@ -37,6 +39,7 @@ type nvidiaConfig struct {
Devices string
MigConfigDevices string
MigMonitorDevices string
ImexChannels string
DriverCapabilities string
// Requirements defines the requirements DSL for the container to run.
// This is empty if no specific requirements are needed, or if requirements are
@@ -271,6 +274,13 @@ func getMigMonitorDevices(env map[string]string) *string {
return nil
}
func getImexChannels(env map[string]string) *string {
if chans, ok := env[envNVImexChannels]; ok {
return &chans
}
return nil
}
func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities {
// We use the default driver capabilities by default. This is filtered to only include the
// supported capabilities
@@ -324,6 +334,11 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
var imexChannels string
if c := getImexChannels(image); c != nil {
imexChannels = *c
}
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
requirements, err := image.GetRequirements()
@@ -335,6 +350,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
Devices: devices,
MigConfigDevices: migConfigDevices,
MigMonitorDevices: migMonitorDevices,
ImexChannels: imexChannels,
DriverCapabilities: driverCapabilities,
Requirements: requirements,
}

View File

@@ -126,6 +126,9 @@ func doPrestart() {
if len(nvidia.MigMonitorDevices) > 0 {
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
}
if len(nvidia.ImexChannels) > 0 {
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
}
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
if len(cap) == 0 {

View File

@@ -22,14 +22,15 @@ import (
"path/filepath"
"strings"
"github.com/urfave/cli/v2"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/urfave/cli/v2"
)
const (

View File

@@ -19,9 +19,10 @@ package list
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/urfave/cli/v2"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
type command struct {

View File

@@ -21,11 +21,12 @@ import (
"io"
"os"
"github.com/urfave/cli/v2"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/urfave/cli/v2"
)
type loadSaver interface {

View File

@@ -19,14 +19,16 @@ package config
import (
"errors"
"fmt"
"reflect"
"strconv"
"strings"
"github.com/urfave/cli/v2"
createdefault "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/create-default"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
@@ -103,7 +105,7 @@ func run(c *cli.Context, opts *options) error {
}
for _, set := range opts.sets.Value() {
key, value, err := (*configToml)(cfgToml).setFlagToKeyValue(set)
key, value, err := setFlagToKeyValue(set)
if err != nil {
return fmt.Errorf("invalid --set option %v: %w", set, err)
}
@@ -126,50 +128,86 @@ func run(c *cli.Context, opts *options) error {
return nil
}
type configToml config.Toml
var errInvalidConfigOption = errors.New("invalid config option")
var errUndefinedField = errors.New("undefined field")
var errInvalidFormat = errors.New("invalid format")
// setFlagToKeyValue converts a --set flag to a key-value pair.
// The set flag is of the form key[=value], with the value being optional if key refers to a
// boolean config option.
func (c *configToml) setFlagToKeyValue(setFlag string) (string, interface{}, error) {
if c == nil {
return "", nil, errInvalidConfigOption
}
func setFlagToKeyValue(setFlag string) (string, interface{}, error) {
setParts := strings.SplitN(setFlag, "=", 2)
key := setParts[0]
v := (*config.Toml)(c).Get(key)
if v == nil {
return key, nil, errInvalidConfigOption
}
switch v.(type) {
case bool:
if len(setParts) == 1 {
return key, true, nil
}
field, err := getField(key)
if err != nil {
return key, nil, fmt.Errorf("%w: %w", errInvalidConfigOption, err)
}
kind := field.Kind()
if len(setParts) != 2 {
if kind == reflect.Bool {
return key, true, nil
}
return key, nil, fmt.Errorf("%w: expected key=value; got %v", errInvalidFormat, setFlag)
}
value := setParts[1]
switch vt := v.(type) {
case bool:
switch kind {
case reflect.Bool:
b, err := strconv.ParseBool(value)
if err != nil {
return key, value, fmt.Errorf("%w: %w", errInvalidFormat, err)
}
return key, b, err
case string:
case reflect.String:
return key, value, nil
case []string:
return key, strings.Split(value, ","), nil
default:
return key, nil, fmt.Errorf("unsupported type for %v (%v)", setParts, vt)
case reflect.Slice:
valueParts := strings.Split(value, ",")
switch field.Elem().Kind() {
case reflect.String:
return key, valueParts, nil
case reflect.Int:
var output []int64
for _, v := range valueParts {
vi, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return key, nil, fmt.Errorf("%w: %w", errInvalidFormat, err)
}
output = append(output, vi)
}
return key, output, nil
}
}
return key, nil, fmt.Errorf("unsupported type for %v (%v)", setParts, kind)
}
func getField(key string) (reflect.Type, error) {
s, err := getStruct(reflect.TypeOf(config.Config{}), strings.Split(key, ".")...)
if err != nil {
return nil, err
}
return s.Type, err
}
func getStruct(current reflect.Type, paths ...string) (reflect.StructField, error) {
if len(paths) < 1 {
return reflect.StructField{}, fmt.Errorf("%w: no fields selected", errUndefinedField)
}
tomlField := paths[0]
for i := 0; i < current.NumField(); i++ {
f := current.Field(i)
v, ok := f.Tag.Lookup("toml")
if !ok {
continue
}
if v != tomlField {
continue
}
if len(paths) == 1 {
return f, nil
}
return getStruct(f.Type, paths[1:]...)
}
return reflect.StructField{}, fmt.Errorf("%w: %q", errUndefinedField, tomlField)
}

View File

@@ -19,152 +19,109 @@ package config
import (
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/pelletier/go-toml"
"github.com/stretchr/testify/require"
)
func TestSetFlagToKeyValue(t *testing.T) {
// TODO: We need to enable this test again since switching to reflect.
testCases := []struct {
description string
config map[string]interface{}
setFlag string
expectedKey string
expectedValue interface{}
expectedError error
}{
{
description: "empty config returns an error",
setFlag: "anykey=value",
expectedKey: "anykey",
expectedError: errInvalidConfigOption,
},
{
description: "option not present returns an error",
config: map[string]interface{}{
"defined": "defined-value",
},
description: "option not present returns an error",
setFlag: "undefined=new-value",
expectedKey: "undefined",
expectedError: errInvalidConfigOption,
},
{
description: "boolean option assumes true",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean",
expectedKey: "boolean",
description: "undefined nexted option returns error",
setFlag: "nvidia-container-cli.undefined",
expectedKey: "nvidia-container-cli.undefined",
expectedError: errInvalidConfigOption,
},
{
description: "boolean option assumes true",
setFlag: "disable-require",
expectedKey: "disable-require",
expectedValue: true,
},
{
description: "boolean option returns true",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean=true",
expectedKey: "boolean",
description: "boolean option returns true",
setFlag: "disable-require=true",
expectedKey: "disable-require",
expectedValue: true,
},
{
description: "boolean option returns false",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean=false",
expectedKey: "boolean",
description: "boolean option returns false",
setFlag: "disable-require=false",
expectedKey: "disable-require",
expectedValue: false,
},
{
description: "invalid boolean option returns error",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean=something",
expectedKey: "boolean",
description: "invalid boolean option returns error",
setFlag: "disable-require=something",
expectedKey: "disable-require",
expectedValue: "something",
expectedError: errInvalidFormat,
},
{
description: "string option requires value",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string",
expectedKey: "string",
description: "string option requires value",
setFlag: "swarm-resource",
expectedKey: "swarm-resource",
expectedValue: nil,
expectedError: errInvalidFormat,
},
{
description: "string option returns value",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=string-value",
expectedKey: "string",
description: "string option returns value",
setFlag: "swarm-resource=string-value",
expectedKey: "swarm-resource",
expectedValue: "string-value",
},
{
description: "string option returns value with equals",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=string-value=more",
expectedKey: "string",
description: "string option returns value with equals",
setFlag: "swarm-resource=string-value=more",
expectedKey: "swarm-resource",
expectedValue: "string-value=more",
},
{
description: "string option treats bool value as string",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=true",
expectedKey: "string",
description: "string option treats bool value as string",
setFlag: "swarm-resource=true",
expectedKey: "swarm-resource",
expectedValue: "true",
},
{
description: "string option treats int value as string",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=5",
expectedKey: "string",
description: "string option treats int value as string",
setFlag: "swarm-resource=5",
expectedKey: "swarm-resource",
expectedValue: "5",
},
{
description: "[]string option returns single value",
config: map[string]interface{}{
"string": []string{"value"},
},
setFlag: "string=string-value",
expectedKey: "string",
description: "[]string option returns single value",
setFlag: "nvidia-container-cli.environment=string-value",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"string-value"},
},
{
description: "[]string option returns multiple values",
config: map[string]interface{}{
"string": []string{"value"},
},
setFlag: "string=first,second",
expectedKey: "string",
description: "[]string option returns multiple values",
setFlag: "nvidia-container-cli.environment=first,second",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first", "second"},
},
{
description: "[]string option returns values with equals",
config: map[string]interface{}{
"string": []string{"value"},
},
setFlag: "string=first=1,second=2",
expectedKey: "string",
description: "[]string option returns values with equals",
setFlag: "nvidia-container-cli.environment=first=1,second=2",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first=1", "second=2"},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
tree, _ := toml.TreeFromMap(tc.config)
cfgToml := (*config.Toml)(tree)
k, v, err := (*configToml)(cfgToml).setFlagToKeyValue(tc.setFlag)
k, v, err := setFlagToKeyValue(tc.setFlag)
require.ErrorIs(t, err, tc.expectedError)
require.EqualValues(t, tc.expectedKey, k)
require.EqualValues(t, tc.expectedValue, v)

View File

@@ -20,13 +20,14 @@ import (
"fmt"
"path/filepath"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/containerd"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/crio"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine/docker"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/ocihook"
"github.com/urfave/cli/v2"
)
const (
@@ -71,6 +72,11 @@ type config struct {
hookPath string
setAsDefault bool
}
// cdi-specific options
cdi struct {
enabled bool
}
}
func (m command) build() *cli.Command {
@@ -141,6 +147,12 @@ func (m command) build() *cli.Command {
Usage: "set the NVIDIA runtime as the default runtime",
Destination: &config.nvidiaRuntime.setAsDefault,
},
&cli.BoolFlag{
Name: "cdi.enabled",
Aliases: []string{"cdi.enable"},
Usage: "Enable CDI in the configured runtime",
Destination: &config.cdi.enabled,
},
}
return &configure
@@ -175,6 +187,13 @@ func (m command) validateFlags(c *cli.Context, config *config) error {
}
}
if config.runtime != "containerd" && config.runtime != "docker" {
if config.cdi.enabled {
m.logger.Warningf("Ignoring cdi.enabled flag for %v", config.runtime)
}
config.cdi.enabled = false
}
return nil
}
@@ -227,6 +246,11 @@ func (m command) configureConfigFile(c *cli.Context, config *config) error {
return fmt.Errorf("unable to update config: %v", err)
}
err = enableCDI(config, cfg)
if err != nil {
return fmt.Errorf("failed to enable CDI in %s: %w", config.runtime, err)
}
outputPath := config.getOuputConfigPath()
n, err := cfg.Save(outputPath)
if err != nil {
@@ -277,3 +301,17 @@ func (m *command) configureOCIHook(c *cli.Context, config *config) error {
}
return nil
}
// enableCDI enables the use of CDI in the corresponding container engine
func enableCDI(config *config, cfg engine.Interface) error {
if !config.cdi.enabled {
return nil
}
switch config.runtime {
case "containerd":
return cfg.Set("enable_cdi", true)
case "docker":
return cfg.Set("features", map[string]bool{"cdi": true})
}
return fmt.Errorf("enabling CDI in %s is not supported", config.runtime)
}

View File

@@ -20,10 +20,11 @@ import (
"fmt"
"path/filepath"
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci"
)
type allPossible struct {

26
go.mod
View File

@@ -3,31 +3,33 @@ module github.com/NVIDIA/nvidia-container-toolkit
go 1.20
require (
github.com/NVIDIA/go-nvml v0.12.0-1
github.com/container-orchestrated-devices/container-device-interface v0.6.0
github.com/fsnotify/fsnotify v1.5.4
github.com/opencontainers/runtime-spec v1.1.0-rc.2
github.com/pelletier/go-toml v1.9.4
github.com/sirupsen/logrus v1.9.0
github.com/stretchr/testify v1.8.1
github.com/urfave/cli/v2 v2.3.0
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884
golang.org/x/mod v0.5.0
golang.org/x/sys v0.7.0
github.com/NVIDIA/go-nvlib v0.2.0
github.com/NVIDIA/go-nvml v0.12.0-3
github.com/fsnotify/fsnotify v1.7.0
github.com/opencontainers/runtime-spec v1.2.0
github.com/pelletier/go-toml v1.9.5
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.9.0
github.com/urfave/cli/v2 v2.27.1
golang.org/x/mod v0.17.0
golang.org/x/sys v0.19.0
tags.cncf.io/container-device-interface v0.6.2
tags.cncf.io/container-device-interface/specs-go v0.6.0
)
require (
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/opencontainers/runc v1.1.6 // indirect
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect

64
go.sum
View File

@@ -1,20 +1,20 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/NVIDIA/go-nvml v0.12.0-1 h1:6mdjtlFo+17dWL7VFPfuRMtf0061TF4DKls9pkSw6uM=
github.com/NVIDIA/go-nvml v0.12.0-1/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/NVIDIA/go-nvlib v0.2.0 h1:roq+SDstbP1fcy2XVH7wB2Gz2/Ud7Q+NGQYOcVITVrA=
github.com/NVIDIA/go-nvlib v0.2.0/go.mod h1:kFuLNTyD1tF6FbRFlk+/EdUW5BrkE+v1Y3A3/9zKSjA=
github.com/NVIDIA/go-nvml v0.12.0-3 h1:QwfjYxEqIQVRhl8327g2Y3ZvKResPydpGSKtCIIK9jE=
github.com/NVIDIA/go-nvml v0.12.0-3/go.mod h1:SOufGc5Wql+cxrIZ8RyJwVKDYxfbs4WPkHXqadcbfvA=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/container-orchestrated-devices/container-device-interface v0.6.0 h1:aWwcz/Ep0Fd7ZuBjQGjU/jdPloM7ydhMW13h85jZNvk=
github.com/container-orchestrated-devices/container-device-interface v0.6.0/go.mod h1:OQlgtJtDrOxSQ1BWODC8OZK1tzi9W69wek+Jy17ndzo=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
@@ -29,45 +29,37 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runc v1.1.6 h1:XbhB8IfG/EsnhNvZtNdLB0GBw92GYEFvKlhaJk9jUgA=
github.com/opencontainers/runc v1.1.6/go.mod h1:CbUumNnWCuTGFukNXahoo/RFBZvDAgRh/smNYNOhA50=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.1.0-rc.2 h1:ucBtEms2tamYYW/SvGpvq9yUN0NEVL6oyLEwDcTSrk8=
github.com/opencontainers/runtime-spec v1.1.0-rc.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM=
github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M=
github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho=
github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -75,22 +67,18 @@ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHo
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a h1:lceJVurLqiWFdxK6KMDw+SIwrAsFW/af44XrNlbGw78=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230613182322-7663cf900f0a/go.mod h1:KYZksBgh18o+uzgnpDazzG4LVYtnfB96VXHMXypEtik=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884 h1:V0LUbfm4kVA1CPG8FgG9AGZqa3ykE5U12Gd3PZgoItA=
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884/go.mod h1:/x5Ky1ZJNyCjDkgSL1atII0EFKQF5WaIHKeP5nkaQfk=
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
@@ -98,3 +86,7 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
tags.cncf.io/container-device-interface v0.6.2 h1:dThE6dtp/93ZDGhqaED2Pu374SOeUkBfuvkLuiTdwzg=
tags.cncf.io/container-device-interface v0.6.2/go.mod h1:Shusyhjs1A5Na/kqPVLL0KqnHQHuunol9LFeUNkuGVE=
tags.cncf.io/container-device-interface/specs-go v0.6.0 h1:V+tJJN6dqu8Vym6p+Ru+K5mJ49WL6Aoc5SJFSY0RLsQ=
tags.cncf.io/container-device-interface/specs-go v0.6.0/go.mod h1:hMAwAbMZyBLdmYqWgYcKH0F/yctNpV3P35f+/088A80=

View File

@@ -22,10 +22,11 @@ import (
"path/filepath"
"strings"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
)
const (
@@ -94,6 +95,7 @@ func GetDefault() (*Config, error) {
NVIDIAContainerCLIConfig: ContainerCLIConfig{
LoadKmods: true,
Ldconfig: getLdConfigPath(),
User: getUserGroup(),
},
NVIDIACTKConfig: CTKConfig{
Path: nvidiaCTKExecutable,
@@ -101,7 +103,7 @@ func GetDefault() (*Config, error) {
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc"},
Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
@@ -128,24 +130,32 @@ func getLdConfigPath() string {
return "@/sbin/ldconfig"
}
// getCommentedUserGroup returns whether the nvidia-container-cli user and group config option should be commented.
func getCommentedUserGroup() bool {
uncommentIf := map[string]bool{
func getUserGroup() string {
if isSuse() {
return "root:video"
}
return ""
}
// isSuse returns whether a SUSE-based distribution was detected.
func isSuse() bool {
suseDists := map[string]bool{
"suse": true,
"opensuse": true,
}
idsLike := getDistIDLike()
for _, id := range idsLike {
if uncommentIf[id] {
return false
if suseDists[id] {
return true
}
}
return true
return false
}
// getDistIDLike returns the ID_LIKE field from /etc/os-release.
func getDistIDLike() []string {
// We can override this for testing.
var getDistIDLike = func() []string {
releaseFile, err := os.Open("/etc/os-release")
if err != nil {
return nil

View File

@@ -48,6 +48,7 @@ func TestGetConfig(t *testing.T) {
contents []string
expectedError error
inspectLdconfig bool
distIdsLike []string
expectedConfig *Config
}{
{
@@ -64,7 +65,7 @@ func TestGetConfig(t *testing.T) {
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc"},
Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
@@ -93,6 +94,7 @@ func TestGetConfig(t *testing.T) {
"nvidia-container-cli.root = \"/bar/baz\"",
"nvidia-container-cli.load-kmods = false",
"nvidia-container-cli.ldconfig = \"/foo/bar/ldconfig\"",
"nvidia-container-cli.user = \"foo:bar\"",
"nvidia-container-runtime.debug = \"/foo/bar\"",
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
"nvidia-container-runtime.log-level = \"debug\"",
@@ -112,6 +114,7 @@ func TestGetConfig(t *testing.T) {
Root: "/bar/baz",
LoadKmods: false,
Ldconfig: "/foo/bar/ldconfig",
User: "foo:bar",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
@@ -152,6 +155,7 @@ func TestGetConfig(t *testing.T) {
"root = \"/bar/baz\"",
"load-kmods = false",
"ldconfig = \"/foo/bar/ldconfig\"",
"user = \"foo:bar\"",
"[nvidia-container-runtime]",
"debug = \"/foo/bar\"",
"discover-mode = \"not-legacy\"",
@@ -176,6 +180,7 @@ func TestGetConfig(t *testing.T) {
Root: "/bar/baz",
LoadKmods: false,
Ldconfig: "/foo/bar/ldconfig",
User: "foo:bar",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
@@ -207,10 +212,88 @@ func TestGetConfig(t *testing.T) {
},
},
},
{
description: "suse config",
distIdsLike: []string{"suse", "opensuse"},
inspectLdconfig: true,
expectedConfig: &Config{
AcceptEnvvarUnprivileged: true,
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
LoadKmods: true,
Ldconfig: "WAS_CHECKED",
User: "root:video",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{"cdi.k8s.io/"},
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
},
},
{
description: "suse config overrides user",
distIdsLike: []string{"suse", "opensuse"},
inspectLdconfig: true,
contents: []string{
"nvidia-container-cli.user = \"foo:bar\"",
},
expectedConfig: &Config{
AcceptEnvvarUnprivileged: true,
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
LoadKmods: true,
Ldconfig: "WAS_CHECKED",
User: "foo:bar",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{"cdi.k8s.io/"},
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
defer setGetDistIDLikeForTest(tc.distIdsLike)()
reader := strings.NewReader(strings.Join(tc.contents, "\n"))
tomlCfg, err := loadConfigTomlFrom(reader)
@@ -236,3 +319,19 @@ func TestGetConfig(t *testing.T) {
})
}
}
// setGetDistIDsLikeForTest overrides the distribution IDs that would normally be read from the /etc/os-release file.
func setGetDistIDLikeForTest(ids []string) func() {
if ids == nil {
return func() {}
}
original := getDistIDLike
getDistIDLike = func() []string {
return ids
}
return func() {
getDistIDLike = original
}
}

View File

@@ -204,7 +204,7 @@ func (t *Toml) commentDefaults() *Toml {
}
func shouldComment(key string, defaultValue interface{}, setTo interface{}) bool {
if key == "nvidia-container-cli.user" && !getCommentedUserGroup() {
if key == "nvidia-container-cli.user" && defaultValue == setTo && isSuse() {
return false
}
if key == "nvidia-container-runtime.debug" && setTo == "/dev/null" {

View File

@@ -62,7 +62,7 @@ load-kmods = true
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
mode = "auto"
runtimes = ["docker-runc", "runc"]
runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes]

View File

@@ -78,9 +78,11 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driverRoot string, nvi
[]string{
"glvnd/egl_vendor.d/10_nvidia.json",
"vulkan/icd.d/nvidia_icd.json",
"vulkan/icd.d/nvidia_layers.json",
"vulkan/implicit_layer.d/nvidia_layers.json",
"egl/egl_external_platform.d/15_nvidia_gbm.json",
"egl/egl_external_platform.d/10_nvidia_wayland.json",
"nvidia/nvoptix.bin",
},
)

View File

@@ -19,7 +19,7 @@ package discover
import (
"path/filepath"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/pkg/cdi"
)
var _ Discover = (*Hook)(nil)

View File

@@ -17,9 +17,10 @@
package edits
import (
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type device discover.Device

View File

@@ -20,9 +20,10 @@ import (
"fmt"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/stretchr/testify/require"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
)
func TestDeviceToSpec(t *testing.T) {

View File

@@ -19,12 +19,13 @@ package edits
import (
"fmt"
ociSpecs "github.com/opencontainers/runtime-spec/specs-go"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
ociSpecs "github.com/opencontainers/runtime-spec/specs-go"
)
type edits struct {

View File

@@ -17,9 +17,10 @@
package edits
import (
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type hook discover.Hook

View File

@@ -17,9 +17,10 @@
package edits
import (
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type mount discover.Mount

View File

@@ -20,9 +20,9 @@ import (
"fmt"
"strings"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
)
// additionalInfo allows for the info.Interface to be extened to implement the infoInterface.

View File

@@ -19,9 +19,9 @@ package info
import (
"testing"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/stretchr/testify/require"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
func TestUsesNVGPUModule(t *testing.T) {

View File

@@ -17,12 +17,13 @@
package info
import (
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
cdi "tags.cncf.io/container-device-interface/pkg/parser"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/parser"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// infoInterface provides an alias for mocking.
@@ -63,6 +64,7 @@ func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rm
// resolveMode determines the correct mode for the platform if set to "auto"
func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
if mode != "auto" {
r.logger.Infof("Using requested mode '%s'", mode)
return mode
}
defer func() {

View File

@@ -0,0 +1,63 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package devices
type builder struct {
asMap devices
filter func(string) bool
}
// New creates a new devices struct with the specified options.
func New(opts ...Option) Devices {
b := &builder{}
for _, opt := range opts {
opt(b)
}
if b.filter == nil {
b.filter = func(string) bool { return false }
}
devices := make(devices)
for k, v := range b.asMap {
if b.filter(string(k)) {
continue
}
devices[k] = v
}
return devices
}
// Option defines a functional option.
type Option func(*builder)
// WithDeviceToMajor specifies an explicit device name to major number map.
func WithDeviceToMajor(deviceToMajor map[string]int) Option {
return func(b *builder) {
b.asMap = make(devices)
for name, major := range deviceToMajor {
b.asMap[Name(name)] = Major(major)
}
}
}
// WithFilter specifies a filter to exclude devices.
func WithFilter(filter func(string) bool) Option {
return func(b *builder) {
b.filter = filter
}
}

View File

@@ -33,7 +33,7 @@ const (
NVIDIAModesetMinor = 254
NVIDIAFrontend = Name("nvidia-frontend")
NVIDIAGPU = NVIDIAFrontend
NVIDIAGPU = Name("nvidia")
NVIDIACaps = Name("nvidia-caps")
NVIDIAUVM = Name("nvidia-uvm")
@@ -53,22 +53,43 @@ type Major int
type Devices interface {
Exists(Name) bool
Get(Name) (Major, bool)
Count() int
}
type devices map[Name]Major
var _ Devices = devices(nil)
// Count returns the number of devices defined.
func (d devices) Count() int {
return len(d)
}
// Exists checks if a Device with a given name exists or not
func (d devices) Exists(name Name) bool {
_, exists := d[name]
_, exists := d.Get(name)
return exists
}
// Get a Device from Devices
// Get a Device from Devices. It also has fallback logic to ensure device name changes in /proc/devices are handled
// For e.g:- For GPU drivers 550.40.x or greater, the gpu device has been renamed from "nvidia-frontend" to "nvidia".
func (d devices) Get(name Name) (Major, bool) {
device, exists := d[name]
return device, exists
for _, n := range name.getWithFallback() {
device, exists := d[n]
if exists {
return device, true
}
}
return 0, false
}
// getWithFallback returns a prioritised list of device names for a specific name.
// This allows multiple names to be associated with a single name to support various driver versions.
func (n Name) getWithFallback() []Name {
if n == NVIDIAGPU || n == NVIDIAFrontend {
return []Name{NVIDIAGPU, NVIDIAFrontend}
}
return []Name{n}
}
// GetNVIDIADevices returns the set of NVIDIA Devices on the machine
@@ -94,27 +115,23 @@ func nvidiaDevices(devicesPath string) (Devices, error) {
var errNoNvidiaDevices = errors.New("no NVIDIA devices found")
func nvidiaDeviceFrom(reader io.Reader) (devices, error) {
func nvidiaDeviceFrom(reader io.Reader) (Devices, error) {
allDevices := devicesFrom(reader)
nvidiaDevices := make(devices)
var hasNvidiaDevices bool
for n, d := range allDevices {
if !strings.HasPrefix(string(n), nvidiaDevicePrefix) {
continue
}
nvidiaDevices[n] = d
hasNvidiaDevices = true
}
if !hasNvidiaDevices {
nvidiaDevices := New(
WithDeviceToMajor(allDevices),
WithFilter(func(n string) bool {
return !strings.HasPrefix(n, nvidiaDevicePrefix)
}),
)
if nvidiaDevices.Count() == 0 {
return nil, errNoNvidiaDevices
}
return nvidiaDevices, nil
}
func devicesFrom(reader io.Reader) devices {
allDevices := make(devices)
func devicesFrom(reader io.Reader) map[string]int {
allDevices := make(map[string]int)
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
device, major, err := processProcDeviceLine(scanner.Text())
@@ -126,11 +143,11 @@ func devicesFrom(reader io.Reader) devices {
return allDevices
}
func processProcDeviceLine(line string) (Name, Major, error) {
func processProcDeviceLine(line string) (string, int, error) {
trimmed := strings.TrimSpace(line)
var name Name
var major Major
var name string
var major int
n, _ := fmt.Sscanf(trimmed, "%d %s", &major, &name)
if n == 2 {

View File

@@ -17,6 +17,9 @@ var _ Devices = &DevicesMock{}
//
// // make and configure a mocked Devices
// mockedDevices := &DevicesMock{
// CountFunc: func() int {
// panic("mock out the Count method")
// },
// ExistsFunc: func(name Name) bool {
// panic("mock out the Exists method")
// },
@@ -30,6 +33,9 @@ var _ Devices = &DevicesMock{}
//
// }
type DevicesMock struct {
// CountFunc mocks the Count method.
CountFunc func() int
// ExistsFunc mocks the Exists method.
ExistsFunc func(name Name) bool
@@ -38,6 +44,9 @@ type DevicesMock struct {
// calls tracks calls to the methods.
calls struct {
// Count holds details about calls to the Count method.
Count []struct {
}
// Exists holds details about calls to the Exists method.
Exists []struct {
// Name is the name argument value.
@@ -49,10 +58,41 @@ type DevicesMock struct {
Name Name
}
}
lockCount sync.RWMutex
lockExists sync.RWMutex
lockGet sync.RWMutex
}
// Count calls CountFunc.
func (mock *DevicesMock) Count() int {
callInfo := struct {
}{}
mock.lockCount.Lock()
mock.calls.Count = append(mock.calls.Count, callInfo)
mock.lockCount.Unlock()
if mock.CountFunc == nil {
var (
nOut int
)
return nOut
}
return mock.CountFunc()
}
// CountCalls gets all the calls that were made to Count.
// Check the length with:
//
// len(mockedDevices.CountCalls())
func (mock *DevicesMock) CountCalls() []struct {
} {
var calls []struct {
}
mock.lockCount.RLock()
calls = mock.calls.Count
mock.lockCount.RUnlock()
return calls
}
// Exists calls ExistsFunc.
func (mock *DevicesMock) Exists(name Name) bool {
callInfo := struct {

View File

@@ -25,22 +25,46 @@ import (
)
func TestNvidiaDevices(t *testing.T) {
devices := map[Name]Major{
"nvidia-frontend": 195,
"nvidia-nvlink": 234,
"nvidia-caps": 235,
"nvidia-uvm": 510,
"nvidia-nvswitch": 511,
perDriverDeviceMaps := map[string]map[string]int{
"pre550": {
"nvidia-frontend": 195,
"nvidia-nvlink": 234,
"nvidia-caps": 235,
"nvidia-uvm": 510,
"nvidia-nvswitch": 511,
},
"post550": {
"nvidia": 195,
"nvidia-nvlink": 234,
"nvidia-caps": 235,
"nvidia-uvm": 510,
"nvidia-nvswitch": 511,
},
}
nvidiaDevices := testDevices(devices)
for name, major := range devices {
device, exists := nvidiaDevices.Get(name)
require.True(t, exists, "Unexpected missing device")
require.Equal(t, device, major, "Unexpected device major")
for k, devices := range perDriverDeviceMaps {
nvidiaDevices := New(WithDeviceToMajor(devices))
t.Run(k, func(t *testing.T) {
// Each of the expected devices needs to exist.
for name, major := range devices {
device, exists := nvidiaDevices.Get(Name(name))
require.True(t, exists)
require.Equal(t, device, Major(major))
}
// An unexpected device cannot exist
_, exists := nvidiaDevices.Get("bogus")
require.False(t, exists)
// Regardles of the driver version, the nvidia and nvidia-frontend
// names are supported and have the same value.
nvidia, exists := nvidiaDevices.Get(NVIDIAGPU)
require.True(t, exists)
nvidiaFrontend, exists := nvidiaDevices.Get(NVIDIAFrontend)
require.True(t, exists)
require.Equal(t, nvidia, nvidiaFrontend)
})
}
_, exists := nvidiaDevices.Get("bogus")
require.False(t, exists, "Unexpected 'bogus' device found")
}
func TestProcessDeviceFile(t *testing.T) {
@@ -52,6 +76,7 @@ func TestProcessDeviceFile(t *testing.T) {
{lines: []string{}, expectedError: errNoNvidiaDevices},
{lines: []string{"Not a valid line:"}, expectedError: errNoNvidiaDevices},
{lines: []string{"195 nvidia-frontend"}, expected: devices{"nvidia-frontend": 195}},
{lines: []string{"195 nvidia"}, expected: devices{"nvidia": 195}},
{lines: []string{"195 nvidia-frontend", "235 nvidia-caps"}, expected: devices{"nvidia-frontend": 195, "nvidia-caps": 235}},
{lines: []string{" 195 nvidia-frontend"}, expected: devices{"nvidia-frontend": 195}},
{lines: []string{"Not a valid line:", "", "195 nvidia-frontend"}, expected: devices{"nvidia-frontend": 195}},
@@ -63,7 +88,10 @@ func TestProcessDeviceFile(t *testing.T) {
d, err := nvidiaDeviceFrom(contents)
require.ErrorIs(t, err, tc.expectedError)
require.EqualValues(t, tc.expected, d)
if tc.expectedError == nil {
require.EqualValues(t, tc.expected, d.(devices))
}
})
}
}
@@ -71,8 +99,8 @@ func TestProcessDeviceFile(t *testing.T) {
func TestProcessDeviceFileLine(t *testing.T) {
testCases := []struct {
line string
name Name
major Major
name string
major int
err bool
}{
{"", "", 0, true},
@@ -97,8 +125,3 @@ func TestProcessDeviceFileLine(t *testing.T) {
})
}
}
// testDevices creates a set of test NVIDIA devices
func testDevices(d map[Name]Major) Devices {
return devices(d)
}

View File

@@ -20,12 +20,13 @@ import (
"fmt"
"strings"
"tags.cncf.io/container-device-interface/pkg/parser"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/container-orchestrated-devices/container-device-interface/pkg/parser"
)
type cdiModifier struct {

View File

@@ -19,10 +19,11 @@ package cdi
import (
"fmt"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type builder struct {

View File

@@ -20,10 +20,11 @@ import (
"errors"
"fmt"
"github.com/opencontainers/runtime-spec/specs-go"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/opencontainers/runtime-spec/specs-go"
)
// fromRegistry represents the modifications performed using a CDI registry.

View File

@@ -19,9 +19,10 @@ package cdi
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/opencontainers/runtime-spec/specs-go"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
// fromCDISpec represents the modifications performed from a raw CDI spec.

View File

@@ -28,15 +28,18 @@ import (
func TestCreateControlDevices(t *testing.T) {
logger, _ := testlog.NewNullLogger()
nvidiaDevices := &devices.DevicesMock{
GetFunc: func(name devices.Name) (devices.Major, bool) {
devices := map[devices.Name]devices.Major{
"nvidia-frontend": 195,
"nvidia-uvm": 243,
}
return devices[name], true
},
}
nvidiaDevices := devices.New(
devices.WithDeviceToMajor(map[string]int{
"nvidia-frontend": 195,
"nvidia-uvm": 243,
}),
)
nvidia550Devices := devices.New(
devices.WithDeviceToMajor(map[string]int{
"nvidia": 195,
"nvidia-uvm": 243,
}),
)
mknodeError := errors.New("mknode error")
@@ -53,7 +56,7 @@ func TestCreateControlDevices(t *testing.T) {
}
}{
{
description: "no root specified",
description: "no root specified; pre 550 driver",
root: "",
devices: nvidiaDevices,
mknodeError: nil,
@@ -68,6 +71,22 @@ func TestCreateControlDevices(t *testing.T) {
{"/dev/nvidia-uvm-tools", 243, 1},
},
},
{
description: "no root specified; 550 driver",
root: "",
devices: nvidia550Devices,
mknodeError: nil,
expectedCalls: []struct {
S string
N1 int
N2 int
}{
{"/dev/nvidiactl", 195, 255},
{"/dev/nvidia-modeset", 195, 254},
{"/dev/nvidia-uvm", 243, 0},
{"/dev/nvidia-uvm-tools", 243, 1},
},
},
{
description: "some root specified",
root: "/some/root",
@@ -129,5 +148,4 @@ func TestCreateControlDevices(t *testing.T) {
require.EqualValues(t, tc.expectedCalls, mknode.MknodeCalls())
})
}
}

View File

@@ -10,7 +10,7 @@ Build-Depends: debhelper (>= 9)
Package: nvidia-container-toolkit
Architecture: any
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (>= @LIBNVIDIA_CONTAINER_TOOLS_VERSION@), libnvidia-container-tools (<< 2.0.0), libseccomp2
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (>= @LIBNVIDIA_CONTAINER_TOOLS_VERSION@), libnvidia-container-tools (<< 2.0.0)
Breaks: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
Replaces: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
Description: NVIDIA Container toolkit

View File

@@ -23,13 +23,6 @@ Provides: nvidia-container-runtime-hook
Requires: libnvidia-container-tools >= %{libnvidia_container_tools_version}, libnvidia-container-tools < 2.0.0
Requires: nvidia-container-toolkit-base == %{version}-%{release}
%if 0%{?suse_version}
Requires: libseccomp2
Requires: libapparmor1
%else
Requires: libseccomp
%endif
%description
Provides tools and utilities to enable GPU support in containers.

View File

@@ -20,6 +20,7 @@ package engine
type Interface interface {
DefaultRuntime() string
AddRuntime(string, string, bool) error
Set(string, interface{}) error
RemoveRuntime(string) error
Save(string) (int64, error)
}

View File

@@ -19,8 +19,9 @@ package containerd
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
"github.com/pelletier/go-toml"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
)
// ConfigV1 represents a version 1 containerd config
@@ -134,6 +135,14 @@ func (c *ConfigV1) RemoveRuntime(name string) error {
return nil
}
// Set sets the specified containerd option.
func (c *ConfigV1) Set(key string, value interface{}) error {
config := *c.Tree
config.SetPath([]string{"plugins", "cri", "containerd", key}, value)
*c.Tree = config
return nil
}
// Save wrotes the config to a file
func (c ConfigV1) Save(path string) (int64, error) {
return (Config)(c).Save(path)

View File

@@ -90,6 +90,14 @@ func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) {
return annotations, nil
}
// Set sets the specified containerd option.
func (c *Config) Set(key string, value interface{}) error {
config := *c.Tree
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", key}, value)
*c.Tree = config
return nil
}
// DefaultRuntime returns the default runtime for the cri-o config
func (c Config) DefaultRuntime() string {
if runtime, ok := c.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok {

View File

@@ -99,6 +99,11 @@ func (c *Config) RemoveRuntime(name string) error {
return nil
}
// Set is not supported for cri-o configs.
func (c *Config) Set(key string, value interface{}) error {
return fmt.Errorf("Set is not supported for crio configs")
}
// Save writes the config to the specified path
func (c Config) Save(path string) (int64, error) {
config := (toml.Tree)(c)

View File

@@ -114,6 +114,12 @@ func (c *Config) RemoveRuntime(name string) error {
return nil
}
// Set sets the specified docker option
func (c *Config) Set(key string, value interface{}) error {
(*c)[key] = value
return nil
}
// Save writes the config to the specified path
func (c Config) Save(path string) (int64, error) {
output, err := json.MarshalIndent(c, "", " ")

View File

@@ -17,10 +17,11 @@
package nvcdi
import (
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
)
const (

View File

@@ -23,7 +23,7 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
)
// newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.

View File

@@ -22,12 +22,13 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"golang.org/x/sys/unix"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"golang.org/x/sys/unix"
)
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.

View File

@@ -33,6 +33,7 @@ var requiredDriverStoreFiles = []string{
"libnvidia-ml.so.1", /* Core library for nvml */
"libnvidia-ml_loader.so", /* Core library for nvml on WSL */
"libdxcore.so", /* Core library for dxcore support */
"libnvdxgdmal.so.1", /* dxgdmal library for cuda */
"nvcubins.bin", /* Binary containing GPU code for cuda */
"nvidia-smi", /* nvidia-smi binary*/
}

View File

@@ -22,14 +22,15 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.

View File

@@ -19,12 +19,13 @@ package nvcdi
import (
"fmt"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
)
type gdslib nvcdilib

View File

@@ -19,13 +19,14 @@ package nvcdi
import (
"fmt"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
)
type csvlib nvcdilib

View File

@@ -19,12 +19,13 @@ package nvcdi
import (
"fmt"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
type nvmllib nvcdilib

View File

@@ -19,11 +19,12 @@ package nvcdi
import (
"fmt"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
)
type wsllib nvcdilib

View File

@@ -19,13 +19,14 @@ package nvcdi
import (
"fmt"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
type wrapper struct {

View File

@@ -21,13 +21,14 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
)
type managementlib nvcdilib

View File

@@ -19,14 +19,15 @@ package nvcdi
import (
"fmt"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// GetMIGDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.

View File

@@ -19,12 +19,13 @@ package nvcdi
import (
"fmt"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
)
type mofedlib nvcdilib

View File

@@ -20,7 +20,7 @@ import (
"errors"
"fmt"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
)
// UUIDer is an interface for getting UUIDs.

View File

@@ -4,7 +4,7 @@
package nvcdi
import (
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"sync"
)

View File

@@ -19,8 +19,8 @@ package nvcdi
import (
"testing"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/stretchr/testify/require"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
func TestConvert(t *testing.T) {

View File

@@ -17,10 +17,11 @@
package nvcdi
import (
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// Option is a function that configures the nvcdilib

View File

@@ -19,7 +19,7 @@ package spec
import (
"io"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"tags.cncf.io/container-device-interface/specs-go"
)
const (

View File

@@ -20,9 +20,10 @@ import (
"fmt"
"os"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
type builder struct {

View File

@@ -22,8 +22,8 @@ import (
"os"
"path/filepath"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
)
type spec struct {

View File

@@ -16,7 +16,7 @@
package transform
import "github.com/container-orchestrated-devices/container-device-interface/specs-go"
import "tags.cncf.io/container-device-interface/specs-go"
// Transformer defines the API for applying arbitrary transforms to a spec in-place
type Transformer interface {

View File

@@ -17,7 +17,7 @@
package transform
import (
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"tags.cncf.io/container-device-interface/specs-go"
)
type dedupe struct{}

View File

@@ -19,8 +19,8 @@ package transform
import (
"testing"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/stretchr/testify/require"
"tags.cncf.io/container-device-interface/specs-go"
)
func TestDeduplicate(t *testing.T) {

View File

@@ -20,7 +20,7 @@ package transform
import (
"encoding/json"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"tags.cncf.io/container-device-interface/specs-go"
)
type containerEdits specs.ContainerEdits

View File

@@ -16,7 +16,7 @@
package transform
import "github.com/container-orchestrated-devices/container-device-interface/specs-go"
import "tags.cncf.io/container-device-interface/specs-go"
type merged []Transformer

View File

@@ -19,9 +19,10 @@ package transform
import (
"fmt"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
)
const (

View File

@@ -20,8 +20,8 @@ import (
"fmt"
"testing"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/stretchr/testify/require"
"tags.cncf.io/container-device-interface/specs-go"
)
func TestMergeDeviceSpecs(t *testing.T) {

View File

@@ -17,7 +17,7 @@
package transform
import (
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"tags.cncf.io/container-device-interface/specs-go"
)
type noop struct{}

View File

@@ -19,7 +19,7 @@ package transform
import (
"fmt"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"tags.cncf.io/container-device-interface/specs-go"
)
type remove map[string]bool

View File

@@ -21,7 +21,7 @@ import (
"path/filepath"
"strings"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"tags.cncf.io/container-device-interface/specs-go"
)
type rootTransformer struct {

View File

@@ -19,8 +19,8 @@ package transform
import (
"testing"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/stretchr/testify/require"
"tags.cncf.io/container-device-interface/specs-go"
)
func TestRootTransformer(t *testing.T) {

View File

@@ -19,7 +19,7 @@ package transform
import (
"fmt"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"tags.cncf.io/container-device-interface/specs-go"
)
type simplify struct{}

View File

@@ -19,8 +19,8 @@ package transform
import (
"testing"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/stretchr/testify/require"
"tags.cncf.io/container-device-interface/specs-go"
)
func TestSimplify(t *testing.T) {

View File

@@ -22,7 +22,7 @@ import (
"sort"
"strings"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"tags.cncf.io/container-device-interface/specs-go"
)
type sorter struct{}

View File

@@ -19,8 +19,8 @@ package transform
import (
"testing"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/stretchr/testify/require"
"tags.cncf.io/container-device-interface/specs-go"
)
func TestSortSpec(t *testing.T) {

View File

@@ -31,6 +31,8 @@ else
targets=${all[@]}
fi
# Skip component updates on release branches
SKIP_UPDATE_COMPONENTS=yes
if [[ x"${SKIP_UPDATE_COMPONENTS}" != x"yes" ]]; then
echo "Updating components"
"${SCRIPTS_DIR}/update-components.sh"

View File

@@ -25,7 +25,7 @@ RUN fpm -s empty \
rm -f /tmp/docker.rpm
RUN curl -s -L https://nvidia.github.io/libnvidia-container/centos8/libnvidia-container.repo \
RUN curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/libnvidia-container.repo \
| tee /etc/yum.repos.d/nvidia-container-toolkit.repo
COPY entrypoint.sh /

View File

@@ -23,13 +23,14 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
toml "github.com/pelletier/go-toml"
log "github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
"tags.cncf.io/container-device-interface/pkg/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
)
const (

View File

@@ -17,7 +17,7 @@
package device
import (
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
)
// Interface provides the API to the 'device' package

View File

@@ -19,8 +19,7 @@ package device
import (
"fmt"
"github.com/NVIDIA/go-nvml/pkg/dl"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
)
// Device defines the set of extended functions associated with a device.Device
@@ -152,8 +151,7 @@ func (d *device) GetCudaComputeCapabilityAsString() (string, error) {
// IsMigCapable checks if a device is capable of having MIG paprtitions created on it
func (d *device) IsMigCapable() (bool, error) {
err := d.lib.nvmlLookupSymbol("nvmlDeviceGetMigMode")
if err != nil {
if !d.lib.hasSymbol("nvmlDeviceGetMigMode") {
return false, nil
}
@@ -170,8 +168,7 @@ func (d *device) IsMigCapable() (bool, error) {
// IsMigEnabled checks if a device has MIG mode currently enabled on it
func (d *device) IsMigEnabled() (bool, error) {
err := d.lib.nvmlLookupSymbol("nvmlDeviceGetMigMode")
if err != nil {
if !d.lib.hasSymbol("nvmlDeviceGetMigMode") {
return false, nil
}
@@ -465,22 +462,12 @@ func (d *devicelib) GetMigProfiles() ([]MigProfile, error) {
return profiles, nil
}
// nvmlLookupSymbol checks to see if the given symbol is present in the NVML library
func (d *devicelib) nvmlLookupSymbol(symbol string) error {
// If devicelib is configured to not verify symbols, then we short-circuit here
// hasSymbol checks to see if the given symbol is present in the NVML library.
// If devicelib is configured to not verify symbols, then all symbols are assumed to exist.
func (d *devicelib) hasSymbol(symbol string) bool {
if !*d.verifySymbols {
return nil
return true
}
// Otherwise we lookup the provided symbol and verify it is available
lib := dl.New("libnvidia-ml.so.1", dl.RTLD_LAZY|dl.RTLD_GLOBAL)
if lib == nil {
return fmt.Errorf("error instantiating DynamicLibrary for NVML")
}
err := lib.Open()
if err != nil {
return fmt.Errorf("error opening DynamicLibrary for NVML: %v", err)
}
defer lib.Close()
return lib.Lookup(symbol)
return d.nvml.Lookup(symbol) == nil
}

View File

@@ -0,0 +1,94 @@
/*
* Copyright (c) NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package device
import (
"strconv"
"strings"
"github.com/google/uuid"
)
// Identifier can be used to refer to a GPU or MIG device.
// This includes a device index or UUID.
type Identifier string
// IsGpuIndex checks if an identifier is a full GPU index
func (i Identifier) IsGpuIndex() bool {
if _, err := strconv.ParseUint(string(i), 10, 0); err != nil {
return false
}
return true
}
// IsMigIndex checks if an identifier is a MIG index
func (i Identifier) IsMigIndex() bool {
split := strings.Split(string(i), ":")
if len(split) != 2 {
return false
}
for _, s := range split {
if !Identifier(s).IsGpuIndex() {
return false
}
}
return true
}
// IsUUID checks if an identifier is a UUID
func (i Identifier) IsUUID() bool {
return i.IsGpuUUID() || i.IsMigUUID()
}
// IsGpuUUID checks if an identifier is a GPU UUID
// A GPU UUID must be of the form GPU-b1028956-cfa2-0990-bf4a-5da9abb51763
func (i Identifier) IsGpuUUID() bool {
if !strings.HasPrefix(string(i), "GPU-") {
return false
}
_, err := uuid.Parse(strings.TrimPrefix(string(i), "GPU-"))
return err == nil
}
// IsMigUUID checks if an identifier is a MIG UUID
// A MIG UUID can be of one of two forms:
// - MIG-b1028956-cfa2-0990-bf4a-5da9abb51763
// - MIG-GPU-b1028956-cfa2-0990-bf4a-5da9abb51763/3/0
func (i Identifier) IsMigUUID() bool {
if !strings.HasPrefix(string(i), "MIG-") {
return false
}
suffix := strings.TrimPrefix(string(i), "MIG-")
_, err := uuid.Parse(suffix)
if err == nil {
return true
}
split := strings.Split(suffix, "/")
if len(split) != 3 {
return false
}
if !Identifier(split[0]).IsGpuUUID() {
return false
}
for _, s := range split[1:] {
_, err := strconv.ParseUint(s, 10, 0)
if err != nil {
return false
}
}
return true
}

View File

@@ -19,7 +19,7 @@ package device
import (
"fmt"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
)
// MigDevice defines the set of extended functions associated with a MIG device

View File

@@ -23,7 +23,7 @@ import (
"strconv"
"strings"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
)
const (

View File

@@ -20,6 +20,11 @@ import (
"github.com/NVIDIA/go-nvml/pkg/nvml"
)
// General untyped constants
const (
NVLINK_MAX_LINKS = nvml.NVLINK_MAX_LINKS
)
// Return constants
const (
SUCCESS = Return(nvml.SUCCESS)
@@ -131,3 +136,28 @@ const (
EventTypeSingleBitEccError = nvml.EventTypeSingleBitEccError
EventTypeDoubleBitEccError = nvml.EventTypeDoubleBitEccError
)
// GPU Topology enumeration
const (
TOPOLOGY_INTERNAL = GpuTopologyLevel(nvml.TOPOLOGY_INTERNAL)
TOPOLOGY_SINGLE = GpuTopologyLevel(nvml.TOPOLOGY_SINGLE)
TOPOLOGY_MULTIPLE = GpuTopologyLevel(nvml.TOPOLOGY_MULTIPLE)
TOPOLOGY_HOSTBRIDGE = GpuTopologyLevel(nvml.TOPOLOGY_HOSTBRIDGE)
TOPOLOGY_NODE = GpuTopologyLevel(nvml.TOPOLOGY_NODE)
TOPOLOGY_SYSTEM = GpuTopologyLevel(nvml.TOPOLOGY_SYSTEM)
)
// Generic enable/disable constants
const (
FEATURE_DISABLED = EnableState(nvml.FEATURE_DISABLED)
FEATURE_ENABLED = EnableState(nvml.FEATURE_ENABLED)
)
// Compute mode constants
const (
COMPUTEMODE_DEFAULT = ComputeMode(nvml.COMPUTEMODE_DEFAULT)
COMPUTEMODE_EXCLUSIVE_THREAD = ComputeMode(nvml.COMPUTEMODE_EXCLUSIVE_THREAD)
COMPUTEMODE_PROHIBITED = ComputeMode(nvml.COMPUTEMODE_PROHIBITED)
COMPUTEMODE_EXCLUSIVE_PROCESS = ComputeMode(nvml.COMPUTEMODE_EXCLUSIVE_PROCESS)
COMPUTEMODE_COUNT = ComputeMode(nvml.COMPUTEMODE_COUNT)
)

View File

@@ -22,6 +22,11 @@ type nvmlDevice nvml.Device
var _ Device = (*nvmlDevice)(nil)
// nvmlDeviceHandle returns a pointer to the underlying device.
func (d nvmlDevice) nvmlDeviceHandle() *nvml.Device {
return (*nvml.Device)(&d)
}
// GetIndex returns the index of a Device
func (d nvmlDevice) GetIndex() (int, Return) {
i, r := nvml.Device(d).GetIndex()
@@ -178,3 +183,33 @@ func (d nvmlDevice) GetSupportedEventTypes() (uint64, Return) {
e, r := nvml.Device(d).GetSupportedEventTypes()
return e, Return(r)
}
// GetTopologyCommonAncestor retrieves the common ancestor for two devices.
func (d nvmlDevice) GetTopologyCommonAncestor(o Device) (GpuTopologyLevel, Return) {
other := o.nvmlDeviceHandle()
if other == nil {
return 0, ERROR_INVALID_ARGUMENT
}
l, r := nvml.Device(d).GetTopologyCommonAncestor(*other)
return GpuTopologyLevel(l), Return(r)
}
// GetNvLinkState retrieves the state of the device's NvLink for the link specified.
func (d nvmlDevice) GetNvLinkState(link int) (EnableState, Return) {
s, r := nvml.Device(d).GetNvLinkState(link)
return EnableState(s), Return(r)
}
// GetNvLinkRemotePciInfo retrieves the PCI information for the remote node on a NvLink link.
// Note: pciSubSystemId is not filled in this function and is indeterminate.
func (d nvmlDevice) GetNvLinkRemotePciInfo(link int) (PciInfo, Return) {
p, r := nvml.Device(d).GetNvLinkRemotePciInfo(link)
return PciInfo(p), Return(r)
}
// SetComputeMode sets the compute mode for the device.
func (d nvmlDevice) SetComputeMode(mode ComputeMode) Return {
r := nvml.Device(d).SetComputeMode(nvml.ComputeMode(mode))
return Return(r)
}

View File

@@ -4,6 +4,7 @@
package nvml
import (
"github.com/NVIDIA/go-nvml/pkg/nvml"
"sync"
)
@@ -74,12 +75,21 @@ var _ Device = &DeviceMock{}
// GetNameFunc: func() (string, Return) {
// panic("mock out the GetName method")
// },
// GetNvLinkRemotePciInfoFunc: func(n int) (PciInfo, Return) {
// panic("mock out the GetNvLinkRemotePciInfo method")
// },
// GetNvLinkStateFunc: func(n int) (EnableState, Return) {
// panic("mock out the GetNvLinkState method")
// },
// GetPciInfoFunc: func() (PciInfo, Return) {
// panic("mock out the GetPciInfo method")
// },
// GetSupportedEventTypesFunc: func() (uint64, Return) {
// panic("mock out the GetSupportedEventTypes method")
// },
// GetTopologyCommonAncestorFunc: func(device Device) (GpuTopologyLevel, Return) {
// panic("mock out the GetTopologyCommonAncestor method")
// },
// GetUUIDFunc: func() (string, Return) {
// panic("mock out the GetUUID method")
// },
@@ -89,9 +99,15 @@ var _ Device = &DeviceMock{}
// RegisterEventsFunc: func(v uint64, eventSet EventSet) Return {
// panic("mock out the RegisterEvents method")
// },
// SetComputeModeFunc: func(computeMode ComputeMode) Return {
// panic("mock out the SetComputeMode method")
// },
// SetMigModeFunc: func(Mode int) (Return, Return) {
// panic("mock out the SetMigMode method")
// },
// nvmlDeviceHandleFunc: func() *nvml.Device {
// panic("mock out the nvmlDeviceHandle method")
// },
// }
//
// // use mockedDevice in code that requires Device
@@ -156,12 +172,21 @@ type DeviceMock struct {
// GetNameFunc mocks the GetName method.
GetNameFunc func() (string, Return)
// GetNvLinkRemotePciInfoFunc mocks the GetNvLinkRemotePciInfo method.
GetNvLinkRemotePciInfoFunc func(n int) (PciInfo, Return)
// GetNvLinkStateFunc mocks the GetNvLinkState method.
GetNvLinkStateFunc func(n int) (EnableState, Return)
// GetPciInfoFunc mocks the GetPciInfo method.
GetPciInfoFunc func() (PciInfo, Return)
// GetSupportedEventTypesFunc mocks the GetSupportedEventTypes method.
GetSupportedEventTypesFunc func() (uint64, Return)
// GetTopologyCommonAncestorFunc mocks the GetTopologyCommonAncestor method.
GetTopologyCommonAncestorFunc func(device Device) (GpuTopologyLevel, Return)
// GetUUIDFunc mocks the GetUUID method.
GetUUIDFunc func() (string, Return)
@@ -171,9 +196,15 @@ type DeviceMock struct {
// RegisterEventsFunc mocks the RegisterEvents method.
RegisterEventsFunc func(v uint64, eventSet EventSet) Return
// SetComputeModeFunc mocks the SetComputeMode method.
SetComputeModeFunc func(computeMode ComputeMode) Return
// SetMigModeFunc mocks the SetMigMode method.
SetMigModeFunc func(Mode int) (Return, Return)
// nvmlDeviceHandleFunc mocks the nvmlDeviceHandle method.
nvmlDeviceHandleFunc func() *nvml.Device
// calls tracks calls to the methods.
calls struct {
// CreateGpuInstanceWithPlacement holds details about calls to the CreateGpuInstanceWithPlacement method.
@@ -247,12 +278,27 @@ type DeviceMock struct {
// GetName holds details about calls to the GetName method.
GetName []struct {
}
// GetNvLinkRemotePciInfo holds details about calls to the GetNvLinkRemotePciInfo method.
GetNvLinkRemotePciInfo []struct {
// N is the n argument value.
N int
}
// GetNvLinkState holds details about calls to the GetNvLinkState method.
GetNvLinkState []struct {
// N is the n argument value.
N int
}
// GetPciInfo holds details about calls to the GetPciInfo method.
GetPciInfo []struct {
}
// GetSupportedEventTypes holds details about calls to the GetSupportedEventTypes method.
GetSupportedEventTypes []struct {
}
// GetTopologyCommonAncestor holds details about calls to the GetTopologyCommonAncestor method.
GetTopologyCommonAncestor []struct {
// Device is the device argument value.
Device Device
}
// GetUUID holds details about calls to the GetUUID method.
GetUUID []struct {
}
@@ -266,11 +312,19 @@ type DeviceMock struct {
// EventSet is the eventSet argument value.
EventSet EventSet
}
// SetComputeMode holds details about calls to the SetComputeMode method.
SetComputeMode []struct {
// ComputeMode is the computeMode argument value.
ComputeMode ComputeMode
}
// SetMigMode holds details about calls to the SetMigMode method.
SetMigMode []struct {
// Mode is the Mode argument value.
Mode int
}
// nvmlDeviceHandle holds details about calls to the nvmlDeviceHandle method.
nvmlDeviceHandle []struct {
}
}
lockCreateGpuInstanceWithPlacement sync.RWMutex
lockGetArchitecture sync.RWMutex
@@ -291,12 +345,17 @@ type DeviceMock struct {
lockGetMigMode sync.RWMutex
lockGetMinorNumber sync.RWMutex
lockGetName sync.RWMutex
lockGetNvLinkRemotePciInfo sync.RWMutex
lockGetNvLinkState sync.RWMutex
lockGetPciInfo sync.RWMutex
lockGetSupportedEventTypes sync.RWMutex
lockGetTopologyCommonAncestor sync.RWMutex
lockGetUUID sync.RWMutex
lockIsMigDeviceHandle sync.RWMutex
lockRegisterEvents sync.RWMutex
lockSetComputeMode sync.RWMutex
lockSetMigMode sync.RWMutex
locknvmlDeviceHandle sync.RWMutex
}
// CreateGpuInstanceWithPlacement calls CreateGpuInstanceWithPlacementFunc.
@@ -846,6 +905,70 @@ func (mock *DeviceMock) GetNameCalls() []struct {
return calls
}
// GetNvLinkRemotePciInfo calls GetNvLinkRemotePciInfoFunc.
func (mock *DeviceMock) GetNvLinkRemotePciInfo(n int) (PciInfo, Return) {
if mock.GetNvLinkRemotePciInfoFunc == nil {
panic("DeviceMock.GetNvLinkRemotePciInfoFunc: method is nil but Device.GetNvLinkRemotePciInfo was just called")
}
callInfo := struct {
N int
}{
N: n,
}
mock.lockGetNvLinkRemotePciInfo.Lock()
mock.calls.GetNvLinkRemotePciInfo = append(mock.calls.GetNvLinkRemotePciInfo, callInfo)
mock.lockGetNvLinkRemotePciInfo.Unlock()
return mock.GetNvLinkRemotePciInfoFunc(n)
}
// GetNvLinkRemotePciInfoCalls gets all the calls that were made to GetNvLinkRemotePciInfo.
// Check the length with:
//
// len(mockedDevice.GetNvLinkRemotePciInfoCalls())
func (mock *DeviceMock) GetNvLinkRemotePciInfoCalls() []struct {
N int
} {
var calls []struct {
N int
}
mock.lockGetNvLinkRemotePciInfo.RLock()
calls = mock.calls.GetNvLinkRemotePciInfo
mock.lockGetNvLinkRemotePciInfo.RUnlock()
return calls
}
// GetNvLinkState calls GetNvLinkStateFunc.
func (mock *DeviceMock) GetNvLinkState(n int) (EnableState, Return) {
if mock.GetNvLinkStateFunc == nil {
panic("DeviceMock.GetNvLinkStateFunc: method is nil but Device.GetNvLinkState was just called")
}
callInfo := struct {
N int
}{
N: n,
}
mock.lockGetNvLinkState.Lock()
mock.calls.GetNvLinkState = append(mock.calls.GetNvLinkState, callInfo)
mock.lockGetNvLinkState.Unlock()
return mock.GetNvLinkStateFunc(n)
}
// GetNvLinkStateCalls gets all the calls that were made to GetNvLinkState.
// Check the length with:
//
// len(mockedDevice.GetNvLinkStateCalls())
func (mock *DeviceMock) GetNvLinkStateCalls() []struct {
N int
} {
var calls []struct {
N int
}
mock.lockGetNvLinkState.RLock()
calls = mock.calls.GetNvLinkState
mock.lockGetNvLinkState.RUnlock()
return calls
}
// GetPciInfo calls GetPciInfoFunc.
func (mock *DeviceMock) GetPciInfo() (PciInfo, Return) {
if mock.GetPciInfoFunc == nil {
@@ -900,6 +1023,38 @@ func (mock *DeviceMock) GetSupportedEventTypesCalls() []struct {
return calls
}
// GetTopologyCommonAncestor calls GetTopologyCommonAncestorFunc.
func (mock *DeviceMock) GetTopologyCommonAncestor(device Device) (GpuTopologyLevel, Return) {
if mock.GetTopologyCommonAncestorFunc == nil {
panic("DeviceMock.GetTopologyCommonAncestorFunc: method is nil but Device.GetTopologyCommonAncestor was just called")
}
callInfo := struct {
Device Device
}{
Device: device,
}
mock.lockGetTopologyCommonAncestor.Lock()
mock.calls.GetTopologyCommonAncestor = append(mock.calls.GetTopologyCommonAncestor, callInfo)
mock.lockGetTopologyCommonAncestor.Unlock()
return mock.GetTopologyCommonAncestorFunc(device)
}
// GetTopologyCommonAncestorCalls gets all the calls that were made to GetTopologyCommonAncestor.
// Check the length with:
//
// len(mockedDevice.GetTopologyCommonAncestorCalls())
func (mock *DeviceMock) GetTopologyCommonAncestorCalls() []struct {
Device Device
} {
var calls []struct {
Device Device
}
mock.lockGetTopologyCommonAncestor.RLock()
calls = mock.calls.GetTopologyCommonAncestor
mock.lockGetTopologyCommonAncestor.RUnlock()
return calls
}
// GetUUID calls GetUUIDFunc.
func (mock *DeviceMock) GetUUID() (string, Return) {
if mock.GetUUIDFunc == nil {
@@ -990,6 +1145,38 @@ func (mock *DeviceMock) RegisterEventsCalls() []struct {
return calls
}
// SetComputeMode calls SetComputeModeFunc.
func (mock *DeviceMock) SetComputeMode(computeMode ComputeMode) Return {
if mock.SetComputeModeFunc == nil {
panic("DeviceMock.SetComputeModeFunc: method is nil but Device.SetComputeMode was just called")
}
callInfo := struct {
ComputeMode ComputeMode
}{
ComputeMode: computeMode,
}
mock.lockSetComputeMode.Lock()
mock.calls.SetComputeMode = append(mock.calls.SetComputeMode, callInfo)
mock.lockSetComputeMode.Unlock()
return mock.SetComputeModeFunc(computeMode)
}
// SetComputeModeCalls gets all the calls that were made to SetComputeMode.
// Check the length with:
//
// len(mockedDevice.SetComputeModeCalls())
func (mock *DeviceMock) SetComputeModeCalls() []struct {
ComputeMode ComputeMode
} {
var calls []struct {
ComputeMode ComputeMode
}
mock.lockSetComputeMode.RLock()
calls = mock.calls.SetComputeMode
mock.lockSetComputeMode.RUnlock()
return calls
}
// SetMigMode calls SetMigModeFunc.
func (mock *DeviceMock) SetMigMode(Mode int) (Return, Return) {
if mock.SetMigModeFunc == nil {
@@ -1021,3 +1208,30 @@ func (mock *DeviceMock) SetMigModeCalls() []struct {
mock.lockSetMigMode.RUnlock()
return calls
}
// nvmlDeviceHandle calls nvmlDeviceHandleFunc.
func (mock *DeviceMock) nvmlDeviceHandle() *nvml.Device {
if mock.nvmlDeviceHandleFunc == nil {
panic("DeviceMock.nvmlDeviceHandleFunc: method is nil but Device.nvmlDeviceHandle was just called")
}
callInfo := struct {
}{}
mock.locknvmlDeviceHandle.Lock()
mock.calls.nvmlDeviceHandle = append(mock.calls.nvmlDeviceHandle, callInfo)
mock.locknvmlDeviceHandle.Unlock()
return mock.nvmlDeviceHandleFunc()
}
// nvmlDeviceHandleCalls gets all the calls that were made to nvmlDeviceHandle.
// Check the length with:
//
// len(mockedDevice.nvmlDeviceHandleCalls())
func (mock *DeviceMock) nvmlDeviceHandleCalls() []struct {
} {
var calls []struct {
}
mock.locknvmlDeviceHandle.RLock()
calls = mock.calls.nvmlDeviceHandle
mock.locknvmlDeviceHandle.RUnlock()
return calls
}

Some files were not shown because too many files have changed in this diff Show More