From 55df68451e5245fe373b0ddb3d6a2db7b4b72cb1 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 12 Jul 2023 10:49:29 +0200 Subject: [PATCH 1/5] Bump version v1.13.5 Signed-off-by: Evan Lezar --- versions.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.mk b/versions.mk index 010ed079..e4186db4 100644 --- a/versions.mk +++ b/versions.mk @@ -13,7 +13,7 @@ # limitations under the License. LIB_NAME := nvidia-container-toolkit -LIB_VERSION := 1.13.4 +LIB_VERSION := 1.13.5 LIB_TAG := # The package version is the combination of the library version and tag. From b07cf675ae7a2296b3182567b2313f61ec936880 Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Wed, 5 Jul 2023 08:43:26 +0000 Subject: [PATCH 2/5] Merge branch 'rpm-fix-missing-coreutils-during-install' into 'main' RPM spec: Avoid scriptlet failure during initial system installation See merge request nvidia/container-toolkit/container-toolkit!432 --- CHANGELOG.md | 3 +++ packaging/rpm/SPECS/nvidia-container-toolkit.spec | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 913f4d92..d17b1778 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # NVIDIA Container Toolkit Changelog +## v1.13.5 +* Remove dependency on `coreutils` when installing the NVIDIA Container Toolkit on RPM-based systems. + ## v1.13.4 * [toolkit-container] Bump CUDA base image version to 12.2.0. 
diff --git a/packaging/rpm/SPECS/nvidia-container-toolkit.spec b/packaging/rpm/SPECS/nvidia-container-toolkit.spec index ed44bc37..c61a47da 100644 --- a/packaging/rpm/SPECS/nvidia-container-toolkit.spec +++ b/packaging/rpm/SPECS/nvidia-container-toolkit.spec @@ -57,12 +57,14 @@ mkdir -p %{buildroot}/usr/share/containers/oci/hooks.d install -m 644 -t %{buildroot}/usr/share/containers/oci/hooks.d oci-nvidia-hook.json %post -mkdir -p %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit -cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit +if [ $1 -gt 1 ]; then # only on package upgrade + mkdir -p %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit + cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit +fi %posttrans if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then - # reparing lost file nvidia-container-runtime-hook + # repairing lost file nvidia-container-runtime-hook cp -avf %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit/nvidia-container-runtime-hook %{_bindir} fi rm -rf %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit From 05dcaf58d4a4ba89ef43b9d2401af737d49d751a Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Tue, 11 Jul 2023 09:16:33 +0000 Subject: [PATCH 3/5] Merge branch 'custom-firmware-paths' into 'main' Add firmware search paths when generating CDI specifications See merge request nvidia/container-toolkit/container-toolkit!439 --- CHANGELOG.md | 1 + pkg/nvcdi/driver-nvml.go | 62 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d17b1778..31c14638 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## v1.13.5 * Remove dependency on `coreutils` when installing the NVIDIA Container Toolkit on RPM-based systems. 
+* Added support for detecting GSP firmware at custom paths when generating CDI specifications. ## v1.13.4 * [toolkit-container] Bump CUDA base image version to 12.2.0. diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index 408da55a..4e5fd538 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -18,6 +18,7 @@ package nvcdi import ( "fmt" + "os" "path/filepath" "strings" @@ -26,6 +27,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda" "github.com/sirupsen/logrus" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml" + "golang.org/x/sys/unix" ) // NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation. @@ -55,7 +57,10 @@ func newDriverVersionDiscoverer(logger *logrus.Logger, driverRoot string, nvidia return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err) } - firmwares := NewDriverFirmwareDiscoverer(logger, driverRoot, version) + firmwares, err := NewDriverFirmwareDiscoverer(logger, driverRoot, version) + if err != nil { + return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err) + } binaries := NewDriverBinariesDiscoverer(logger, driverRoot) @@ -100,18 +105,65 @@ func NewDriverLibraryDiscoverer(logger *logrus.Logger, driverRoot string, nvidia return d, nil } +func getUTSRelease() (string, error) { + utsname := &unix.Utsname{} + if err := unix.Uname(utsname); err != nil { + return "", err + } + return unix.ByteSliceToString(utsname.Release[:]), nil +} + +func getFirmwareSearchPaths(logger *logrus.Logger) ([]string, error) { + + var firmwarePaths []string + if p := getCustomFirmwareClassPath(logger); p != "" { + logger.Debugf("using custom firmware class path: %s", p) + firmwarePaths = append(firmwarePaths, p) + } + + utsRelease, err := getUTSRelease() + if err != nil { + return nil, fmt.Errorf("failed to get UTS_RELEASE: %v", err) + } + + standardPaths := []string{ + filepath.Join("/lib/firmware/updates/", 
utsRelease), + filepath.Join("/lib/firmware/updates/"), + filepath.Join("/lib/firmware/", utsRelease), + filepath.Join("/lib/firmware/"), + } + + return append(firmwarePaths, standardPaths...), nil +} + +// getCustomFirmwareClassPath returns the custom firmware class path if it exists. +func getCustomFirmwareClassPath(logger *logrus.Logger) string { + customFirmwareClassPath, err := os.ReadFile("/sys/module/firmware_class/parameters/path") + if err != nil { + logger.Warningf("failed to get custom firmware class path: %v", err) + return "" + } + + return strings.TrimSpace(string(customFirmwareClassPath)) +} + // NewDriverFirmwareDiscoverer creates a discoverer for GSP firmware associated with the specified driver version. -func NewDriverFirmwareDiscoverer(logger *logrus.Logger, driverRoot string, version string) discover.Discover { - gspFirmwarePath := filepath.Join("/lib/firmware/nvidia", version, "gsp*.bin") +func NewDriverFirmwareDiscoverer(logger *logrus.Logger, driverRoot string, version string) (discover.Discover, error) { + gspFirmwareSearchPaths, err := getFirmwareSearchPaths(logger) + if err != nil { + return nil, fmt.Errorf("failed to get firmware search paths: %v", err) + } + gspFirmwarePaths := filepath.Join("nvidia", version, "gsp*.bin") return discover.NewMounts( logger, lookup.NewFileLocator( lookup.WithLogger(logger), lookup.WithRoot(driverRoot), + lookup.WithSearchPaths(gspFirmwareSearchPaths...), ), driverRoot, - []string{gspFirmwarePath}, - ) + []string{gspFirmwarePaths}, + ), nil } // NewDriverBinariesDiscoverer creates a discoverer for GSP firmware associated with the GPU driver. 
From de7659881187a416a4cd7e9f6c3e0854f15abcb4 Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Mon, 17 Jul 2023 14:29:17 +0000 Subject: [PATCH 4/5] Merge branch 'remove-centos7-aarch64-scan' into 'main' Remove centos7-arm64 scan See merge request nvidia/container-toolkit/container-toolkit!445 --- .nvidia-ci.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.nvidia-ci.yml b/.nvidia-ci.yml index 5b6d7904..0240ca6c 100644 --- a/.nvidia-ci.yml +++ b/.nvidia-ci.yml @@ -138,16 +138,6 @@ scan-centos7-amd64: needs: - image-centos7 -scan-centos7-arm64: - extends: - - .dist-centos7 - - .platform-arm64 - - .scan - needs: - - image-centos7 - - scan-centos7-amd64 - allow_failure: true - scan-ubuntu20.04-amd64: extends: - .dist-ubuntu20.04 From 60a60778aeacf666017e83ff8370326cabeab10e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 18 Jul 2023 12:21:29 +0200 Subject: [PATCH 5/5] Update libnvidia-container to 1.13.5 Signed-off-by: Evan Lezar --- CHANGELOG.md | 2 ++ third_party/libnvidia-container | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31c14638..f6cf0b92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ * Remove dependency on `coreutils` when installing the NVIDIA Container Toolkit on RPM-based systems. * Added support for detecting GSP firmware at custom paths when generating CDI specifications. +* [libnvidia-container] Include Shared Compiler Library (libnvidia-gpucomp.so) in the list of compute libraries. + ## v1.13.4 * [toolkit-container] Bump CUDA base image version to 12.2.0. diff --git a/third_party/libnvidia-container b/third_party/libnvidia-container index 31e068e7..66607bd0 160000 --- a/third_party/libnvidia-container +++ b/third_party/libnvidia-container @@ -1 +1 @@ -Subproject commit 31e068e7ab3e2294a379cbf11cc7a99281f41b66 +Subproject commit 66607bd046341f7aad7de80a9f022f122d1f2fce