2022-11-23 15:29:18 +00:00
|
|
|
/**
|
|
|
|
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
**/
|
|
|
|
|
2022-12-02 13:17:52 +00:00
|
|
|
package nvcdi
|
2022-11-23 15:29:18 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2023-07-07 13:09:12 +00:00
|
|
|
"os"
|
2022-11-28 12:27:48 +00:00
|
|
|
"path/filepath"
|
2022-11-23 15:29:18 +00:00
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
2023-03-22 12:27:43 +00:00
|
|
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
2022-11-23 15:29:18 +00:00
|
|
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
2023-03-23 20:03:52 +00:00
|
|
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
|
2022-11-23 15:29:18 +00:00
|
|
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
2023-07-07 13:09:12 +00:00
|
|
|
"golang.org/x/sys/unix"
|
2022-11-23 15:29:18 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
|
|
|
|
// The supplied NVML Library is used to query the expected driver version.
|
2023-03-22 12:27:43 +00:00
|
|
|
func NewDriverDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
|
2023-03-15 08:12:44 +00:00
|
|
|
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
|
|
|
return nil, fmt.Errorf("failed to initalize NVML: %v", r)
|
|
|
|
}
|
|
|
|
defer nvmllib.Shutdown()
|
|
|
|
|
2022-11-28 12:27:48 +00:00
|
|
|
version, r := nvmllib.SystemGetDriverVersion()
|
|
|
|
if r != nvml.SUCCESS {
|
|
|
|
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
|
|
|
}
|
|
|
|
|
2023-03-01 10:16:38 +00:00
|
|
|
return newDriverVersionDiscoverer(logger, driverRoot, nvidiaCTKPath, version)
|
|
|
|
}
|
|
|
|
|
2023-03-22 12:27:43 +00:00
|
|
|
func newDriverVersionDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, version string) (discover.Discover, error) {
|
2023-02-02 14:42:01 +00:00
|
|
|
libraries, err := NewDriverLibraryDiscoverer(logger, driverRoot, nvidiaCTKPath, version)
|
2022-11-23 15:29:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
|
|
|
|
}
|
|
|
|
|
2023-02-07 11:16:58 +00:00
|
|
|
ipcs, err := discover.NewIPCDiscoverer(logger, driverRoot)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
|
|
|
|
}
|
|
|
|
|
2023-07-07 13:09:12 +00:00
|
|
|
firmwares, err := NewDriverFirmwareDiscoverer(logger, driverRoot, version)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err)
|
|
|
|
}
|
2022-11-28 12:27:48 +00:00
|
|
|
|
2023-02-02 14:42:01 +00:00
|
|
|
binaries := NewDriverBinariesDiscoverer(logger, driverRoot)
|
2022-11-23 15:29:18 +00:00
|
|
|
|
|
|
|
d := discover.Merge(
|
|
|
|
libraries,
|
2023-02-07 11:16:58 +00:00
|
|
|
ipcs,
|
2022-11-28 12:27:48 +00:00
|
|
|
firmwares,
|
2022-11-23 15:29:18 +00:00
|
|
|
binaries,
|
|
|
|
)
|
|
|
|
|
|
|
|
return d, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
|
2023-03-22 12:27:43 +00:00
|
|
|
func NewDriverLibraryDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, version string) (discover.Discover, error) {
|
2023-02-02 14:42:01 +00:00
|
|
|
libraryPaths, err := getVersionLibs(logger, driverRoot, version)
|
2022-11-23 15:29:18 +00:00
|
|
|
if err != nil {
|
2022-11-28 12:27:48 +00:00
|
|
|
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
|
2022-11-23 15:29:18 +00:00
|
|
|
}
|
|
|
|
|
2022-12-05 16:39:22 +00:00
|
|
|
libraries := discover.NewMounts(
|
|
|
|
logger,
|
|
|
|
lookup.NewFileLocator(
|
|
|
|
lookup.WithLogger(logger),
|
2023-02-02 14:42:01 +00:00
|
|
|
lookup.WithRoot(driverRoot),
|
2022-12-05 16:39:22 +00:00
|
|
|
),
|
2023-02-02 14:42:01 +00:00
|
|
|
driverRoot,
|
2022-12-05 16:39:22 +00:00
|
|
|
libraryPaths,
|
|
|
|
)
|
|
|
|
|
2023-05-10 12:49:59 +00:00
|
|
|
hooks, _ := discover.NewLDCacheUpdateHook(logger, libraries, nvidiaCTKPath)
|
2022-12-05 16:39:22 +00:00
|
|
|
|
|
|
|
d := discover.Merge(
|
|
|
|
libraries,
|
|
|
|
hooks,
|
|
|
|
)
|
2022-11-23 15:29:18 +00:00
|
|
|
|
2022-12-05 16:39:22 +00:00
|
|
|
return d, nil
|
2022-11-23 15:29:18 +00:00
|
|
|
}
|
|
|
|
|
2023-07-07 13:19:23 +00:00
|
|
|
func getUTSRelease() (string, error) {
|
2023-07-07 13:09:12 +00:00
|
|
|
utsname := &unix.Utsname{}
|
|
|
|
if err := unix.Uname(utsname); err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
return unix.ByteSliceToString(utsname.Release[:]), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func getFirmwareSearchPaths(logger logger.Interface) ([]string, error) {
|
2023-07-07 13:19:23 +00:00
|
|
|
utsRelease, err := getUTSRelease()
|
2023-07-07 13:09:12 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to get UTS_RELEASE: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
firmwarePaths := []string{
|
|
|
|
filepath.Join("/lib/firmware/updates/", utsRelease),
|
|
|
|
filepath.Join("/lib/firmware/updates/"),
|
|
|
|
filepath.Join("/lib/firmware/", utsRelease),
|
|
|
|
filepath.Join("/lib/firmware/"),
|
|
|
|
}
|
|
|
|
|
2023-07-07 13:19:23 +00:00
|
|
|
if p := getCustomFirmwareClassPath(logger); p != "" {
|
|
|
|
logger.Debugf("using custom firmware class path: %s", p)
|
|
|
|
firmwarePaths = append(firmwarePaths, p)
|
|
|
|
}
|
|
|
|
return firmwarePaths, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// getCustomFirmwareClassPath returns the custom firmware class path if it exists.
|
|
|
|
func getCustomFirmwareClassPath(logger logger.Interface) string {
|
2023-07-07 13:09:12 +00:00
|
|
|
customFirmwareClassPath, err := os.ReadFile("/sys/module/firmware_class/parameters/path")
|
|
|
|
if err != nil {
|
2023-07-07 13:19:23 +00:00
|
|
|
logger.Warningf("failed to get custom firmware class path: %v", err)
|
|
|
|
return ""
|
2023-07-07 13:09:12 +00:00
|
|
|
}
|
2023-07-07 13:19:23 +00:00
|
|
|
|
|
|
|
return strings.TrimSpace(string(customFirmwareClassPath))
|
2023-07-07 13:09:12 +00:00
|
|
|
}
|
|
|
|
|
2022-11-28 12:27:48 +00:00
|
|
|
// NewDriverFirmwareDiscoverer creates a discoverer for GSP firmware associated with the specified driver version.
|
2023-07-07 13:09:12 +00:00
|
|
|
func NewDriverFirmwareDiscoverer(logger logger.Interface, driverRoot string, version string) (discover.Discover, error) {
|
|
|
|
gspFirmwareSearchPaths, err := getFirmwareSearchPaths(logger)
|
|
|
|
if err != nil {
|
2023-07-07 13:19:23 +00:00
|
|
|
return nil, fmt.Errorf("failed to get firmware search paths: %v", err)
|
2023-07-07 13:09:12 +00:00
|
|
|
}
|
|
|
|
gspFirmwarePaths := filepath.Join("nvidia", version, "gsp*.bin")
|
2022-11-28 12:27:48 +00:00
|
|
|
return discover.NewMounts(
|
|
|
|
logger,
|
|
|
|
lookup.NewFileLocator(
|
|
|
|
lookup.WithLogger(logger),
|
2023-02-02 14:42:01 +00:00
|
|
|
lookup.WithRoot(driverRoot),
|
2023-07-07 13:09:12 +00:00
|
|
|
lookup.WithSearchPaths(gspFirmwareSearchPaths...),
|
2022-11-28 12:27:48 +00:00
|
|
|
),
|
2023-02-02 14:42:01 +00:00
|
|
|
driverRoot,
|
2023-07-07 13:09:12 +00:00
|
|
|
[]string{gspFirmwarePaths},
|
|
|
|
), nil
|
2022-11-28 12:27:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewDriverBinariesDiscoverer creates a discoverer for GSP firmware associated with the GPU driver.
|
2023-03-22 12:27:43 +00:00
|
|
|
func NewDriverBinariesDiscoverer(logger logger.Interface, driverRoot string) discover.Discover {
|
2022-11-28 12:27:48 +00:00
|
|
|
return discover.NewMounts(
|
|
|
|
logger,
|
2023-02-02 14:42:01 +00:00
|
|
|
lookup.NewExecutableLocator(logger, driverRoot),
|
|
|
|
driverRoot,
|
2022-11-28 12:27:48 +00:00
|
|
|
[]string{
|
|
|
|
"nvidia-smi", /* System management interface */
|
|
|
|
"nvidia-debugdump", /* GPU coredump utility */
|
|
|
|
"nvidia-persistenced", /* Persistence mode utility */
|
|
|
|
"nvidia-cuda-mps-control", /* Multi process service CLI */
|
|
|
|
"nvidia-cuda-mps-server", /* Multi process service server */
|
|
|
|
},
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2022-12-05 16:39:22 +00:00
|
|
|
// getVersionLibs checks the LDCache for libraries ending in the specified driver version.
|
2023-02-02 14:42:01 +00:00
|
|
|
// Although the ldcache at the specified driverRoot is queried, the paths are returned relative to this driverRoot.
|
2022-12-05 16:39:22 +00:00
|
|
|
// This allows the standard mount location logic to be used for resolving the mounts.
|
2023-03-22 12:27:43 +00:00
|
|
|
func getVersionLibs(logger logger.Interface, driverRoot string, version string) ([]string, error) {
|
2022-11-23 15:29:18 +00:00
|
|
|
logger.Infof("Using driver version %v", version)
|
|
|
|
|
2023-03-23 20:03:52 +00:00
|
|
|
libCudaPaths, err := cuda.New(
|
|
|
|
cuda.WithLogger(logger),
|
|
|
|
cuda.WithDriverRoot(driverRoot),
|
|
|
|
).Locate("." + version)
|
2022-11-23 15:29:18 +00:00
|
|
|
if err != nil {
|
2023-03-23 09:50:11 +00:00
|
|
|
return nil, fmt.Errorf("failed to locate libcuda.so.%v: %v", version, err)
|
2022-11-23 15:29:18 +00:00
|
|
|
}
|
2023-03-23 09:50:11 +00:00
|
|
|
libRoot := filepath.Dir(libCudaPaths[0])
|
2022-11-23 15:29:18 +00:00
|
|
|
|
2023-03-23 09:50:11 +00:00
|
|
|
libraries := lookup.NewFileLocator(
|
|
|
|
lookup.WithLogger(logger),
|
|
|
|
lookup.WithSearchPaths(libRoot),
|
|
|
|
lookup.WithOptional(true),
|
|
|
|
)
|
2022-11-23 15:29:18 +00:00
|
|
|
|
2023-03-23 09:50:11 +00:00
|
|
|
libs, err := libraries.Locate("*.so." + version)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to locate libraries for driver version %v: %v", version, err)
|
2022-11-23 15:29:18 +00:00
|
|
|
}
|
|
|
|
|
2023-02-02 14:42:01 +00:00
|
|
|
if driverRoot == "/" || driverRoot == "" {
|
2022-12-05 16:39:22 +00:00
|
|
|
return libs, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
var relative []string
|
|
|
|
for _, l := range libs {
|
2023-02-02 14:42:01 +00:00
|
|
|
relative = append(relative, strings.TrimPrefix(l, driverRoot))
|
2022-12-05 16:39:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return relative, nil
|
2022-11-23 15:29:18 +00:00
|
|
|
}
|