mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Add devRoot option to CDI API
A driverRoot defines both the driver library root and the root for device nodes. In the case of preinstalled drivers or the driver container, these are equal, but in cases such as GKE they do not match: there, the drivers are extracted to a folder while the device nodes exist at the root /. The changes here add a devRoot option to the nvcdi API that allows the parent of /dev to be specified explicitly. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
@@ -20,22 +20,19 @@ import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
|
||||
// This includes driver libraries and meta devices, for example.
|
||||
func newCommonNVMLDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
|
||||
func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
|
||||
metaDevices := discover.NewDeviceDiscoverer(
|
||||
logger,
|
||||
l.logger,
|
||||
lookup.NewCharDeviceLocator(
|
||||
lookup.WithLogger(logger),
|
||||
lookup.WithRoot(driverRoot),
|
||||
lookup.WithLogger(l.logger),
|
||||
lookup.WithRoot(l.devRoot),
|
||||
),
|
||||
driverRoot,
|
||||
l.devRoot,
|
||||
[]string{
|
||||
"/dev/nvidia-modeset",
|
||||
"/dev/nvidia-uvm-tools",
|
||||
@@ -44,12 +41,12 @@ func newCommonNVMLDiscoverer(logger logger.Interface, driverRoot string, nvidiaC
|
||||
},
|
||||
)
|
||||
|
||||
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath)
|
||||
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath)
|
||||
if err != nil {
|
||||
logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
|
||||
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
|
||||
}
|
||||
|
||||
driverFiles, err := NewDriverDiscoverer(logger, driverRoot, nvidiaCTKPath, nvmllib)
|
||||
driverFiles, err := NewDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
|
||||
}
|
||||
|
||||
@@ -26,11 +26,11 @@ const (
|
||||
)
|
||||
|
||||
// newDXGDeviceDiscoverer returns a Discoverer for DXG devices under WSL2.
|
||||
func newDXGDeviceDiscoverer(logger logger.Interface, driverRoot string) discover.Discover {
|
||||
func newDXGDeviceDiscoverer(logger logger.Interface, devRoot string) discover.Discover {
|
||||
deviceNodes := discover.NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
[]string{dxgDeviceNode},
|
||||
driverRoot,
|
||||
devRoot,
|
||||
)
|
||||
|
||||
return deviceNodes
|
||||
|
||||
@@ -54,7 +54,7 @@ func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, erro
|
||||
|
||||
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.
|
||||
func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error) {
|
||||
device, err := newFullGPUDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, d)
|
||||
device, err := newFullGPUDiscoverer(l.logger, l.devRoot, l.nvidiaCTKPath, d)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device discoverer: %v", err)
|
||||
}
|
||||
@@ -70,7 +70,7 @@ func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error
|
||||
// byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links
|
||||
type byPathHookDiscoverer struct {
|
||||
logger logger.Interface
|
||||
driverRoot string
|
||||
devRoot string
|
||||
nvidiaCTKPath string
|
||||
pciBusID string
|
||||
deviceNodes discover.Discover
|
||||
@@ -79,7 +79,7 @@ type byPathHookDiscoverer struct {
|
||||
var _ discover.Discover = (*byPathHookDiscoverer)(nil)
|
||||
|
||||
// newFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
|
||||
func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
|
||||
func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
|
||||
// TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface.
|
||||
// This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin.
|
||||
minor, ret := d.GetMinorNumber()
|
||||
@@ -104,12 +104,12 @@ func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKP
|
||||
deviceNodes := discover.NewCharDeviceDiscoverer(
|
||||
logger,
|
||||
deviceNodePaths,
|
||||
driverRoot,
|
||||
devRoot,
|
||||
)
|
||||
|
||||
byPathHooks := &byPathHookDiscoverer{
|
||||
logger: logger,
|
||||
driverRoot: driverRoot,
|
||||
devRoot: devRoot,
|
||||
nvidiaCTKPath: nvidiaCTKPath,
|
||||
pciBusID: pciBusID,
|
||||
deviceNodes: deviceNodes,
|
||||
@@ -117,7 +117,7 @@ func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKP
|
||||
|
||||
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
|
||||
logger,
|
||||
driverRoot,
|
||||
devRoot,
|
||||
nvidiaCTKPath,
|
||||
deviceNodes,
|
||||
)
|
||||
@@ -189,7 +189,7 @@ func (d *byPathHookDiscoverer) deviceNodeLinks() ([]string, error) {
|
||||
|
||||
var links []string
|
||||
for _, c := range candidates {
|
||||
linkPath := filepath.Join(d.driverRoot, c)
|
||||
linkPath := filepath.Join(d.devRoot, c)
|
||||
device, err := os.Readlink(linkPath)
|
||||
if err != nil {
|
||||
d.logger.Warningf("Failed to evaluate symlink %v; ignoring", linkPath)
|
||||
|
||||
@@ -33,7 +33,7 @@ var _ Interface = (*gdslib)(nil)
|
||||
|
||||
// GetAllDeviceSpecs returns the device specs for all available devices.
|
||||
func (l *gdslib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot)
|
||||
discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot, l.devRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create GPUDirect Storage discoverer: %v", err)
|
||||
}
|
||||
|
||||
@@ -42,6 +42,7 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
d, err := tegra.New(
|
||||
tegra.WithLogger(l.logger),
|
||||
tegra.WithDriverRoot(l.driverRoot),
|
||||
tegra.WithDevRoot(l.devRoot),
|
||||
tegra.WithNVIDIACTKPath(l.nvidiaCTKPath),
|
||||
tegra.WithCSVFiles(l.csvFiles),
|
||||
tegra.WithLibrarySearchPaths(l.librarySearchPaths...),
|
||||
|
||||
@@ -66,7 +66,7 @@ func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
|
||||
// GetCommonEdits generates a CDI specification that can be used for ANY devices
|
||||
func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
||||
common, err := newCommonNVMLDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib)
|
||||
common, err := l.newCommonNVMLDiscoverer()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ func (l *wsllib) GetSpec() (spec.Interface, error) {
|
||||
|
||||
// GetAllDeviceSpecs returns the device specs for all available devices.
|
||||
func (l *wsllib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
device := newDXGDeviceDiscoverer(l.logger, l.driverRoot)
|
||||
device := newDXGDeviceDiscoverer(l.logger, l.devRoot)
|
||||
deviceEdits, err := edits.FromDiscoverer(device)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create container edits for DXG device: %v", err)
|
||||
|
||||
@@ -44,6 +44,7 @@ type nvcdilib struct {
|
||||
devicelib device.Interface
|
||||
deviceNamer DeviceNamer
|
||||
driverRoot string
|
||||
devRoot string
|
||||
nvidiaCTKPath string
|
||||
librarySearchPaths []string
|
||||
|
||||
@@ -76,6 +77,9 @@ func New(opts ...Option) (Interface, error) {
|
||||
if l.driverRoot == "" {
|
||||
l.driverRoot = "/"
|
||||
}
|
||||
if l.devRoot == "" {
|
||||
l.devRoot = l.driverRoot
|
||||
}
|
||||
if l.nvidiaCTKPath == "" {
|
||||
l.nvidiaCTKPath = "/usr/bin/nvidia-ctk"
|
||||
}
|
||||
|
||||
@@ -117,12 +117,12 @@ func (m *managementlib) newManagementDeviceDiscoverer() (discover.Discover, erro
|
||||
"/dev/nvidia-uvm",
|
||||
"/dev/nvidiactl",
|
||||
},
|
||||
m.driverRoot,
|
||||
m.devRoot,
|
||||
)
|
||||
|
||||
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
|
||||
m.logger,
|
||||
m.driverRoot,
|
||||
m.devRoot,
|
||||
m.nvidiaCTKPath,
|
||||
deviceNodes,
|
||||
)
|
||||
|
||||
@@ -47,6 +47,13 @@ func WithDriverRoot(root string) Option {
|
||||
}
|
||||
}
|
||||
|
||||
// WithDevRoot sets the root where /dev is located.
|
||||
func WithDevRoot(root string) Option {
|
||||
return func(l *nvcdilib) {
|
||||
l.devRoot = root
|
||||
}
|
||||
}
|
||||
|
||||
// WithLogger sets the logger for the library
|
||||
func WithLogger(logger logger.Interface) Option {
|
||||
return func(l *nvcdilib) {
|
||||
|
||||
@@ -26,7 +26,7 @@ import (
|
||||
|
||||
type deviceFolderPermissions struct {
|
||||
logger logger.Interface
|
||||
driverRoot string
|
||||
devRoot string
|
||||
nvidiaCTKPath string
|
||||
devices discover.Discover
|
||||
}
|
||||
@@ -39,10 +39,10 @@ var _ discover.Discover = (*deviceFolderPermissions)(nil)
|
||||
// The nested devices that are applicable to the NVIDIA GPU devices are:
|
||||
// - DRM devices at /dev/dri/*
|
||||
// - NVIDIA Caps devices at /dev/nvidia-caps/*
|
||||
func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover {
|
||||
func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover {
|
||||
d := &deviceFolderPermissions{
|
||||
logger: logger,
|
||||
driverRoot: driverRoot,
|
||||
devRoot: devRoot,
|
||||
nvidiaCTKPath: nvidiaCTKPath,
|
||||
devices: devices,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user