Add devRoot option to CDI API

A driverRoot defines both the driver library root and the
root for device nodes. In the case of preinstalled drivers or
the driver container, these are equal, but in cases such as GKE
they do not match: there, drivers are extracted to a folder
while device nodes exist at the root /.

The changes here add a devRoot option to the nvcdi API that allows the
parent of /dev to be specified explicitly.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2023-11-14 16:57:37 +01:00
parent f6e3593a72
commit d4e21fdd10
15 changed files with 73 additions and 44 deletions

View File

@ -22,6 +22,7 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm" "github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc" "github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
@ -31,18 +32,23 @@ import (
) )
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan. // NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
func NewGraphicsDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string, nvidiaCTKPath string) (Discover, error) { func NewGraphicsDiscoverer(logger logger.Interface, cfg *config.Config, devices image.VisibleDevices) (Discover, error) {
driverRoot := cfg.NVIDIAContainerCLIConfig.Root
// In standard usage, the devRoot is the same as the driverRoot.
devRoot := driverRoot
nvidiaCTKPath := cfg.NVIDIACTKConfig.Path
mounts, err := NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath) mounts, err := NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err) return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
} }
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, driverRoot) drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, devRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err) return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
} }
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, driverRoot, nvidiaCTKPath) drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCTKPath)
discover := Merge( discover := Merge(
Merge(drmDeviceNodes, drmByPathSymlinks), Merge(drmDeviceNodes, drmByPathSymlinks),
@ -99,16 +105,16 @@ type drmDevicesByPath struct {
None None
logger logger.Interface logger logger.Interface
nvidiaCTKPath string nvidiaCTKPath string
driverRoot string devRoot string
devicesFrom Discover devicesFrom Discover
} }
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer // newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, driverRoot string, nvidiaCTKPath string) Discover { func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCTKPath string) Discover {
d := drmDevicesByPath{ d := drmDevicesByPath{
logger: logger, logger: logger,
nvidiaCTKPath: nvidiaCTKPath, nvidiaCTKPath: nvidiaCTKPath,
driverRoot: driverRoot, devRoot: devRoot,
devicesFrom: devices, devicesFrom: devices,
} }
@ -155,7 +161,7 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
linkLocator := lookup.NewFileLocator( linkLocator := lookup.NewFileLocator(
lookup.WithLogger(d.logger), lookup.WithLogger(d.logger),
lookup.WithRoot(d.driverRoot), lookup.WithRoot(d.devRoot),
) )
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*") candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
if err != nil { if err != nil {
@ -181,21 +187,21 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
} }
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices. // newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Discover, error) { func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Discover, error) {
allDevices := NewDeviceDiscoverer( allDevices := NewDeviceDiscoverer(
logger, logger,
lookup.NewCharDeviceLocator( lookup.NewCharDeviceLocator(
lookup.WithLogger(logger), lookup.WithLogger(logger),
lookup.WithRoot(driverRoot), lookup.WithRoot(devRoot),
), ),
driverRoot, devRoot,
[]string{ []string{
"/dev/dri/card*", "/dev/dri/card*",
"/dev/dri/renderD*", "/dev/dri/renderD*",
}, },
) )
filter, err := newDRMDeviceFilter(logger, devices, driverRoot) filter, err := newDRMDeviceFilter(logger, devices, devRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err) return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
} }
@ -211,8 +217,8 @@ func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevice
} }
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices. // newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Filter, error) { func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(driverRoot) gpuInformationPaths, err := proc.GetInformationFilePaths(devRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to read GPU information: %v", err) return nil, fmt.Errorf("failed to read GPU information: %v", err)
} }

View File

@ -42,7 +42,9 @@ func NewGDSModifier(logger logger.Interface, cfg *config.Config, image image.CUD
return nil, nil return nil, nil
} }
d, err := discover.NewGDSDiscoverer(logger, cfg.NVIDIAContainerCLIConfig.Root) driverRoot := cfg.NVIDIAContainerCLIConfig.Root
devRoot := cfg.NVIDIAContainerCLIConfig.Root
d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %v", err) return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %v", err)
} }

View File

@ -36,9 +36,8 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag
d, err := discover.NewGraphicsDiscoverer( d, err := discover.NewGraphicsDiscoverer(
logger, logger,
cfg,
image.DevicesFromEnvvars(visibleDevicesEnvvar), image.DevicesFromEnvvars(visibleDevicesEnvvar),
cfg.NVIDIAContainerCLIConfig.Root,
cfg.NVIDIACTKConfig.Path,
) )
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to construct discoverer: %v", err) return nil, fmt.Errorf("failed to construct discoverer: %v", err)

View File

@ -29,6 +29,7 @@ type tegraOptions struct {
logger logger.Interface logger logger.Interface
csvFiles []string csvFiles []string
driverRoot string driverRoot string
devRoot string
nvidiaCTKPath string nvidiaCTKPath string
librarySearchPaths []string librarySearchPaths []string
ignorePatterns ignoreMountSpecPatterns ignorePatterns ignoreMountSpecPatterns
@ -50,6 +51,10 @@ func New(opts ...Option) (discover.Discover, error) {
opt(o) opt(o)
} }
if o.devRoot == "" {
o.devRoot = o.driverRoot
}
if o.symlinkLocator == nil { if o.symlinkLocator == nil {
o.symlinkLocator = lookup.NewSymlinkLocator( o.symlinkLocator = lookup.NewSymlinkLocator(
lookup.WithLogger(o.logger), lookup.WithLogger(o.logger),
@ -112,6 +117,14 @@ func WithDriverRoot(driverRoot string) Option {
} }
} }
// WithDevRoot sets the root under which /dev is located.
// If this is left unset (empty), New falls back to the driver root.
func WithDevRoot(devRoot string) Option {
	return func(o *tegraOptions) {
		// Assign the dedicated devRoot field; writing to driverRoot here would
		// clobber the value set by WithDriverRoot and leave devRoot empty.
		o.devRoot = devRoot
	}
}
// WithCSVFiles sets the CSV files for the discoverer. // WithCSVFiles sets the CSV files for the discoverer.
func WithCSVFiles(csvFiles []string) Option { func WithCSVFiles(csvFiles []string) Option {
return func(o *tegraOptions) { return func(o *tegraOptions) {

View File

@ -20,22 +20,19 @@ import (
"fmt" "fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
) )
// newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device. // newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
// This includes driver libraries and meta devices, for example. // This includes driver libraries and meta devices, for example.
func newCommonNVMLDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) { func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
metaDevices := discover.NewDeviceDiscoverer( metaDevices := discover.NewDeviceDiscoverer(
logger, l.logger,
lookup.NewCharDeviceLocator( lookup.NewCharDeviceLocator(
lookup.WithLogger(logger), lookup.WithLogger(l.logger),
lookup.WithRoot(driverRoot), lookup.WithRoot(l.devRoot),
), ),
driverRoot, l.devRoot,
[]string{ []string{
"/dev/nvidia-modeset", "/dev/nvidia-modeset",
"/dev/nvidia-uvm-tools", "/dev/nvidia-uvm-tools",
@ -44,12 +41,12 @@ func newCommonNVMLDiscoverer(logger logger.Interface, driverRoot string, nvidiaC
}, },
) )
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath) graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath)
if err != nil { if err != nil {
logger.Warningf("failed to create discoverer for graphics mounts: %v", err) l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
} }
driverFiles, err := NewDriverDiscoverer(logger, driverRoot, nvidiaCTKPath, nvmllib) driverFiles, err := NewDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err) return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
} }

View File

@ -26,11 +26,11 @@ const (
) )
// newDXGDeviceDiscoverer returns a Discoverer for DXG devices under WSL2. // newDXGDeviceDiscoverer returns a Discoverer for DXG devices under WSL2.
func newDXGDeviceDiscoverer(logger logger.Interface, driverRoot string) discover.Discover { func newDXGDeviceDiscoverer(logger logger.Interface, devRoot string) discover.Discover {
deviceNodes := discover.NewCharDeviceDiscoverer( deviceNodes := discover.NewCharDeviceDiscoverer(
logger, logger,
[]string{dxgDeviceNode}, []string{dxgDeviceNode},
driverRoot, devRoot,
) )
return deviceNodes return deviceNodes

View File

@ -54,7 +54,7 @@ func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, erro
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'. // GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.
func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error) { func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error) {
device, err := newFullGPUDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, d) device, err := newFullGPUDiscoverer(l.logger, l.devRoot, l.nvidiaCTKPath, d)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create device discoverer: %v", err) return nil, fmt.Errorf("failed to create device discoverer: %v", err)
} }
@ -70,7 +70,7 @@ func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error
// byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links // byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links
type byPathHookDiscoverer struct { type byPathHookDiscoverer struct {
logger logger.Interface logger logger.Interface
driverRoot string devRoot string
nvidiaCTKPath string nvidiaCTKPath string
pciBusID string pciBusID string
deviceNodes discover.Discover deviceNodes discover.Discover
@ -79,7 +79,7 @@ type byPathHookDiscoverer struct {
var _ discover.Discover = (*byPathHookDiscoverer)(nil) var _ discover.Discover = (*byPathHookDiscoverer)(nil)
// newFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device. // newFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) { func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
// TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface. // TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface.
// This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin. // This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin.
minor, ret := d.GetMinorNumber() minor, ret := d.GetMinorNumber()
@ -104,12 +104,12 @@ func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKP
deviceNodes := discover.NewCharDeviceDiscoverer( deviceNodes := discover.NewCharDeviceDiscoverer(
logger, logger,
deviceNodePaths, deviceNodePaths,
driverRoot, devRoot,
) )
byPathHooks := &byPathHookDiscoverer{ byPathHooks := &byPathHookDiscoverer{
logger: logger, logger: logger,
driverRoot: driverRoot, devRoot: devRoot,
nvidiaCTKPath: nvidiaCTKPath, nvidiaCTKPath: nvidiaCTKPath,
pciBusID: pciBusID, pciBusID: pciBusID,
deviceNodes: deviceNodes, deviceNodes: deviceNodes,
@ -117,7 +117,7 @@ func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKP
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer( deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
logger, logger,
driverRoot, devRoot,
nvidiaCTKPath, nvidiaCTKPath,
deviceNodes, deviceNodes,
) )
@ -189,7 +189,7 @@ func (d *byPathHookDiscoverer) deviceNodeLinks() ([]string, error) {
var links []string var links []string
for _, c := range candidates { for _, c := range candidates {
linkPath := filepath.Join(d.driverRoot, c) linkPath := filepath.Join(d.devRoot, c)
device, err := os.Readlink(linkPath) device, err := os.Readlink(linkPath)
if err != nil { if err != nil {
d.logger.Warningf("Failed to evaluate symlink %v; ignoring", linkPath) d.logger.Warningf("Failed to evaluate symlink %v; ignoring", linkPath)

View File

@ -33,7 +33,7 @@ var _ Interface = (*gdslib)(nil)
// GetAllDeviceSpecs returns the device specs for all available devices. // GetAllDeviceSpecs returns the device specs for all available devices.
func (l *gdslib) GetAllDeviceSpecs() ([]specs.Device, error) { func (l *gdslib) GetAllDeviceSpecs() ([]specs.Device, error) {
discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot) discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot, l.devRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create GPUDirect Storage discoverer: %v", err) return nil, fmt.Errorf("failed to create GPUDirect Storage discoverer: %v", err)
} }

View File

@ -42,6 +42,7 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
d, err := tegra.New( d, err := tegra.New(
tegra.WithLogger(l.logger), tegra.WithLogger(l.logger),
tegra.WithDriverRoot(l.driverRoot), tegra.WithDriverRoot(l.driverRoot),
tegra.WithDevRoot(l.devRoot),
tegra.WithNVIDIACTKPath(l.nvidiaCTKPath), tegra.WithNVIDIACTKPath(l.nvidiaCTKPath),
tegra.WithCSVFiles(l.csvFiles), tegra.WithCSVFiles(l.csvFiles),
tegra.WithLibrarySearchPaths(l.librarySearchPaths...), tegra.WithLibrarySearchPaths(l.librarySearchPaths...),

View File

@ -66,7 +66,7 @@ func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) {
// GetCommonEdits generates a CDI specification that can be used for ANY devices // GetCommonEdits generates a CDI specification that can be used for ANY devices
func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) { func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) {
common, err := newCommonNVMLDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib) common, err := l.newCommonNVMLDiscoverer()
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err) return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
} }

View File

@ -37,7 +37,7 @@ func (l *wsllib) GetSpec() (spec.Interface, error) {
// GetAllDeviceSpecs returns the device specs for all available devices. // GetAllDeviceSpecs returns the device specs for all available devices.
func (l *wsllib) GetAllDeviceSpecs() ([]specs.Device, error) { func (l *wsllib) GetAllDeviceSpecs() ([]specs.Device, error) {
device := newDXGDeviceDiscoverer(l.logger, l.driverRoot) device := newDXGDeviceDiscoverer(l.logger, l.devRoot)
deviceEdits, err := edits.FromDiscoverer(device) deviceEdits, err := edits.FromDiscoverer(device)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create container edits for DXG device: %v", err) return nil, fmt.Errorf("failed to create container edits for DXG device: %v", err)

View File

@ -44,6 +44,7 @@ type nvcdilib struct {
devicelib device.Interface devicelib device.Interface
deviceNamer DeviceNamer deviceNamer DeviceNamer
driverRoot string driverRoot string
devRoot string
nvidiaCTKPath string nvidiaCTKPath string
librarySearchPaths []string librarySearchPaths []string
@ -76,6 +77,9 @@ func New(opts ...Option) (Interface, error) {
if l.driverRoot == "" { if l.driverRoot == "" {
l.driverRoot = "/" l.driverRoot = "/"
} }
if l.devRoot == "" {
l.devRoot = l.driverRoot
}
if l.nvidiaCTKPath == "" { if l.nvidiaCTKPath == "" {
l.nvidiaCTKPath = "/usr/bin/nvidia-ctk" l.nvidiaCTKPath = "/usr/bin/nvidia-ctk"
} }

View File

@ -117,12 +117,12 @@ func (m *managementlib) newManagementDeviceDiscoverer() (discover.Discover, erro
"/dev/nvidia-uvm", "/dev/nvidia-uvm",
"/dev/nvidiactl", "/dev/nvidiactl",
}, },
m.driverRoot, m.devRoot,
) )
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer( deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
m.logger, m.logger,
m.driverRoot, m.devRoot,
m.nvidiaCTKPath, m.nvidiaCTKPath,
deviceNodes, deviceNodes,
) )

View File

@ -47,6 +47,13 @@ func WithDriverRoot(root string) Option {
} }
} }
// WithDevRoot returns an Option that records root as the location
// under which /dev is found.
func WithDevRoot(root string) Option {
	setDevRoot := func(lib *nvcdilib) {
		lib.devRoot = root
	}
	return setDevRoot
}
// WithLogger sets the logger for the library // WithLogger sets the logger for the library
func WithLogger(logger logger.Interface) Option { func WithLogger(logger logger.Interface) Option {
return func(l *nvcdilib) { return func(l *nvcdilib) {

View File

@ -26,7 +26,7 @@ import (
type deviceFolderPermissions struct { type deviceFolderPermissions struct {
logger logger.Interface logger logger.Interface
driverRoot string devRoot string
nvidiaCTKPath string nvidiaCTKPath string
devices discover.Discover devices discover.Discover
} }
@ -39,10 +39,10 @@ var _ discover.Discover = (*deviceFolderPermissions)(nil)
// The nested devices that are applicable to the NVIDIA GPU devices are: // The nested devices that are applicable to the NVIDIA GPU devices are:
// - DRM devices at /dev/dri/* // - DRM devices at /dev/dri/*
// - NVIDIA Caps devices at /dev/nvidia-caps/* // - NVIDIA Caps devices at /dev/nvidia-caps/*
func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover { func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover {
d := &deviceFolderPermissions{ d := &deviceFolderPermissions{
logger: logger, logger: logger,
driverRoot: driverRoot, devRoot: devRoot,
nvidiaCTKPath: nvidiaCTKPath, nvidiaCTKPath: nvidiaCTKPath,
devices: devices, devices: devices,
} }