Add devRoot option to CDI api

A driverRoot defines both the driver library root and the
root for device nodes. In the case of preinstalled drivers or
the driver container, these are equal, but in cases such as GKE
they do not match. In this case, drivers are extracted to a folder
and devices exist at the root /.

The changes here add a devRoot option to the nvcdi API that allows the
parent of /dev to be specified explicitly.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2023-11-14 16:57:37 +01:00
parent f6e3593a72
commit d4e21fdd10
15 changed files with 73 additions and 44 deletions

View File

@ -22,6 +22,7 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
@ -31,18 +32,23 @@ import (
)
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
func NewGraphicsDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string, nvidiaCTKPath string) (Discover, error) {
func NewGraphicsDiscoverer(logger logger.Interface, cfg *config.Config, devices image.VisibleDevices) (Discover, error) {
driverRoot := cfg.NVIDIAContainerCLIConfig.Root
// In standard usage, the devRoot is the same as the driverRoot.
devRoot := driverRoot
nvidiaCTKPath := cfg.NVIDIACTKConfig.Path
mounts, err := NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath)
if err != nil {
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
}
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, driverRoot)
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
}
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, driverRoot, nvidiaCTKPath)
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCTKPath)
discover := Merge(
Merge(drmDeviceNodes, drmByPathSymlinks),
@ -99,16 +105,16 @@ type drmDevicesByPath struct {
None
logger logger.Interface
nvidiaCTKPath string
driverRoot string
devRoot string
devicesFrom Discover
}
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, driverRoot string, nvidiaCTKPath string) Discover {
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCTKPath string) Discover {
d := drmDevicesByPath{
logger: logger,
nvidiaCTKPath: nvidiaCTKPath,
driverRoot: driverRoot,
devRoot: devRoot,
devicesFrom: devices,
}
@ -155,7 +161,7 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
linkLocator := lookup.NewFileLocator(
lookup.WithLogger(d.logger),
lookup.WithRoot(d.driverRoot),
lookup.WithRoot(d.devRoot),
)
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
if err != nil {
@ -181,21 +187,21 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
}
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Discover, error) {
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Discover, error) {
allDevices := NewDeviceDiscoverer(
logger,
lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithRoot(devRoot),
),
driverRoot,
devRoot,
[]string{
"/dev/dri/card*",
"/dev/dri/renderD*",
},
)
filter, err := newDRMDeviceFilter(logger, devices, driverRoot)
filter, err := newDRMDeviceFilter(logger, devices, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
}
@ -211,8 +217,8 @@ func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevice
}
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(driverRoot)
func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(devRoot)
if err != nil {
return nil, fmt.Errorf("failed to read GPU information: %v", err)
}

View File

@ -42,7 +42,9 @@ func NewGDSModifier(logger logger.Interface, cfg *config.Config, image image.CUD
return nil, nil
}
d, err := discover.NewGDSDiscoverer(logger, cfg.NVIDIAContainerCLIConfig.Root)
driverRoot := cfg.NVIDIAContainerCLIConfig.Root
devRoot := cfg.NVIDIAContainerCLIConfig.Root
d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %v", err)
}

View File

@ -36,9 +36,8 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag
d, err := discover.NewGraphicsDiscoverer(
logger,
cfg,
image.DevicesFromEnvvars(visibleDevicesEnvvar),
cfg.NVIDIAContainerCLIConfig.Root,
cfg.NVIDIACTKConfig.Path,
)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer: %v", err)

View File

@ -29,6 +29,7 @@ type tegraOptions struct {
logger logger.Interface
csvFiles []string
driverRoot string
devRoot string
nvidiaCTKPath string
librarySearchPaths []string
ignorePatterns ignoreMountSpecPatterns
@ -50,6 +51,10 @@ func New(opts ...Option) (discover.Discover, error) {
opt(o)
}
if o.devRoot == "" {
o.devRoot = o.driverRoot
}
if o.symlinkLocator == nil {
o.symlinkLocator = lookup.NewSymlinkLocator(
lookup.WithLogger(o.logger),
@ -112,6 +117,14 @@ func WithDriverRoot(driverRoot string) Option {
}
}
// WithDevRoot sets the /dev root.
// If this is unset, the driver root is assumed.
func WithDevRoot(driverRoot string) Option {
return func(o *tegraOptions) {
o.driverRoot = driverRoot
}
}
// WithCSVFiles sets the CSV files for the discoverer.
func WithCSVFiles(csvFiles []string) Option {
return func(o *tegraOptions) {

View File

@ -20,22 +20,19 @@ import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
)
// newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
// This includes driver libraries and meta devices, for example.
func newCommonNVMLDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
metaDevices := discover.NewDeviceDiscoverer(
logger,
l.logger,
lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
lookup.WithLogger(l.logger),
lookup.WithRoot(l.devRoot),
),
driverRoot,
l.devRoot,
[]string{
"/dev/nvidia-modeset",
"/dev/nvidia-uvm-tools",
@ -44,12 +41,12 @@ func newCommonNVMLDiscoverer(logger logger.Interface, driverRoot string, nvidiaC
},
)
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath)
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath)
if err != nil {
logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
}
driverFiles, err := NewDriverDiscoverer(logger, driverRoot, nvidiaCTKPath, nvmllib)
driverFiles, err := NewDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
}

View File

@ -26,11 +26,11 @@ const (
)
// newDXGDeviceDiscoverer returns a Discoverer for DXG devices under WSL2.
func newDXGDeviceDiscoverer(logger logger.Interface, driverRoot string) discover.Discover {
func newDXGDeviceDiscoverer(logger logger.Interface, devRoot string) discover.Discover {
deviceNodes := discover.NewCharDeviceDiscoverer(
logger,
[]string{dxgDeviceNode},
driverRoot,
devRoot,
)
return deviceNodes

View File

@ -54,7 +54,7 @@ func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, erro
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.
func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error) {
device, err := newFullGPUDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, d)
device, err := newFullGPUDiscoverer(l.logger, l.devRoot, l.nvidiaCTKPath, d)
if err != nil {
return nil, fmt.Errorf("failed to create device discoverer: %v", err)
}
@ -70,7 +70,7 @@ func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error
// byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links
type byPathHookDiscoverer struct {
logger logger.Interface
driverRoot string
devRoot string
nvidiaCTKPath string
pciBusID string
deviceNodes discover.Discover
@ -79,7 +79,7 @@ type byPathHookDiscoverer struct {
var _ discover.Discover = (*byPathHookDiscoverer)(nil)
// newFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
// TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface.
// This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin.
minor, ret := d.GetMinorNumber()
@ -104,12 +104,12 @@ func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKP
deviceNodes := discover.NewCharDeviceDiscoverer(
logger,
deviceNodePaths,
driverRoot,
devRoot,
)
byPathHooks := &byPathHookDiscoverer{
logger: logger,
driverRoot: driverRoot,
devRoot: devRoot,
nvidiaCTKPath: nvidiaCTKPath,
pciBusID: pciBusID,
deviceNodes: deviceNodes,
@ -117,7 +117,7 @@ func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKP
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
logger,
driverRoot,
devRoot,
nvidiaCTKPath,
deviceNodes,
)
@ -189,7 +189,7 @@ func (d *byPathHookDiscoverer) deviceNodeLinks() ([]string, error) {
var links []string
for _, c := range candidates {
linkPath := filepath.Join(d.driverRoot, c)
linkPath := filepath.Join(d.devRoot, c)
device, err := os.Readlink(linkPath)
if err != nil {
d.logger.Warningf("Failed to evaluate symlink %v; ignoring", linkPath)

View File

@ -33,7 +33,7 @@ var _ Interface = (*gdslib)(nil)
// GetAllDeviceSpecs returns the device specs for all available devices.
func (l *gdslib) GetAllDeviceSpecs() ([]specs.Device, error) {
discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot)
discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot, l.devRoot)
if err != nil {
return nil, fmt.Errorf("failed to create GPUDirect Storage discoverer: %v", err)
}

View File

@ -42,6 +42,7 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
d, err := tegra.New(
tegra.WithLogger(l.logger),
tegra.WithDriverRoot(l.driverRoot),
tegra.WithDevRoot(l.devRoot),
tegra.WithNVIDIACTKPath(l.nvidiaCTKPath),
tegra.WithCSVFiles(l.csvFiles),
tegra.WithLibrarySearchPaths(l.librarySearchPaths...),

View File

@ -66,7 +66,7 @@ func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) {
// GetCommonEdits generates a CDI specification that can be used for ANY devices
func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) {
common, err := newCommonNVMLDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib)
common, err := l.newCommonNVMLDiscoverer()
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
}

View File

@ -37,7 +37,7 @@ func (l *wsllib) GetSpec() (spec.Interface, error) {
// GetAllDeviceSpecs returns the device specs for all available devices.
func (l *wsllib) GetAllDeviceSpecs() ([]specs.Device, error) {
device := newDXGDeviceDiscoverer(l.logger, l.driverRoot)
device := newDXGDeviceDiscoverer(l.logger, l.devRoot)
deviceEdits, err := edits.FromDiscoverer(device)
if err != nil {
return nil, fmt.Errorf("failed to create container edits for DXG device: %v", err)

View File

@ -44,6 +44,7 @@ type nvcdilib struct {
devicelib device.Interface
deviceNamer DeviceNamer
driverRoot string
devRoot string
nvidiaCTKPath string
librarySearchPaths []string
@ -76,6 +77,9 @@ func New(opts ...Option) (Interface, error) {
if l.driverRoot == "" {
l.driverRoot = "/"
}
if l.devRoot == "" {
l.devRoot = l.driverRoot
}
if l.nvidiaCTKPath == "" {
l.nvidiaCTKPath = "/usr/bin/nvidia-ctk"
}

View File

@ -117,12 +117,12 @@ func (m *managementlib) newManagementDeviceDiscoverer() (discover.Discover, erro
"/dev/nvidia-uvm",
"/dev/nvidiactl",
},
m.driverRoot,
m.devRoot,
)
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
m.logger,
m.driverRoot,
m.devRoot,
m.nvidiaCTKPath,
deviceNodes,
)

View File

@ -47,6 +47,13 @@ func WithDriverRoot(root string) Option {
}
}
// WithDevRoot sets the root where /dev is located.
func WithDevRoot(root string) Option {
return func(l *nvcdilib) {
l.devRoot = root
}
}
// WithLogger sets the logger for the library
func WithLogger(logger logger.Interface) Option {
return func(l *nvcdilib) {

View File

@ -26,7 +26,7 @@ import (
type deviceFolderPermissions struct {
logger logger.Interface
driverRoot string
devRoot string
nvidiaCTKPath string
devices discover.Discover
}
@ -39,10 +39,10 @@ var _ discover.Discover = (*deviceFolderPermissions)(nil)
// The nested devices that are applicable to the NVIDIA GPU devices are:
// - DRM devices at /dev/dri/*
// - NVIDIA Caps devices at /dev/nvidia-caps/*
func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover {
func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover {
d := &deviceFolderPermissions{
logger: logger,
driverRoot: driverRoot,
devRoot: devRoot,
nvidiaCTKPath: nvidiaCTKPath,
devices: devices,
}