Merge branch 'allow-separate-dev-root' into 'main'

Add devRoot option to CDI API

See merge request nvidia/container-toolkit/container-toolkit!497
This commit is contained in:
Evan Lezar 2023-11-20 21:10:12 +00:00
commit e315d7d74b
22 changed files with 93 additions and 73 deletions

View File

@ -45,6 +45,7 @@ type options struct {
format string
deviceNameStrategy string
driverRoot string
devRoot string
nvidiaCTKPath string
mode string
vendor string
@ -101,6 +102,11 @@ func (m command) build() *cli.Command {
Value: nvcdi.ModeAuto,
Destination: &opts.mode,
},
&cli.StringFlag{
Name: "dev-root",
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
Destination: &opts.devRoot,
},
&cli.StringFlag{
Name: "device-name-strategy",
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
@ -236,6 +242,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
cdilib, err := nvcdi.New(
nvcdi.WithLogger(m.logger),
nvcdi.WithDriverRoot(opts.driverRoot),
nvcdi.WithDevRoot(opts.devRoot),
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
nvcdi.WithDeviceNamer(deviceNamer),
nvcdi.WithMode(opts.mode),

View File

@ -27,20 +27,13 @@ type charDevices mounts
var _ Discover = (*charDevices)(nil)
// NewCharDeviceDiscoverer creates a discoverer which locates the specified set of device nodes.
func NewCharDeviceDiscoverer(logger logger.Interface, devices []string, root string) Discover {
func NewCharDeviceDiscoverer(logger logger.Interface, devRoot string, devices []string) Discover {
locator := lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(root),
lookup.WithRoot(devRoot),
)
return NewDeviceDiscoverer(logger, locator, root, devices)
}
// NewDeviceDiscoverer creates a discoverer which locates the specified set of device nodes using the specified locator.
func NewDeviceDiscoverer(logger logger.Interface, locator lookup.Locator, root string, devices []string) Discover {
m := NewMounts(logger, locator, root, devices).(*mounts)
return (*charDevices)(m)
return (*charDevices)(newMounts(logger, locator, devRoot, devices))
}
// Mounts returns the discovered mounts for the charDevices.

View File

@ -29,17 +29,17 @@ type gdsDeviceDiscoverer struct {
}
// NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
func NewGDSDiscoverer(logger logger.Interface, root string) (Discover, error) {
func NewGDSDiscoverer(logger logger.Interface, driverRoot string, devRoot string) (Discover, error) {
devices := NewCharDeviceDiscoverer(
logger,
devRoot,
[]string{"/dev/nvidia-fs*"},
root,
)
udev := NewMounts(
logger,
lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(root)),
root,
lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(driverRoot)),
driverRoot,
[]string{"/run/udev"},
)
@ -47,9 +47,9 @@ func NewGDSDiscoverer(logger logger.Interface, root string) (Discover, error) {
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(root),
lookup.WithRoot(driverRoot),
),
root,
driverRoot,
[]string{"/etc/cufile.json"},
)

View File

@ -22,6 +22,7 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
@ -31,18 +32,23 @@ import (
)
// NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan.
func NewGraphicsDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string, nvidiaCTKPath string) (Discover, error) {
func NewGraphicsDiscoverer(logger logger.Interface, cfg *config.Config, devices image.VisibleDevices) (Discover, error) {
driverRoot := cfg.NVIDIAContainerCLIConfig.Root
// In standard usage, the devRoot is the same as the driverRoot.
devRoot := driverRoot
nvidiaCTKPath := cfg.NVIDIACTKConfig.Path
mounts, err := NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath)
if err != nil {
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
}
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, driverRoot)
drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err)
}
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, driverRoot, nvidiaCTKPath)
drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCTKPath)
discover := Merge(
Merge(drmDeviceNodes, drmByPathSymlinks),
@ -99,16 +105,16 @@ type drmDevicesByPath struct {
None
logger logger.Interface
nvidiaCTKPath string
driverRoot string
devRoot string
devicesFrom Discover
}
// newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, driverRoot string, nvidiaCTKPath string) Discover {
func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCTKPath string) Discover {
d := drmDevicesByPath{
logger: logger,
nvidiaCTKPath: nvidiaCTKPath,
driverRoot: driverRoot,
devRoot: devRoot,
devicesFrom: devices,
}
@ -155,7 +161,7 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
linkLocator := lookup.NewFileLocator(
lookup.WithLogger(d.logger),
lookup.WithRoot(d.driverRoot),
lookup.WithRoot(d.devRoot),
)
candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*")
if err != nil {
@ -181,21 +187,17 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error
}
// newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices.
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Discover, error) {
allDevices := NewDeviceDiscoverer(
func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Discover, error) {
allDevices := NewCharDeviceDiscoverer(
logger,
lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
),
driverRoot,
devRoot,
[]string{
"/dev/dri/card*",
"/dev/dri/renderD*",
},
)
filter, err := newDRMDeviceFilter(logger, devices, driverRoot)
filter, err := newDRMDeviceFilter(logger, devices, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct DRM device filter: %v", err)
}
@ -211,8 +213,8 @@ func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevice
}
// newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices.
func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(driverRoot)
func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Filter, error) {
gpuInformationPaths, err := proc.GetInformationFilePaths(devRoot)
if err != nil {
return nil, fmt.Errorf("failed to read GPU information: %v", err)
}

View File

@ -19,14 +19,14 @@ package discover
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
// NewMOFEDDiscoverer creates a discoverer for MOFED devices.
func NewMOFEDDiscoverer(logger logger.Interface, root string) (Discover, error) {
func NewMOFEDDiscoverer(logger logger.Interface, devRoot string) (Discover, error) {
devices := NewCharDeviceDiscoverer(
logger,
devRoot,
[]string{
"/dev/infiniband/uverbs*",
"/dev/infiniband/rdma_cm",
},
root,
)
return devices, nil

View File

@ -148,6 +148,7 @@ var _ Locator = (*file)(nil)
func (p file) Locate(pattern string) ([]string, error) {
var filenames []string
p.logger.Debugf("Locating %q in %v", pattern, p.prefixes)
visit:
for _, prefix := range p.prefixes {
pathPattern := filepath.Join(prefix, pattern)

View File

@ -42,7 +42,9 @@ func NewGDSModifier(logger logger.Interface, cfg *config.Config, image image.CUD
return nil, nil
}
d, err := discover.NewGDSDiscoverer(logger, cfg.NVIDIAContainerCLIConfig.Root)
driverRoot := cfg.NVIDIAContainerCLIConfig.Root
devRoot := cfg.NVIDIAContainerCLIConfig.Root
d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %v", err)
}

View File

@ -36,9 +36,8 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag
d, err := discover.NewGraphicsDiscoverer(
logger,
cfg,
image.DevicesFromEnvvars(visibleDevicesEnvvar),
cfg.NVIDIAContainerCLIConfig.Root,
cfg.NVIDIACTKConfig.Path,
)
if err != nil {
return nil, fmt.Errorf("failed to construct discoverer: %v", err)

View File

@ -36,9 +36,8 @@ func (o tegraOptions) newDiscovererFromCSVFiles() (discover.Discover, error) {
targetsByType := getTargetsFromCSVFiles(o.logger, o.csvFiles)
devices := discover.NewDeviceDiscoverer(
devices := discover.NewCharDeviceDiscoverer(
o.logger,
lookup.NewCharDeviceLocator(lookup.WithLogger(o.logger), lookup.WithRoot(o.driverRoot)),
o.driverRoot,
targetsByType[csv.MountSpecDev],
)

View File

@ -29,6 +29,7 @@ type tegraOptions struct {
logger logger.Interface
csvFiles []string
driverRoot string
devRoot string
nvidiaCTKPath string
librarySearchPaths []string
ignorePatterns ignoreMountSpecPatterns
@ -50,6 +51,10 @@ func New(opts ...Option) (discover.Discover, error) {
opt(o)
}
if o.devRoot == "" {
o.devRoot = o.driverRoot
}
if o.symlinkLocator == nil {
o.symlinkLocator = lookup.NewSymlinkLocator(
lookup.WithLogger(o.logger),
@ -112,6 +117,14 @@ func WithDriverRoot(driverRoot string) Option {
}
}
// WithDevRoot sets the root under which /dev is located.
// If this is unset (or set to the empty string), New falls back to the
// driver root.
//
// The option only touches devRoot; the driver root configured through
// WithDriverRoot is left untouched.
func WithDevRoot(devRoot string) Option {
	return func(o *tegraOptions) {
		// Assign devRoot, not driverRoot: writing to driverRoot here would
		// silently override WithDriverRoot and leave devRoot empty.
		o.devRoot = devRoot
	}
}
// WithCSVFiles sets the CSV files for the discoverer.
func WithCSVFiles(csvFiles []string) Option {
return func(o *tegraOptions) {

View File

@ -20,22 +20,14 @@ import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
)
// newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
// This includes driver libraries and meta devices, for example.
func newCommonNVMLDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
metaDevices := discover.NewDeviceDiscoverer(
logger,
lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
),
driverRoot,
func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
metaDevices := discover.NewCharDeviceDiscoverer(
l.logger,
l.devRoot,
[]string{
"/dev/nvidia-modeset",
"/dev/nvidia-uvm-tools",
@ -44,12 +36,12 @@ func newCommonNVMLDiscoverer(logger logger.Interface, driverRoot string, nvidiaC
},
)
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath)
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath)
if err != nil {
logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
}
driverFiles, err := NewDriverDiscoverer(logger, driverRoot, nvidiaCTKPath, nvmllib)
driverFiles, err := NewDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
}

View File

@ -26,11 +26,11 @@ const (
)
// newDXGDeviceDiscoverer returns a Discoverer for DXG devices under WSL2.
func newDXGDeviceDiscoverer(logger logger.Interface, driverRoot string) discover.Discover {
func newDXGDeviceDiscoverer(logger logger.Interface, devRoot string) discover.Discover {
deviceNodes := discover.NewCharDeviceDiscoverer(
logger,
devRoot,
[]string{dxgDeviceNode},
driverRoot,
)
return deviceNodes

View File

@ -54,7 +54,7 @@ func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, erro
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.
func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error) {
device, err := newFullGPUDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, d)
device, err := newFullGPUDiscoverer(l.logger, l.devRoot, l.nvidiaCTKPath, d)
if err != nil {
return nil, fmt.Errorf("failed to create device discoverer: %v", err)
}
@ -70,7 +70,7 @@ func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error
// byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links
type byPathHookDiscoverer struct {
logger logger.Interface
driverRoot string
devRoot string
nvidiaCTKPath string
pciBusID string
deviceNodes discover.Discover
@ -79,7 +79,7 @@ type byPathHookDiscoverer struct {
var _ discover.Discover = (*byPathHookDiscoverer)(nil)
// newFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
// TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface.
// This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin.
minor, ret := d.GetMinorNumber()
@ -103,13 +103,13 @@ func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKP
deviceNodes := discover.NewCharDeviceDiscoverer(
logger,
devRoot,
deviceNodePaths,
driverRoot,
)
byPathHooks := &byPathHookDiscoverer{
logger: logger,
driverRoot: driverRoot,
devRoot: devRoot,
nvidiaCTKPath: nvidiaCTKPath,
pciBusID: pciBusID,
deviceNodes: deviceNodes,
@ -117,7 +117,7 @@ func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKP
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
logger,
driverRoot,
devRoot,
nvidiaCTKPath,
deviceNodes,
)
@ -189,7 +189,7 @@ func (d *byPathHookDiscoverer) deviceNodeLinks() ([]string, error) {
var links []string
for _, c := range candidates {
linkPath := filepath.Join(d.driverRoot, c)
linkPath := filepath.Join(d.devRoot, c)
device, err := os.Readlink(linkPath)
if err != nil {
d.logger.Warningf("Failed to evaluate symlink %v; ignoring", linkPath)

View File

@ -33,7 +33,7 @@ var _ Interface = (*gdslib)(nil)
// GetAllDeviceSpecs returns the device specs for all available devices.
func (l *gdslib) GetAllDeviceSpecs() ([]specs.Device, error) {
discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot)
discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot, l.devRoot)
if err != nil {
return nil, fmt.Errorf("failed to create GPUDirect Storage discoverer: %v", err)
}

View File

@ -42,6 +42,7 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
d, err := tegra.New(
tegra.WithLogger(l.logger),
tegra.WithDriverRoot(l.driverRoot),
tegra.WithDevRoot(l.devRoot),
tegra.WithNVIDIACTKPath(l.nvidiaCTKPath),
tegra.WithCSVFiles(l.csvFiles),
tegra.WithLibrarySearchPaths(l.librarySearchPaths...),

View File

@ -66,7 +66,7 @@ func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) {
// GetCommonEdits generates a CDI specification that can be used for ANY devices
func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) {
common, err := newCommonNVMLDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib)
common, err := l.newCommonNVMLDiscoverer()
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
}

View File

@ -37,7 +37,7 @@ func (l *wsllib) GetSpec() (spec.Interface, error) {
// GetAllDeviceSpecs returns the device specs for all available devices.
func (l *wsllib) GetAllDeviceSpecs() ([]specs.Device, error) {
device := newDXGDeviceDiscoverer(l.logger, l.driverRoot)
device := newDXGDeviceDiscoverer(l.logger, l.devRoot)
deviceEdits, err := edits.FromDiscoverer(device)
if err != nil {
return nil, fmt.Errorf("failed to create container edits for DXG device: %v", err)

View File

@ -44,6 +44,7 @@ type nvcdilib struct {
devicelib device.Interface
deviceNamer DeviceNamer
driverRoot string
devRoot string
nvidiaCTKPath string
librarySearchPaths []string
@ -76,6 +77,9 @@ func New(opts ...Option) (Interface, error) {
if l.driverRoot == "" {
l.driverRoot = "/"
}
if l.devRoot == "" {
l.devRoot = l.driverRoot
}
if l.nvidiaCTKPath == "" {
l.nvidiaCTKPath = "/usr/bin/nvidia-ctk"
}

View File

@ -109,6 +109,7 @@ type managementDiscoverer struct {
func (m *managementlib) newManagementDeviceDiscoverer() (discover.Discover, error) {
deviceNodes := discover.NewCharDeviceDiscoverer(
m.logger,
m.devRoot,
[]string{
"/dev/nvidia*",
"/dev/nvidia-caps/nvidia-cap*",
@ -117,12 +118,11 @@ func (m *managementlib) newManagementDeviceDiscoverer() (discover.Discover, erro
"/dev/nvidia-uvm",
"/dev/nvidiactl",
},
m.driverRoot,
)
deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer(
m.logger,
m.driverRoot,
m.devRoot,
m.nvidiaCTKPath,
deviceNodes,
)

View File

@ -112,12 +112,12 @@ func newComputeInstanceDiscoverer(logger logger.Interface, driverRoot string, gp
deviceNodes := discover.NewCharDeviceDiscoverer(
logger,
driverRoot,
[]string{
parentPath,
giCapDevicePath,
ciCapDevicePath,
},
driverRoot,
)
return deviceNodes, nil

View File

@ -47,6 +47,13 @@ func WithDriverRoot(root string) Option {
}
}
// WithDevRoot returns an Option that records root as the location under
// which /dev is found. When no dev root is configured, New uses the driver
// root instead.
func WithDevRoot(root string) Option {
	applyDevRoot := func(lib *nvcdilib) {
		lib.devRoot = root
	}
	return applyDevRoot
}
// WithLogger sets the logger for the library
func WithLogger(logger logger.Interface) Option {
return func(l *nvcdilib) {

View File

@ -26,7 +26,7 @@ import (
type deviceFolderPermissions struct {
logger logger.Interface
driverRoot string
devRoot string
nvidiaCTKPath string
devices discover.Discover
}
@ -39,10 +39,10 @@ var _ discover.Discover = (*deviceFolderPermissions)(nil)
// The nested devices that are applicable to the NVIDIA GPU devices are:
// - DRM devices at /dev/dri/*
// - NVIDIA Caps devices at /dev/nvidia-caps/*
func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover {
func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover {
d := &deviceFolderPermissions{
logger: logger,
driverRoot: driverRoot,
devRoot: devRoot,
nvidiaCTKPath: nvidiaCTKPath,
devices: devices,
}