From d4e21fdd109f5e0a7938f1d4f0930465b8bf299e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 14 Nov 2023 16:57:37 +0100 Subject: [PATCH 1/4] Add devRoot option to CDI api A driverRoot defines both the driver library root and the root for device nodes. In the case of preinstalled drivers or the driver container, these are equal, but in cases such as GKE they do not match. In this case, drivers are extracted to a folder and devices exist at the root /. The changes here add a devRoot option to the nvcdi API that allows the parent of /dev to be specified explicitly. Signed-off-by: Evan Lezar --- internal/discover/graphics.go | 32 +++++++++++-------- internal/modifier/gds.go | 4 ++- internal/modifier/graphics.go | 3 +- internal/platform-support/tegra/tegra.go | 13 ++++++++ pkg/nvcdi/common-nvml.go | 19 +++++------ pkg/nvcdi/device-wsl.go | 4 +-- pkg/nvcdi/full-gpu-nvml.go | 14 ++++---- pkg/nvcdi/gds.go | 2 +- pkg/nvcdi/lib-csv.go | 1 + pkg/nvcdi/lib-nvml.go | 2 +- pkg/nvcdi/lib-wsl.go | 2 +- pkg/nvcdi/lib.go | 4 +++ pkg/nvcdi/management.go | 4 +-- pkg/nvcdi/options.go | 7 ++++ .../workarounds-device-folder-permissions.go | 6 ++-- 15 files changed, 73 insertions(+), 44 deletions(-) diff --git a/internal/discover/graphics.go b/internal/discover/graphics.go index b52eb563..d70d4738 100644 --- a/internal/discover/graphics.go +++ b/internal/discover/graphics.go @@ -22,6 +22,7 @@ import ( "path/filepath" "strings" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm" "github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc" @@ -31,18 +32,23 @@ import ( ) // NewGraphicsDiscoverer returns the discoverer for graphics tools such as Vulkan. -func NewGraphicsDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string, nvidiaCTKPath string) (Discover, error) { +func NewGraphicsDiscoverer(logger logger.Interface, cfg *config.Config, devices image.VisibleDevices) (Discover, error) { + driverRoot := cfg.NVIDIAContainerCLIConfig.Root + // In standard usage, the devRoot is the same as the driverRoot. + devRoot := driverRoot + nvidiaCTKPath := cfg.NVIDIACTKConfig.Path + mounts, err := NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath) if err != nil { return nil, fmt.Errorf("failed to create mounts discoverer: %v", err) } - drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, driverRoot) + drmDeviceNodes, err := newDRMDeviceDiscoverer(logger, devices, devRoot) if err != nil { return nil, fmt.Errorf("failed to create DRM device discoverer: %v", err) } - drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, driverRoot, nvidiaCTKPath) + drmByPathSymlinks := newCreateDRMByPathSymlinks(logger, drmDeviceNodes, devRoot, nvidiaCTKPath) discover := Merge( Merge(drmDeviceNodes, drmByPathSymlinks), @@ -99,16 +105,16 @@ type drmDevicesByPath struct { None logger logger.Interface nvidiaCTKPath string - driverRoot string + devRoot string devicesFrom Discover } // newCreateDRMByPathSymlinks creates a discoverer for a hook to create the by-path symlinks for DRM devices discovered by the specified devices discoverer -func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, driverRoot string, nvidiaCTKPath string) Discover { +func newCreateDRMByPathSymlinks(logger logger.Interface, devices Discover, devRoot string, nvidiaCTKPath string) Discover { d := drmDevicesByPath{ logger: logger, nvidiaCTKPath: nvidiaCTKPath, - driverRoot: driverRoot, + devRoot: devRoot, devicesFrom: devices, } @@ -155,7 +161,7 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error linkLocator := lookup.NewFileLocator( lookup.WithLogger(d.logger), - lookup.WithRoot(d.driverRoot), + lookup.WithRoot(d.devRoot), ) candidates, err := linkLocator.Locate("/dev/dri/by-path/pci-*-*") if err != nil { @@ -181,21 +187,21 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error } // newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices. -func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Discover, error) { +func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Discover, error) { allDevices := NewDeviceDiscoverer( logger, lookup.NewCharDeviceLocator( lookup.WithLogger(logger), - lookup.WithRoot(driverRoot), + lookup.WithRoot(devRoot), ), - driverRoot, + devRoot, []string{ "/dev/dri/card*", "/dev/dri/renderD*", }, ) - filter, err := newDRMDeviceFilter(logger, devices, driverRoot) + filter, err := newDRMDeviceFilter(logger, devices, devRoot) if err != nil { return nil, fmt.Errorf("failed to construct DRM device filter: %v", err) } @@ -211,8 +217,8 @@ func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevice } // newDRMDeviceFilter creates a filter that matches DRM devices nodes for the visible devices. -func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, driverRoot string) (Filter, error) { - gpuInformationPaths, err := proc.GetInformationFilePaths(driverRoot) +func newDRMDeviceFilter(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Filter, error) { + gpuInformationPaths, err := proc.GetInformationFilePaths(devRoot) if err != nil { return nil, fmt.Errorf("failed to read GPU information: %v", err) } diff --git a/internal/modifier/gds.go b/internal/modifier/gds.go index dd03a731..ac431405 100644 --- a/internal/modifier/gds.go +++ b/internal/modifier/gds.go @@ -42,7 +42,9 @@ func NewGDSModifier(logger logger.Interface, cfg *config.Config, image image.CUD return nil, nil } - d, err := discover.NewGDSDiscoverer(logger, cfg.NVIDIAContainerCLIConfig.Root) + driverRoot := cfg.NVIDIAContainerCLIConfig.Root + devRoot := cfg.NVIDIAContainerCLIConfig.Root + d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot) if err != nil { return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %v", err) } diff --git a/internal/modifier/graphics.go b/internal/modifier/graphics.go index 57776a72..2852ba2c 100644 --- a/internal/modifier/graphics.go +++ b/internal/modifier/graphics.go @@ -36,9 +36,8 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag d, err := discover.NewGraphicsDiscoverer( logger, + cfg, image.DevicesFromEnvvars(visibleDevicesEnvvar), - cfg.NVIDIAContainerCLIConfig.Root, - cfg.NVIDIACTKConfig.Path, ) if err != nil { return nil, fmt.Errorf("failed to construct discoverer: %v", err) diff --git a/internal/platform-support/tegra/tegra.go b/internal/platform-support/tegra/tegra.go index 563c6e28..6fb42674 100644 --- a/internal/platform-support/tegra/tegra.go +++ b/internal/platform-support/tegra/tegra.go @@ -29,6 +29,7 @@ type tegraOptions struct { logger logger.Interface csvFiles []string driverRoot string + devRoot string nvidiaCTKPath string librarySearchPaths []string ignorePatterns ignoreMountSpecPatterns @@ -50,6 +51,10 @@ func New(opts ...Option) (discover.Discover, error) { opt(o) } + if o.devRoot == "" { + o.devRoot = o.driverRoot + } + if o.symlinkLocator == nil { o.symlinkLocator = lookup.NewSymlinkLocator( lookup.WithLogger(o.logger), @@ -112,6 +117,14 @@ func WithDriverRoot(driverRoot string) Option { } } +// WithDevRoot sets the /dev root. +// If this is unset, the driver root is assumed. +func WithDevRoot(driverRoot string) Option { + return func(o *tegraOptions) { + o.driverRoot = driverRoot + } +} + // WithCSVFiles sets the CSV files for the discoverer. func WithCSVFiles(csvFiles []string) Option { return func(o *tegraOptions) { diff --git a/pkg/nvcdi/common-nvml.go b/pkg/nvcdi/common-nvml.go index 3c9fe6bf..f7a22c8a 100644 --- a/pkg/nvcdi/common-nvml.go +++ b/pkg/nvcdi/common-nvml.go @@ -20,22 +20,19 @@ import ( "fmt" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" - "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" - - "github.com/NVIDIA/go-nvlib/pkg/nvml" ) // newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device. // This includes driver libraries and meta devices, for example. -func newCommonNVMLDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) { +func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) { metaDevices := discover.NewDeviceDiscoverer( - logger, + l.logger, lookup.NewCharDeviceLocator( - lookup.WithLogger(logger), - lookup.WithRoot(driverRoot), + lookup.WithLogger(l.logger), + lookup.WithRoot(l.devRoot), ), - driverRoot, + l.devRoot, []string{ "/dev/nvidia-modeset", "/dev/nvidia-uvm-tools", @@ -44,12 +41,12 @@ func newCommonNVMLDiscoverer(logger logger.Interface, driverRoot string, nvidiaC }, ) - graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(logger, driverRoot, nvidiaCTKPath) + graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath) if err != nil { - logger.Warningf("failed to create discoverer for graphics mounts: %v", err) + l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err) } - driverFiles, err := NewDriverDiscoverer(logger, driverRoot, nvidiaCTKPath, nvmllib) + driverFiles, err := NewDriverDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib) if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err) } diff --git a/pkg/nvcdi/device-wsl.go b/pkg/nvcdi/device-wsl.go index 418016b5..b1d93468 100644 --- a/pkg/nvcdi/device-wsl.go +++ b/pkg/nvcdi/device-wsl.go @@ -26,11 +26,11 @@ const ( ) // newDXGDeviceDiscoverer returns a Discoverer for DXG devices under WSL2. -func newDXGDeviceDiscoverer(logger logger.Interface, driverRoot string) discover.Discover { +func newDXGDeviceDiscoverer(logger logger.Interface, devRoot string) discover.Discover { deviceNodes := discover.NewCharDeviceDiscoverer( logger, []string{dxgDeviceNode}, - driverRoot, + devRoot, ) return deviceNodes diff --git a/pkg/nvcdi/full-gpu-nvml.go b/pkg/nvcdi/full-gpu-nvml.go index b808153b..f22e449c 100644 --- a/pkg/nvcdi/full-gpu-nvml.go +++ b/pkg/nvcdi/full-gpu-nvml.go @@ -54,7 +54,7 @@ func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, erro // GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'. func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error) { - device, err := newFullGPUDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, d) + device, err := newFullGPUDiscoverer(l.logger, l.devRoot, l.nvidiaCTKPath, d) if err != nil { return nil, fmt.Errorf("failed to create device discoverer: %v", err) } @@ -70,7 +70,7 @@ func (l *nvmllib) GetGPUDeviceEdits(d device.Device) (*cdi.ContainerEdits, error // byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links type byPathHookDiscoverer struct { logger logger.Interface - driverRoot string + devRoot string nvidiaCTKPath string pciBusID string deviceNodes discover.Discover @@ -79,7 +79,7 @@ type byPathHookDiscoverer struct { var _ discover.Discover = (*byPathHookDiscoverer)(nil) // newFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device. -func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) { +func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) { // TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface. // This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin. minor, ret := d.GetMinorNumber() @@ -104,12 +104,12 @@ func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKP deviceNodes := discover.NewCharDeviceDiscoverer( logger, deviceNodePaths, - driverRoot, + devRoot, ) byPathHooks := &byPathHookDiscoverer{ logger: logger, - driverRoot: driverRoot, + devRoot: devRoot, nvidiaCTKPath: nvidiaCTKPath, pciBusID: pciBusID, deviceNodes: deviceNodes, @@ -117,7 +117,7 @@ func newFullGPUDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKP deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer( logger, - driverRoot, + devRoot, nvidiaCTKPath, deviceNodes, ) @@ -189,7 +189,7 @@ func (d *byPathHookDiscoverer) deviceNodeLinks() ([]string, error) { var links []string for _, c := range candidates { - linkPath := filepath.Join(d.driverRoot, c) + linkPath := filepath.Join(d.devRoot, c) device, err := os.Readlink(linkPath) if err != nil { d.logger.Warningf("Failed to evaluate symlink %v; ignoring", linkPath) diff --git a/pkg/nvcdi/gds.go b/pkg/nvcdi/gds.go index 3cb5bcf2..c50ab5e5 100644 --- a/pkg/nvcdi/gds.go +++ b/pkg/nvcdi/gds.go @@ -33,7 +33,7 @@ var _ Interface = (*gdslib)(nil) // GetAllDeviceSpecs returns the device specs for all available devices. func (l *gdslib) GetAllDeviceSpecs() ([]specs.Device, error) { - discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot) + discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot, l.devRoot) if err != nil { return nil, fmt.Errorf("failed to create GPUDirect Storage discoverer: %v", err) } diff --git a/pkg/nvcdi/lib-csv.go b/pkg/nvcdi/lib-csv.go index e0b7fbc6..14b6b5a0 100644 --- a/pkg/nvcdi/lib-csv.go +++ b/pkg/nvcdi/lib-csv.go @@ -42,6 +42,7 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) { d, err := tegra.New( tegra.WithLogger(l.logger), tegra.WithDriverRoot(l.driverRoot), + tegra.WithDevRoot(l.devRoot), tegra.WithNVIDIACTKPath(l.nvidiaCTKPath), tegra.WithCSVFiles(l.csvFiles), tegra.WithLibrarySearchPaths(l.librarySearchPaths...), diff --git a/pkg/nvcdi/lib-nvml.go b/pkg/nvcdi/lib-nvml.go index c49833cf..82cc9b6e 100644 --- a/pkg/nvcdi/lib-nvml.go +++ b/pkg/nvcdi/lib-nvml.go @@ -66,7 +66,7 @@ func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) { // GetCommonEdits generates a CDI specification that can be used for ANY devices func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) { - common, err := newCommonNVMLDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib) + common, err := l.newCommonNVMLDiscoverer() if err != nil { return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err) } diff --git a/pkg/nvcdi/lib-wsl.go b/pkg/nvcdi/lib-wsl.go index 23ee79bd..b2ecf173 100644 --- a/pkg/nvcdi/lib-wsl.go +++ b/pkg/nvcdi/lib-wsl.go @@ -37,7 +37,7 @@ func (l *wsllib) GetSpec() (spec.Interface, error) { // GetAllDeviceSpecs returns the device specs for all available devices. func (l *wsllib) GetAllDeviceSpecs() ([]specs.Device, error) { - device := newDXGDeviceDiscoverer(l.logger, l.driverRoot) + device := newDXGDeviceDiscoverer(l.logger, l.devRoot) deviceEdits, err := edits.FromDiscoverer(device) if err != nil { return nil, fmt.Errorf("failed to create container edits for DXG device: %v", err) diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index f5b53245..8a60ac6b 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -44,6 +44,7 @@ type nvcdilib struct { devicelib device.Interface deviceNamer DeviceNamer driverRoot string + devRoot string nvidiaCTKPath string librarySearchPaths []string @@ -76,6 +77,9 @@ func New(opts ...Option) (Interface, error) { if l.driverRoot == "" { l.driverRoot = "/" } + if l.devRoot == "" { + l.devRoot = l.driverRoot + } if l.nvidiaCTKPath == "" { l.nvidiaCTKPath = "/usr/bin/nvidia-ctk" } diff --git a/pkg/nvcdi/management.go b/pkg/nvcdi/management.go index 626a6c2b..8050a5b6 100644 --- a/pkg/nvcdi/management.go +++ b/pkg/nvcdi/management.go @@ -117,12 +117,12 @@ func (m *managementlib) newManagementDeviceDiscoverer() (discover.Discover, erro "/dev/nvidia-uvm", "/dev/nvidiactl", }, - m.driverRoot, + m.devRoot, ) deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer( m.logger, - m.driverRoot, + m.devRoot, m.nvidiaCTKPath, deviceNodes, ) diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index 03cfc362..86bb877d 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -47,6 +47,13 @@ func WithDriverRoot(root string) Option { } } +// WithDevRoot sets the root where /dev is located. +func WithDevRoot(root string) Option { + return func(l *nvcdilib) { + l.devRoot = root + } +} + // WithLogger sets the logger for the library func WithLogger(logger logger.Interface) Option { return func(l *nvcdilib) { diff --git a/pkg/nvcdi/workarounds-device-folder-permissions.go b/pkg/nvcdi/workarounds-device-folder-permissions.go index 812bbbaf..e183ed6e 100644 --- a/pkg/nvcdi/workarounds-device-folder-permissions.go +++ b/pkg/nvcdi/workarounds-device-folder-permissions.go @@ -26,7 +26,7 @@ import ( type deviceFolderPermissions struct { logger logger.Interface - driverRoot string + devRoot string nvidiaCTKPath string devices discover.Discover } @@ -39,10 +39,10 @@ var _ discover.Discover = (*deviceFolderPermissions)(nil) // The nested devices that are applicable to the NVIDIA GPU devices are: // - DRM devices at /dev/dri/* // - NVIDIA Caps devices at /dev/nvidia-caps/* -func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, driverRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover { +func newDeviceFolderPermissionHookDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath string, devices discover.Discover) discover.Discover { d := &deviceFolderPermissions{ logger: logger, - driverRoot: driverRoot, + devRoot: devRoot, nvidiaCTKPath: nvidiaCTKPath, devices: devices, } From 00a712d0183b0ed0ac7fc15019321838b706de69 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 20 Nov 2023 14:24:39 +0100 Subject: [PATCH 2/4] Add --dev-root option to CDI spec generation Signed-off-by: Evan Lezar --- cmd/nvidia-ctk/cdi/generate/generate.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index 63828336..124fbc0f 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -45,6 +45,7 @@ type options struct { format string deviceNameStrategy string driverRoot string + devRoot string nvidiaCTKPath string mode string vendor string @@ -101,6 +102,11 @@ func (m command) build() *cli.Command { Value: nvcdi.ModeAuto, Destination: &opts.mode, }, + &cli.StringFlag{ + Name: "dev-root", + Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.", + Destination: &opts.devRoot, + }, &cli.StringFlag{ Name: "device-name-strategy", Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]", @@ -236,6 +242,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { cdilib, err := nvcdi.New( nvcdi.WithLogger(m.logger), nvcdi.WithDriverRoot(opts.driverRoot), + nvcdi.WithDevRoot(opts.devRoot), nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath), nvcdi.WithDeviceNamer(deviceNamer), nvcdi.WithMode(opts.mode), From 3a96a003628b734d7b2439971899b0de5d00882e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 20 Nov 2023 15:03:36 +0100 Subject: [PATCH 3/4] Simplify meta device discovery Signed-off-by: Evan Lezar --- internal/discover/char_devices.go | 13 +++---------- internal/discover/gds.go | 12 ++++++------ internal/discover/graphics.go | 6 +----- internal/discover/mofed.go | 4 ++-- internal/platform-support/tegra/csv.go | 3 +-- pkg/nvcdi/common-nvml.go | 7 +------ pkg/nvcdi/device-wsl.go | 2 +- pkg/nvcdi/full-gpu-nvml.go | 2 +- pkg/nvcdi/management.go | 2 +- pkg/nvcdi/mig-device-nvml.go | 2 +- 10 files changed, 18 insertions(+), 35 deletions(-) diff --git a/internal/discover/char_devices.go b/internal/discover/char_devices.go index 715d979a..8d59430d 100644 --- a/internal/discover/char_devices.go +++ b/internal/discover/char_devices.go @@ -27,20 +27,13 @@ type charDevices mounts var _ Discover = (*charDevices)(nil) // NewCharDeviceDiscoverer creates a discoverer which locates the specified set of device nodes. -func NewCharDeviceDiscoverer(logger logger.Interface, devices []string, root string) Discover { +func NewCharDeviceDiscoverer(logger logger.Interface, devRoot string, devices []string) Discover { locator := lookup.NewCharDeviceLocator( lookup.WithLogger(logger), - lookup.WithRoot(root), + lookup.WithRoot(devRoot), ) - return NewDeviceDiscoverer(logger, locator, root, devices) -} - -// NewDeviceDiscoverer creates a discoverer which locates the specified set of device nodes using the specified locator. -func NewDeviceDiscoverer(logger logger.Interface, locator lookup.Locator, root string, devices []string) Discover { - m := NewMounts(logger, locator, root, devices).(*mounts) - - return (*charDevices)(m) + return (*charDevices)(newMounts(logger, locator, devRoot, devices)) } // Mounts returns the discovered mounts for the charDevices. diff --git a/internal/discover/gds.go b/internal/discover/gds.go index c0307f26..cf762cd8 100644 --- a/internal/discover/gds.go +++ b/internal/discover/gds.go @@ -29,17 +29,17 @@ type gdsDeviceDiscoverer struct { } // NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts. -func NewGDSDiscoverer(logger logger.Interface, root string) (Discover, error) { +func NewGDSDiscoverer(logger logger.Interface, driverRoot string, devRoot string) (Discover, error) { devices := NewCharDeviceDiscoverer( logger, + devRoot, []string{"/dev/nvidia-fs*"}, - root, ) udev := NewMounts( logger, - lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(root)), - root, + lookup.NewDirectoryLocator(lookup.WithLogger(logger), lookup.WithRoot(driverRoot)), + driverRoot, []string{"/run/udev"}, ) @@ -47,9 +47,9 @@ func NewGDSDiscoverer(logger logger.Interface, root string) (Discover, error) { logger, lookup.NewFileLocator( lookup.WithLogger(logger), - lookup.WithRoot(root), + lookup.WithRoot(driverRoot), ), - root, + driverRoot, []string{"/etc/cufile.json"}, ) diff --git a/internal/discover/graphics.go b/internal/discover/graphics.go index d70d4738..53b9a343 100644 --- a/internal/discover/graphics.go +++ b/internal/discover/graphics.go @@ -188,12 +188,8 @@ func (d drmDevicesByPath) getSpecificLinkArgs(devices []Device) ([]string, error // newDRMDeviceDiscoverer creates a discoverer for the DRM devices associated with the requested devices. func newDRMDeviceDiscoverer(logger logger.Interface, devices image.VisibleDevices, devRoot string) (Discover, error) { - allDevices := NewDeviceDiscoverer( + allDevices := NewCharDeviceDiscoverer( logger, - lookup.NewCharDeviceLocator( - lookup.WithLogger(logger), - lookup.WithRoot(devRoot), - ), devRoot, []string{ "/dev/dri/card*", diff --git a/internal/discover/mofed.go b/internal/discover/mofed.go index c893db3b..b9ff4153 100644 --- a/internal/discover/mofed.go +++ b/internal/discover/mofed.go @@ -19,14 +19,14 @@ package discover import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" // NewMOFEDDiscoverer creates a discoverer for MOFED devices. -func NewMOFEDDiscoverer(logger logger.Interface, root string) (Discover, error) { +func NewMOFEDDiscoverer(logger logger.Interface, devRoot string) (Discover, error) { devices := NewCharDeviceDiscoverer( logger, + devRoot, []string{ "/dev/infiniband/uverbs*", "/dev/infiniband/rdma_cm", }, - root, ) return devices, nil diff --git a/internal/platform-support/tegra/csv.go b/internal/platform-support/tegra/csv.go index ff2f4932..161e6a7a 100644 --- a/internal/platform-support/tegra/csv.go +++ b/internal/platform-support/tegra/csv.go @@ -36,9 +36,8 @@ func (o tegraOptions) newDiscovererFromCSVFiles() (discover.Discover, error) { targetsByType := getTargetsFromCSVFiles(o.logger, o.csvFiles) - devices := discover.NewDeviceDiscoverer( + devices := discover.NewCharDeviceDiscoverer( o.logger, - lookup.NewCharDeviceLocator(lookup.WithLogger(o.logger), lookup.WithRoot(o.driverRoot)), o.driverRoot, targetsByType[csv.MountSpecDev], ) diff --git a/pkg/nvcdi/common-nvml.go b/pkg/nvcdi/common-nvml.go index f7a22c8a..a33cfc61 100644 --- a/pkg/nvcdi/common-nvml.go +++ b/pkg/nvcdi/common-nvml.go @@ -20,18 +20,13 @@ import ( "fmt" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" - "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" ) // newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device. // This includes driver libraries and meta devices, for example. func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) { - metaDevices := discover.NewDeviceDiscoverer( + metaDevices := discover.NewCharDeviceDiscoverer( l.logger, - lookup.NewCharDeviceLocator( - lookup.WithLogger(l.logger), - lookup.WithRoot(l.devRoot), - ), l.devRoot, []string{ "/dev/nvidia-modeset", diff --git a/pkg/nvcdi/device-wsl.go b/pkg/nvcdi/device-wsl.go index b1d93468..0f9e1ffd 100644 --- a/pkg/nvcdi/device-wsl.go +++ b/pkg/nvcdi/device-wsl.go @@ -29,8 +29,8 @@ const ( func newDXGDeviceDiscoverer(logger logger.Interface, devRoot string) discover.Discover { deviceNodes := discover.NewCharDeviceDiscoverer( logger, - []string{dxgDeviceNode}, devRoot, + []string{dxgDeviceNode}, ) return deviceNodes diff --git a/pkg/nvcdi/full-gpu-nvml.go b/pkg/nvcdi/full-gpu-nvml.go index f22e449c..d47c802d 100644 --- a/pkg/nvcdi/full-gpu-nvml.go +++ b/pkg/nvcdi/full-gpu-nvml.go @@ -103,8 +103,8 @@ func newFullGPUDiscoverer(logger logger.Interface, devRoot string, nvidiaCTKPath deviceNodes := discover.NewCharDeviceDiscoverer( logger, - deviceNodePaths, devRoot, + deviceNodePaths, ) byPathHooks := &byPathHookDiscoverer{ diff --git a/pkg/nvcdi/management.go b/pkg/nvcdi/management.go index 8050a5b6..9352e110 100644 --- a/pkg/nvcdi/management.go +++ b/pkg/nvcdi/management.go @@ -109,6 +109,7 @@ type managementDiscoverer struct { func (m *managementlib) newManagementDeviceDiscoverer() (discover.Discover, error) { deviceNodes := discover.NewCharDeviceDiscoverer( m.logger, + m.devRoot, []string{ "/dev/nvidia*", "/dev/nvidia-caps/nvidia-cap*", @@ -117,7 +118,6 @@ func (m *managementlib) newManagementDeviceDiscoverer() (discover.Discover, erro "/dev/nvidia-uvm", "/dev/nvidiactl", }, - m.devRoot, ) deviceFolderPermissionHooks := newDeviceFolderPermissionHookDiscoverer( diff --git a/pkg/nvcdi/mig-device-nvml.go b/pkg/nvcdi/mig-device-nvml.go index a442a738..db8601d9 100644 --- a/pkg/nvcdi/mig-device-nvml.go +++ b/pkg/nvcdi/mig-device-nvml.go @@ -112,12 +112,12 @@ func newComputeInstanceDiscoverer(logger logger.Interface, driverRoot string, gp deviceNodes := discover.NewCharDeviceDiscoverer( logger, + driverRoot, []string{ parentPath, giCapDevicePath, ciCapDevicePath, }, - driverRoot, ) return deviceNodes, nil From b4c683282857365ee341783a14df5bd3b5fbe6d0 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 20 Nov 2023 14:44:39 +0100 Subject: [PATCH 4/4] Add additional debug Signed-off-by: Evan Lezar --- internal/lookup/file.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/lookup/file.go b/internal/lookup/file.go index b5d2fa76..20a1f164 100644 --- a/internal/lookup/file.go +++ b/internal/lookup/file.go @@ -148,6 +148,7 @@ var _ Locator = (*file)(nil) func (p file) Locate(pattern string) ([]string, error) { var filenames []string + p.logger.Debugf("Locating %q in %v", pattern, p.prefixes) visit: for _, prefix := range p.prefixes { pathPattern := filepath.Join(prefix, pattern)