Split internal system package

This changes splits the functionality in the internal system package
into two packages: one for dealing with devices and one for dealing
with kernel modules. This removes ambiguity around the meaning of
driver / device roots in each case.

In each case, a root can be specified where device nodes are created
or kernel modules loaded.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2023-06-12 20:46:56 +02:00
parent c11c7695cb
commit d52dbeaa7a
16 changed files with 958 additions and 226 deletions

View File

@@ -28,14 +28,14 @@ import (
type allPossible struct {
logger logger.Interface
driverRoot string
devRoot string
deviceMajors devices.Devices
migCaps nvcaps.MigCaps
}
// newAllPossible returns a new allPossible device node lister.
// This lister lists all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
func newAllPossible(logger logger.Interface, driverRoot string) (nodeLister, error) {
func newAllPossible(logger logger.Interface, devRoot string) (nodeLister, error) {
deviceMajors, err := devices.GetNVIDIADevices()
if err != nil {
return nil, fmt.Errorf("failed reading device majors: %v", err)
@@ -61,7 +61,7 @@ func newAllPossible(logger logger.Interface, driverRoot string) (nodeLister, err
l := allPossible{
logger: logger,
driverRoot: driverRoot,
devRoot: devRoot,
deviceMajors: deviceMajors,
migCaps: migCaps,
}
@@ -72,7 +72,7 @@ func newAllPossible(logger logger.Interface, driverRoot string) (nodeLister, err
// DeviceNodes returns a list of all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
func (m allPossible) DeviceNodes() ([]deviceNode, error) {
gpus, err := nvpci.New(
nvpci.WithPCIDevicesRoot(filepath.Join(m.driverRoot, nvpci.PCIDevicesRoot)),
nvpci.WithPCIDevicesRoot(filepath.Join(m.devRoot, nvpci.PCIDevicesRoot)),
).GetGPUs()
if err != nil {
return nil, fmt.Errorf("failed to get GPU information: %v", err)
@@ -80,7 +80,7 @@ func (m allPossible) DeviceNodes() ([]deviceNode, error) {
count := len(gpus)
if count == 0 {
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
return nil, nil
}
@@ -179,7 +179,7 @@ func (m allPossible) newDeviceNode(deviceName devices.Name, path string, minor i
major, _ := m.deviceMajors.Get(deviceName)
return deviceNode{
path: filepath.Join(m.driverRoot, path),
path: filepath.Join(m.devRoot, path),
major: uint32(major),
minor: uint32(minor),
}

View File

@@ -25,7 +25,8 @@ import (
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/fsnotify/fsnotify"
"github.com/urfave/cli/v2"
)
@@ -216,6 +217,7 @@ type linkCreator struct {
logger logger.Interface
lister nodeLister
driverRoot string
devRoot string
devCharPath string
dryRun bool
createAll bool
@@ -243,6 +245,9 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) {
if c.driverRoot == "" {
c.driverRoot = "/"
}
if c.devRoot == "" {
c.devRoot = "/"
}
if c.devCharPath == "" {
c.devCharPath = defaultDevCharPath
}
@@ -252,13 +257,13 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) {
}
if c.createAll {
lister, err := newAllPossible(c.logger, c.driverRoot)
lister, err := newAllPossible(c.logger, c.devRoot)
if err != nil {
return nil, fmt.Errorf("failed to create all possible device lister: %v", err)
}
c.lister = lister
} else {
c.lister = existing{c.logger, c.driverRoot}
c.lister = existing{c.logger, c.devRoot}
}
return c, nil
}
@@ -268,36 +273,48 @@ func (m linkCreator) setup() error {
return nil
}
s, err := system.New(
system.WithLogger(m.logger),
system.WithDryRun(m.dryRun),
)
if err != nil {
return err
}
if m.loadKernelModules {
if err := s.LoadNVIDIAKernelModules(); err != nil {
modules := nvmodules.New(
nvmodules.WithLogger(m.logger),
nvmodules.WithDryRun(m.dryRun),
nvmodules.WithRoot(m.driverRoot),
)
if err := modules.LoadAll(); err != nil {
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
}
}
if m.createDeviceNodes {
if err := s.CreateNVIDIAControlDeviceNodesAt(m.driverRoot); err != nil {
devices, err := nvdevices.New(
nvdevices.WithLogger(m.logger),
nvdevices.WithDryRun(m.dryRun),
nvdevices.WithDevRoot(m.devRoot),
)
if err != nil {
return err
}
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create NVIDIA device nodes: %v", err)
}
}
return nil
}
// WithDriverRoot sets the driver root path.
// This is the path in which kernel modules must be loaded.
func WithDriverRoot(root string) Option {
return func(c *linkCreator) {
c.driverRoot = root
}
}
// WithDevRoot sets the root path for the /dev directory.
func WithDevRoot(root string) Option {
return func(c *linkCreator) {
c.devRoot = root
}
}
// WithDevCharPath sets the path at which the symlinks will be created.
func WithDevCharPath(path string) Option {
return func(c *linkCreator) {

View File

@@ -30,8 +30,8 @@ type nodeLister interface {
}
type existing struct {
logger logger.Interface
driverRoot string
logger logger.Interface
devRoot string
}
// DeviceNodes returns a list of NVIDIA device nodes in the specified root.
@@ -39,7 +39,7 @@ type existing struct {
func (m existing) DeviceNodes() ([]deviceNode, error) {
locator := lookup.NewCharDeviceLocator(
lookup.WithLogger(m.logger),
lookup.WithRoot(m.driverRoot),
lookup.WithRoot(m.devRoot),
lookup.WithOptional(true),
)
@@ -54,7 +54,7 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
}
if len(devices) == 0 && len(capDevices) == 0 {
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
return nil, nil
}

View File

@@ -20,7 +20,8 @@ import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/urfave/cli/v2"
)
@@ -96,19 +97,29 @@ func (m command) validateFlags(r *cli.Context, opts *options) error {
}
func (m command) run(c *cli.Context, opts *options) error {
s, err := system.New(
system.WithLogger(m.logger),
system.WithDryRun(opts.dryRun),
system.WithLoadKernelModules(opts.loadKernelModules),
)
if err != nil {
return fmt.Errorf("failed to create library: %v", err)
if opts.loadKernelModules {
modules := nvmodules.New(
nvmodules.WithLogger(m.logger),
nvmodules.WithDryRun(opts.dryRun),
nvmodules.WithRoot(opts.driverRoot),
)
if err := modules.LoadAll(); err != nil {
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
}
}
if opts.control {
devices, err := nvdevices.New(
nvdevices.WithLogger(m.logger),
nvdevices.WithDryRun(opts.dryRun),
nvdevices.WithDevRoot(opts.driverRoot),
)
if err != nil {
return err
}
m.logger.Infof("Creating control device nodes at %s", opts.driverRoot)
if err := s.CreateNVIDIAControlDeviceNodesAt(opts.driverRoot); err != nil {
return fmt.Errorf("failed to create control device nodes: %v", err)
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create NVIDIA control device nodes: %v", err)
}
}
return nil