Merge branch 'fix-load-kernel-modules' into 'main'

Split internal system package

See merge request nvidia/container-toolkit/container-toolkit!420
This commit is contained in:
Evan Lezar
2023-06-26 08:30:51 +00:00
committed by Evan Lezar
parent 7c807c2c22
commit cc06766f25
17 changed files with 959 additions and 226 deletions

View File

@@ -28,14 +28,14 @@ import (
type allPossible struct {
logger *logrus.Logger
driverRoot string
devRoot string
deviceMajors devices.Devices
migCaps nvcaps.MigCaps
}
// newAllPossible returns a new allPossible device node lister.
// This lister lists all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
func newAllPossible(logger *logrus.Logger, driverRoot string) (nodeLister, error) {
func newAllPossible(logger *logrus.Logger, devRoot string) (nodeLister, error) {
deviceMajors, err := devices.GetNVIDIADevices()
if err != nil {
return nil, fmt.Errorf("failed reading device majors: %v", err)
@@ -61,7 +61,7 @@ func newAllPossible(logger *logrus.Logger, driverRoot string) (nodeLister, error
l := allPossible{
logger: logger,
driverRoot: driverRoot,
devRoot: devRoot,
deviceMajors: deviceMajors,
migCaps: migCaps,
}
@@ -72,7 +72,7 @@ func newAllPossible(logger *logrus.Logger, driverRoot string) (nodeLister, error
// DeviceNodes returns a list of all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
func (m allPossible) DeviceNodes() ([]deviceNode, error) {
gpus, err := nvpci.NewFrom(
filepath.Join(m.driverRoot, nvpci.PCIDevicesRoot),
filepath.Join(m.devRoot, nvpci.PCIDevicesRoot),
).GetGPUs()
if err != nil {
return nil, fmt.Errorf("failed to get GPU information: %v", err)
@@ -80,7 +80,7 @@ func (m allPossible) DeviceNodes() ([]deviceNode, error) {
count := len(gpus)
if count == 0 {
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
return nil, nil
}
@@ -179,7 +179,7 @@ func (m allPossible) newDeviceNode(deviceName devices.Name, path string, minor i
major, _ := m.deviceMajors.Get(deviceName)
return deviceNode{
path: filepath.Join(m.driverRoot, path),
path: filepath.Join(m.devRoot, path),
major: uint32(major),
minor: uint32(minor),
}

View File

@@ -24,7 +24,8 @@ import (
"strings"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/fsnotify/fsnotify"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
@@ -216,6 +217,7 @@ type linkCreator struct {
logger *logrus.Logger
lister nodeLister
driverRoot string
devRoot string
devCharPath string
dryRun bool
createAll bool
@@ -243,6 +245,9 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) {
if c.driverRoot == "" {
c.driverRoot = "/"
}
if c.devRoot == "" {
c.devRoot = "/"
}
if c.devCharPath == "" {
c.devCharPath = defaultDevCharPath
}
@@ -252,13 +257,13 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) {
}
if c.createAll {
lister, err := newAllPossible(c.logger, c.driverRoot)
lister, err := newAllPossible(c.logger, c.devRoot)
if err != nil {
return nil, fmt.Errorf("failed to create all possible device lister: %v", err)
}
c.lister = lister
} else {
c.lister = existing{c.logger, c.driverRoot}
c.lister = existing{c.logger, c.devRoot}
}
return c, nil
}
@@ -268,36 +273,48 @@ func (m linkCreator) setup() error {
return nil
}
s, err := system.New(
system.WithLogger(m.logger),
system.WithDryRun(m.dryRun),
)
if err != nil {
return err
}
if m.loadKernelModules {
if err := s.LoadNVIDIAKernelModules(); err != nil {
modules := nvmodules.New(
nvmodules.WithLogger(m.logger),
nvmodules.WithDryRun(m.dryRun),
nvmodules.WithRoot(m.driverRoot),
)
if err := modules.LoadAll(); err != nil {
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
}
}
if m.createDeviceNodes {
if err := s.CreateNVIDIAControlDeviceNodesAt(m.driverRoot); err != nil {
devices, err := nvdevices.New(
nvdevices.WithLogger(m.logger),
nvdevices.WithDryRun(m.dryRun),
nvdevices.WithDevRoot(m.devRoot),
)
if err != nil {
return err
}
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create NVIDIA device nodes: %v", err)
}
}
return nil
}
// WithDriverRoot sets the driver root path.
// This is the path in which kernel modules must be loaded.
func WithDriverRoot(root string) Option {
return func(c *linkCreator) {
c.driverRoot = root
}
}
// WithDevRoot sets the root path for the /dev directory.
func WithDevRoot(root string) Option {
return func(c *linkCreator) {
c.devRoot = root
}
}
// WithDevCharPath sets the path at which the symlinks will be created.
func WithDevCharPath(path string) Option {
return func(c *linkCreator) {

View File

@@ -30,8 +30,8 @@ type nodeLister interface {
}
type existing struct {
logger *logrus.Logger
driverRoot string
logger *logrus.Logger
devRoot string
}
// DeviceNodes returns a list of NVIDIA device nodes in the specified root.
@@ -39,7 +39,7 @@ type existing struct {
func (m existing) DeviceNodes() ([]deviceNode, error) {
locator := lookup.NewCharDeviceLocator(
lookup.WithLogger(m.logger),
lookup.WithRoot(m.driverRoot),
lookup.WithRoot(m.devRoot),
lookup.WithOptional(true),
)
@@ -54,7 +54,7 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
}
if len(devices) == 0 && len(capDevices) == 0 {
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
return nil, nil
}

View File

@@ -19,7 +19,8 @@ package createdevicenodes
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
@@ -96,19 +97,29 @@ func (m command) validateFlags(r *cli.Context, opts *options) error {
}
func (m command) run(c *cli.Context, opts *options) error {
s, err := system.New(
system.WithLogger(m.logger),
system.WithDryRun(opts.dryRun),
system.WithLoadKernelModules(opts.loadKernelModules),
)
if err != nil {
return fmt.Errorf("failed to create library: %v", err)
if opts.loadKernelModules {
modules := nvmodules.New(
nvmodules.WithLogger(m.logger),
nvmodules.WithDryRun(opts.dryRun),
nvmodules.WithRoot(opts.driverRoot),
)
if err := modules.LoadAll(); err != nil {
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
}
}
if opts.control {
devices, err := nvdevices.New(
nvdevices.WithLogger(m.logger),
nvdevices.WithDryRun(opts.dryRun),
nvdevices.WithDevRoot(opts.driverRoot),
)
if err != nil {
return err
}
m.logger.Infof("Creating control device nodes at %s", opts.driverRoot)
if err := s.CreateNVIDIAControlDeviceNodesAt(opts.driverRoot); err != nil {
return fmt.Errorf("failed to create control device nodes: %v", err)
if err := devices.CreateNVIDIAControlDevices(); err != nil {
return fmt.Errorf("failed to create NVIDIA control device nodes: %v", err)
}
}
return nil