mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Split internal system package
This change splits the functionality in the internal system package into two packages: one for dealing with devices and one for dealing with kernel modules. This removes ambiguity around the meaning of driver / device roots in each case. In each case, a root can be specified where device nodes are created or kernel modules loaded. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
@@ -28,14 +28,14 @@ import (
|
||||
|
||||
type allPossible struct {
|
||||
logger logger.Interface
|
||||
driverRoot string
|
||||
devRoot string
|
||||
deviceMajors devices.Devices
|
||||
migCaps nvcaps.MigCaps
|
||||
}
|
||||
|
||||
// newAllPossible returns a new allPossible device node lister.
|
||||
// This lister lists all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
|
||||
func newAllPossible(logger logger.Interface, driverRoot string) (nodeLister, error) {
|
||||
func newAllPossible(logger logger.Interface, devRoot string) (nodeLister, error) {
|
||||
deviceMajors, err := devices.GetNVIDIADevices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed reading device majors: %v", err)
|
||||
@@ -61,7 +61,7 @@ func newAllPossible(logger logger.Interface, driverRoot string) (nodeLister, err
|
||||
|
||||
l := allPossible{
|
||||
logger: logger,
|
||||
driverRoot: driverRoot,
|
||||
devRoot: devRoot,
|
||||
deviceMajors: deviceMajors,
|
||||
migCaps: migCaps,
|
||||
}
|
||||
@@ -72,7 +72,7 @@ func newAllPossible(logger logger.Interface, driverRoot string) (nodeLister, err
|
||||
// DeviceNodes returns a list of all possible device nodes for NVIDIA GPUs, control devices, and capability devices.
|
||||
func (m allPossible) DeviceNodes() ([]deviceNode, error) {
|
||||
gpus, err := nvpci.New(
|
||||
nvpci.WithPCIDevicesRoot(filepath.Join(m.driverRoot, nvpci.PCIDevicesRoot)),
|
||||
nvpci.WithPCIDevicesRoot(filepath.Join(m.devRoot, nvpci.PCIDevicesRoot)),
|
||||
).GetGPUs()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get GPU information: %v", err)
|
||||
@@ -80,7 +80,7 @@ func (m allPossible) DeviceNodes() ([]deviceNode, error) {
|
||||
|
||||
count := len(gpus)
|
||||
if count == 0 {
|
||||
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
|
||||
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -179,7 +179,7 @@ func (m allPossible) newDeviceNode(deviceName devices.Name, path string, minor i
|
||||
major, _ := m.deviceMajors.Get(deviceName)
|
||||
|
||||
return deviceNode{
|
||||
path: filepath.Join(m.driverRoot, path),
|
||||
path: filepath.Join(m.devRoot, path),
|
||||
major: uint32(major),
|
||||
minor: uint32(minor),
|
||||
}
|
||||
|
||||
@@ -25,7 +25,8 @@ import (
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
@@ -216,6 +217,7 @@ type linkCreator struct {
|
||||
logger logger.Interface
|
||||
lister nodeLister
|
||||
driverRoot string
|
||||
devRoot string
|
||||
devCharPath string
|
||||
dryRun bool
|
||||
createAll bool
|
||||
@@ -243,6 +245,9 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) {
|
||||
if c.driverRoot == "" {
|
||||
c.driverRoot = "/"
|
||||
}
|
||||
if c.devRoot == "" {
|
||||
c.devRoot = "/"
|
||||
}
|
||||
if c.devCharPath == "" {
|
||||
c.devCharPath = defaultDevCharPath
|
||||
}
|
||||
@@ -252,13 +257,13 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) {
|
||||
}
|
||||
|
||||
if c.createAll {
|
||||
lister, err := newAllPossible(c.logger, c.driverRoot)
|
||||
lister, err := newAllPossible(c.logger, c.devRoot)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create all possible device lister: %v", err)
|
||||
}
|
||||
c.lister = lister
|
||||
} else {
|
||||
c.lister = existing{c.logger, c.driverRoot}
|
||||
c.lister = existing{c.logger, c.devRoot}
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
@@ -268,36 +273,48 @@ func (m linkCreator) setup() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
s, err := system.New(
|
||||
system.WithLogger(m.logger),
|
||||
system.WithDryRun(m.dryRun),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if m.loadKernelModules {
|
||||
if err := s.LoadNVIDIAKernelModules(); err != nil {
|
||||
modules := nvmodules.New(
|
||||
nvmodules.WithLogger(m.logger),
|
||||
nvmodules.WithDryRun(m.dryRun),
|
||||
nvmodules.WithRoot(m.driverRoot),
|
||||
)
|
||||
if err := modules.LoadAll(); err != nil {
|
||||
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if m.createDeviceNodes {
|
||||
if err := s.CreateNVIDIAControlDeviceNodesAt(m.driverRoot); err != nil {
|
||||
devices, err := nvdevices.New(
|
||||
nvdevices.WithLogger(m.logger),
|
||||
nvdevices.WithDryRun(m.dryRun),
|
||||
nvdevices.WithDevRoot(m.devRoot),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := devices.CreateNVIDIAControlDevices(); err != nil {
|
||||
return fmt.Errorf("failed to create NVIDIA device nodes: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithDriverRoot sets the driver root path.
|
||||
// This is the path in which kernel modules must be loaded.
|
||||
func WithDriverRoot(root string) Option {
|
||||
return func(c *linkCreator) {
|
||||
c.driverRoot = root
|
||||
}
|
||||
}
|
||||
|
||||
// WithDevRoot sets the root path for the /dev directory.
|
||||
func WithDevRoot(root string) Option {
|
||||
return func(c *linkCreator) {
|
||||
c.devRoot = root
|
||||
}
|
||||
}
|
||||
|
||||
// WithDevCharPath sets the path at which the symlinks will be created.
|
||||
func WithDevCharPath(path string) Option {
|
||||
return func(c *linkCreator) {
|
||||
|
||||
@@ -30,8 +30,8 @@ type nodeLister interface {
|
||||
}
|
||||
|
||||
type existing struct {
|
||||
logger logger.Interface
|
||||
driverRoot string
|
||||
logger logger.Interface
|
||||
devRoot string
|
||||
}
|
||||
|
||||
// DeviceNodes returns a list of NVIDIA device nodes in the specified root.
|
||||
@@ -39,7 +39,7 @@ type existing struct {
|
||||
func (m existing) DeviceNodes() ([]deviceNode, error) {
|
||||
locator := lookup.NewCharDeviceLocator(
|
||||
lookup.WithLogger(m.logger),
|
||||
lookup.WithRoot(m.driverRoot),
|
||||
lookup.WithRoot(m.devRoot),
|
||||
lookup.WithOptional(true),
|
||||
)
|
||||
|
||||
@@ -54,7 +54,7 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
|
||||
}
|
||||
|
||||
if len(devices) == 0 && len(capDevices) == 0 {
|
||||
m.logger.Infof("No NVIDIA devices found in %s", m.driverRoot)
|
||||
m.logger.Infof("No NVIDIA devices found in %s", m.devRoot)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,8 @@ import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
@@ -96,19 +97,29 @@ func (m command) validateFlags(r *cli.Context, opts *options) error {
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, opts *options) error {
|
||||
s, err := system.New(
|
||||
system.WithLogger(m.logger),
|
||||
system.WithDryRun(opts.dryRun),
|
||||
system.WithLoadKernelModules(opts.loadKernelModules),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create library: %v", err)
|
||||
if opts.loadKernelModules {
|
||||
modules := nvmodules.New(
|
||||
nvmodules.WithLogger(m.logger),
|
||||
nvmodules.WithDryRun(opts.dryRun),
|
||||
nvmodules.WithRoot(opts.driverRoot),
|
||||
)
|
||||
if err := modules.LoadAll(); err != nil {
|
||||
return fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if opts.control {
|
||||
devices, err := nvdevices.New(
|
||||
nvdevices.WithLogger(m.logger),
|
||||
nvdevices.WithDryRun(opts.dryRun),
|
||||
nvdevices.WithDevRoot(opts.driverRoot),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.logger.Infof("Creating control device nodes at %s", opts.driverRoot)
|
||||
if err := s.CreateNVIDIAControlDeviceNodesAt(opts.driverRoot); err != nil {
|
||||
return fmt.Errorf("failed to create control device nodes: %v", err)
|
||||
if err := devices.CreateNVIDIAControlDevices(); err != nil {
|
||||
return fmt.Errorf("failed to create NVIDIA control device nodes: %v", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
Reference in New Issue
Block a user