mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-08 15:47:32 +00:00
Add option to load NVIDIA kernel modules
These changes add a --load-kernel-modules option to the nvidia-ctk system commands. If specified, the NVIDIA kernel modules (nvidia, nvidia-uvm, and nvidia-modeset) are loaded before any operations on device nodes are performed. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
528cbbb636
commit
7b801a0ce0
@ -24,6 +24,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
|
||||||
"github.com/fsnotify/fsnotify"
|
"github.com/fsnotify/fsnotify"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/urfave/cli/v2"
|
"github.com/urfave/cli/v2"
|
||||||
@ -43,6 +44,7 @@ type config struct {
|
|||||||
dryRun bool
|
dryRun bool
|
||||||
watch bool
|
watch bool
|
||||||
createAll bool
|
createAll bool
|
||||||
|
loadKernelModules bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCommand constructs a command sub-command with the specified logger
|
// NewCommand constructs a command sub-command with the specified logger
|
||||||
@ -97,6 +99,12 @@ func (m command) build() *cli.Command {
|
|||||||
Destination: &cfg.createAll,
|
Destination: &cfg.createAll,
|
||||||
EnvVars: []string{"CREATE_ALL"},
|
EnvVars: []string{"CREATE_ALL"},
|
||||||
},
|
},
|
||||||
|
&cli.BoolFlag{
|
||||||
|
Name: "load-kernel-modules",
|
||||||
|
Usage: "Load the NVIDIA kernel modules before creating symlinks. This is only applicable when --create-all is set.",
|
||||||
|
Destination: &cfg.loadKernelModules,
|
||||||
|
EnvVars: []string{"LOAD_KERNEL_MODULES"},
|
||||||
|
},
|
||||||
&cli.BoolFlag{
|
&cli.BoolFlag{
|
||||||
Name: "dry-run",
|
Name: "dry-run",
|
||||||
Usage: "If set, the command will not create any symlinks.",
|
Usage: "If set, the command will not create any symlinks.",
|
||||||
@ -114,6 +122,11 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error {
|
|||||||
return fmt.Errorf("create-all and watch are mutually exclusive")
|
return fmt.Errorf("create-all and watch are mutually exclusive")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if cfg.loadKernelModules && !cfg.createAll {
|
||||||
|
m.logger.Warn("load-kernel-modules is only applicable when create-all is set; ignoring")
|
||||||
|
cfg.loadKernelModules = false
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,6 +150,7 @@ func (m command) run(c *cli.Context, cfg *config) error {
|
|||||||
WithDriverRoot(cfg.driverRoot),
|
WithDriverRoot(cfg.driverRoot),
|
||||||
WithDryRun(cfg.dryRun),
|
WithDryRun(cfg.dryRun),
|
||||||
WithCreateAll(cfg.createAll),
|
WithCreateAll(cfg.createAll),
|
||||||
|
WithLoadKernelModules(cfg.loadKernelModules),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to create symlink creator: %v", err)
|
return fmt.Errorf("failed to create symlink creator: %v", err)
|
||||||
@ -192,6 +206,7 @@ type linkCreator struct {
|
|||||||
devCharPath string
|
devCharPath string
|
||||||
dryRun bool
|
dryRun bool
|
||||||
createAll bool
|
createAll bool
|
||||||
|
loadKernelModules bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creator is an interface for creating symlinks to /dev/nv* devices in /dev/char.
|
// Creator is an interface for creating symlinks to /dev/nv* devices in /dev/char.
|
||||||
@ -218,6 +233,19 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) {
|
|||||||
c.devCharPath = defaultDevCharPath
|
c.devCharPath = defaultDevCharPath
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if c.loadKernelModules {
|
||||||
|
s, err := system.New(
|
||||||
|
system.WithLogger(c.logger),
|
||||||
|
system.WithDryRun(c.dryRun),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err := s.LoadNVIDIAKernelModules(); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to load NVIDIA kernel modules: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if c.createAll {
|
if c.createAll {
|
||||||
lister, err := newAllPossible(c.logger, c.driverRoot)
|
lister, err := newAllPossible(c.logger, c.driverRoot)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -265,6 +293,13 @@ func WithCreateAll(createAll bool) Option {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithLoadKernelModules sets the loadKernelModules flag for the linkCreator.
|
||||||
|
func WithLoadKernelModules(loadKernelModules bool) Option {
|
||||||
|
return func(lc *linkCreator) {
|
||||||
|
lc.loadKernelModules = loadKernelModules
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// CreateLinks creates symlinks for all NVIDIA device nodes found in the driver root.
|
// CreateLinks creates symlinks for all NVIDIA device nodes found in the driver root.
|
||||||
func (m linkCreator) CreateLinks() error {
|
func (m linkCreator) CreateLinks() error {
|
||||||
deviceNodes, err := m.lister.DeviceNodes()
|
deviceNodes, err := m.lister.DeviceNodes()
|
||||||
|
@ -34,6 +34,8 @@ type options struct {
|
|||||||
dryRun bool
|
dryRun bool
|
||||||
|
|
||||||
control bool
|
control bool
|
||||||
|
|
||||||
|
loadKernelModules bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCommand constructs a command sub-command with the specified logger
|
// NewCommand constructs a command sub-command with the specified logger
|
||||||
@ -72,6 +74,11 @@ func (m command) build() *cli.Command {
|
|||||||
Usage: "create all control device nodes: nvidiactl, nvidia-modeset, nvidia-uvm, nvidia-uvm-tools",
|
Usage: "create all control device nodes: nvidiactl, nvidia-modeset, nvidia-uvm, nvidia-uvm-tools",
|
||||||
Destination: &opts.control,
|
Destination: &opts.control,
|
||||||
},
|
},
|
||||||
|
&cli.BoolFlag{
|
||||||
|
Name: "load-kernel-modules",
|
||||||
|
Usage: "load the NVIDIA Kernel Modules before creating devices nodes",
|
||||||
|
Destination: &opts.loadKernelModules,
|
||||||
|
},
|
||||||
&cli.BoolFlag{
|
&cli.BoolFlag{
|
||||||
Name: "dry-run",
|
Name: "dry-run",
|
||||||
Usage: "if set, the command will not create any symlinks.",
|
Usage: "if set, the command will not create any symlinks.",
|
||||||
@ -92,6 +99,7 @@ func (m command) run(c *cli.Context, opts *options) error {
|
|||||||
s, err := system.New(
|
s, err := system.New(
|
||||||
system.WithLogger(m.logger),
|
system.WithLogger(m.logger),
|
||||||
system.WithDryRun(opts.dryRun),
|
system.WithDryRun(opts.dryRun),
|
||||||
|
system.WithLoadKernelModules(opts.loadKernelModules),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to create library: %v", err)
|
return fmt.Errorf("failed to create library: %v", err)
|
||||||
|
@ -34,3 +34,10 @@ func WithDryRun(dryRun bool) Option {
|
|||||||
i.dryRun = dryRun
|
i.dryRun = dryRun
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithLoadKernelModules sets the load kernel modules flag
|
||||||
|
func WithLoadKernelModules(loadKernelModules bool) Option {
|
||||||
|
return func(i *Interface) {
|
||||||
|
i.loadKernelModules = loadKernelModules
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -19,6 +19,7 @@ package system
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@ -31,7 +32,7 @@ import (
|
|||||||
type Interface struct {
|
type Interface struct {
|
||||||
logger *logrus.Logger
|
logger *logrus.Logger
|
||||||
dryRun bool
|
dryRun bool
|
||||||
|
loadKernelModules bool
|
||||||
nvidiaDevices nvidiaDevices
|
nvidiaDevices nvidiaDevices
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -44,6 +45,12 @@ func New(opts ...Option) (*Interface, error) {
|
|||||||
opt(i)
|
opt(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if i.loadKernelModules {
|
||||||
|
if err := i.LoadNVIDIAKernelModules(); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to load kernel modules: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
devices, err := devices.GetNVIDIADevices()
|
devices, err := devices.GetNVIDIADevices()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create devices info: %v", err)
|
return nil, fmt.Errorf("failed to create devices info: %v", err)
|
||||||
@ -108,6 +115,26 @@ func (m *Interface) createDeviceNode(path string, major int, minor int) error {
|
|||||||
return unix.Chmod(path, 0666)
|
return unix.Chmod(path, 0666)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LoadNVIDIAKernelModules loads the NVIDIA kernel modules.
|
||||||
|
func (m *Interface) LoadNVIDIAKernelModules() error {
|
||||||
|
modules := []string{"nvidia", "nvidia-uvm", "nvidia-modeset"}
|
||||||
|
|
||||||
|
for _, module := range modules {
|
||||||
|
if m.dryRun {
|
||||||
|
m.logger.Infof("Running: /sbin/modprobe %s", module)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
cmd := exec.Command("/sbin/modprobe", module)
|
||||||
|
|
||||||
|
if output, err := cmd.CombinedOutput(); err != nil {
|
||||||
|
m.logger.Debugf("Failed to load kernel module %s: %v", module, string(output))
|
||||||
|
return fmt.Errorf("failed to load kernel module %s: %v", module, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
type nvidiaDevices struct {
|
type nvidiaDevices struct {
|
||||||
devices.Devices
|
devices.Devices
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user