mirror of
				https://github.com/NVIDIA/nvidia-container-toolkit
				synced 2025-06-26 18:18:24 +00:00 
			
		
		
		
	Add option to load NVIDIA kernel modules
These changes add a --load-kernel-modules option to the nvidia-ctk system commands. If specified, the NVIDIA kernel modules (nvidia, nvidia-uvm, and nvidia-modeset) are loaded before any operations on device nodes are performed. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
		
							parent
							
								
									528cbbb636
								
							
						
					
					
						commit
						7b801a0ce0
					
				| @ -24,6 +24,7 @@ import ( | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
| 
 | ||||
| 	"github.com/NVIDIA/nvidia-container-toolkit/internal/system" | ||||
| 	"github.com/fsnotify/fsnotify" | ||||
| 	"github.com/sirupsen/logrus" | ||||
| 	"github.com/urfave/cli/v2" | ||||
| @ -38,11 +39,12 @@ type command struct { | ||||
| } | ||||
| 
 | ||||
| type config struct { | ||||
| 	devCharPath string | ||||
| 	driverRoot  string | ||||
| 	dryRun      bool | ||||
| 	watch       bool | ||||
| 	createAll   bool | ||||
| 	devCharPath       string | ||||
| 	driverRoot        string | ||||
| 	dryRun            bool | ||||
| 	watch             bool | ||||
| 	createAll         bool | ||||
| 	loadKernelModules bool | ||||
| } | ||||
| 
 | ||||
| // NewCommand constructs a command sub-command with the specified logger
 | ||||
| @ -97,6 +99,12 @@ func (m command) build() *cli.Command { | ||||
| 			Destination: &cfg.createAll, | ||||
| 			EnvVars:     []string{"CREATE_ALL"}, | ||||
| 		}, | ||||
| 		&cli.BoolFlag{ | ||||
| 			Name:        "load-kernel-modules", | ||||
| 			Usage:       "Load the NVIDIA kernel modules before creating symlinks. This is only applicable when --create-all is set.", | ||||
| 			Destination: &cfg.loadKernelModules, | ||||
| 			EnvVars:     []string{"LOAD_KERNEL_MODULES"}, | ||||
| 		}, | ||||
| 		&cli.BoolFlag{ | ||||
| 			Name:        "dry-run", | ||||
| 			Usage:       "If set, the command will not create any symlinks.", | ||||
| @ -114,6 +122,11 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error { | ||||
| 		return fmt.Errorf("create-all and watch are mutually exclusive") | ||||
| 	} | ||||
| 
 | ||||
| 	if cfg.loadKernelModules && !cfg.createAll { | ||||
| 		m.logger.Warn("load-kernel-modules is only applicable when create-all is set; ignoring") | ||||
| 		cfg.loadKernelModules = false | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| @ -137,6 +150,7 @@ func (m command) run(c *cli.Context, cfg *config) error { | ||||
| 		WithDriverRoot(cfg.driverRoot), | ||||
| 		WithDryRun(cfg.dryRun), | ||||
| 		WithCreateAll(cfg.createAll), | ||||
| 		WithLoadKernelModules(cfg.loadKernelModules), | ||||
| 	) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("failed to create symlink creator: %v", err) | ||||
| @ -186,12 +200,13 @@ create: | ||||
| } | ||||
| 
 | ||||
| type linkCreator struct { | ||||
| 	logger      *logrus.Logger | ||||
| 	lister      nodeLister | ||||
| 	driverRoot  string | ||||
| 	devCharPath string | ||||
| 	dryRun      bool | ||||
| 	createAll   bool | ||||
| 	logger            *logrus.Logger | ||||
| 	lister            nodeLister | ||||
| 	driverRoot        string | ||||
| 	devCharPath       string | ||||
| 	dryRun            bool | ||||
| 	createAll         bool | ||||
| 	loadKernelModules bool | ||||
| } | ||||
| 
 | ||||
| // Creator is an interface for creating symlinks to /dev/nv* devices in /dev/char.
 | ||||
| @ -218,6 +233,19 @@ func NewSymlinkCreator(opts ...Option) (Creator, error) { | ||||
| 		c.devCharPath = defaultDevCharPath | ||||
| 	} | ||||
| 
 | ||||
| 	if c.loadKernelModules { | ||||
| 		s, err := system.New( | ||||
| 			system.WithLogger(c.logger), | ||||
| 			system.WithDryRun(c.dryRun), | ||||
| 		) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		if err := s.LoadNVIDIAKernelModules(); err != nil { | ||||
| 			return nil, fmt.Errorf("failed to load NVIDIA kernel modules: %v", err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if c.createAll { | ||||
| 		lister, err := newAllPossible(c.logger, c.driverRoot) | ||||
| 		if err != nil { | ||||
| @ -265,6 +293,13 @@ func WithCreateAll(createAll bool) Option { | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // WithLoadKernelModules sets the loadKernelModules flag for the linkCreator.
 | ||||
| func WithLoadKernelModules(loadKernelModules bool) Option { | ||||
| 	return func(lc *linkCreator) { | ||||
| 		lc.loadKernelModules = loadKernelModules | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // CreateLinks creates symlinks for all NVIDIA device nodes found in the driver root.
 | ||||
| func (m linkCreator) CreateLinks() error { | ||||
| 	deviceNodes, err := m.lister.DeviceNodes() | ||||
|  | ||||
| @ -34,6 +34,8 @@ type options struct { | ||||
| 	dryRun bool | ||||
| 
 | ||||
| 	control bool | ||||
| 
 | ||||
| 	loadKernelModules bool | ||||
| } | ||||
| 
 | ||||
| // NewCommand constructs a command sub-command with the specified logger
 | ||||
| @ -72,6 +74,11 @@ func (m command) build() *cli.Command { | ||||
| 			Usage:       "create all control device nodes: nvidiactl, nvidia-modeset, nvidia-uvm, nvidia-uvm-tools", | ||||
| 			Destination: &opts.control, | ||||
| 		}, | ||||
| 		&cli.BoolFlag{ | ||||
| 			Name:        "load-kernel-modules", | ||||
| 			Usage:       "load the NVIDIA Kernel Modules before creating devices nodes", | ||||
| 			Destination: &opts.loadKernelModules, | ||||
| 		}, | ||||
| 		&cli.BoolFlag{ | ||||
| 			Name:        "dry-run", | ||||
| 			Usage:       "if set, the command will not create any symlinks.", | ||||
| @ -92,6 +99,7 @@ func (m command) run(c *cli.Context, opts *options) error { | ||||
| 	s, err := system.New( | ||||
| 		system.WithLogger(m.logger), | ||||
| 		system.WithDryRun(opts.dryRun), | ||||
| 		system.WithLoadKernelModules(opts.loadKernelModules), | ||||
| 	) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("failed to create library: %v", err) | ||||
|  | ||||
| @ -34,3 +34,10 @@ func WithDryRun(dryRun bool) Option { | ||||
| 		i.dryRun = dryRun | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // WithLoadKernelModules sets the load kernel modules flag
 | ||||
| func WithLoadKernelModules(loadKernelModules bool) Option { | ||||
| 	return func(i *Interface) { | ||||
| 		i.loadKernelModules = loadKernelModules | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @ -19,6 +19,7 @@ package system | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 	"path/filepath" | ||||
| 	"strings" | ||||
| 
 | ||||
| @ -29,10 +30,10 @@ import ( | ||||
| 
 | ||||
| // Interface is the interface for the system command
 | ||||
| type Interface struct { | ||||
| 	logger *logrus.Logger | ||||
| 	dryRun bool | ||||
| 
 | ||||
| 	nvidiaDevices nvidiaDevices | ||||
| 	logger            *logrus.Logger | ||||
| 	dryRun            bool | ||||
| 	loadKernelModules bool | ||||
| 	nvidiaDevices     nvidiaDevices | ||||
| } | ||||
| 
 | ||||
| // New constructs a system command with the specified options
 | ||||
| @ -44,6 +45,12 @@ func New(opts ...Option) (*Interface, error) { | ||||
| 		opt(i) | ||||
| 	} | ||||
| 
 | ||||
| 	if i.loadKernelModules { | ||||
| 		if err := i.LoadNVIDIAKernelModules(); err != nil { | ||||
| 			return nil, fmt.Errorf("failed to load kernel modules: %v", err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	devices, err := devices.GetNVIDIADevices() | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("failed to create devices info: %v", err) | ||||
| @ -108,6 +115,26 @@ func (m *Interface) createDeviceNode(path string, major int, minor int) error { | ||||
| 	return unix.Chmod(path, 0666) | ||||
| } | ||||
| 
 | ||||
| // LoadNVIDIAKernelModules loads the NVIDIA kernel modules.
 | ||||
| func (m *Interface) LoadNVIDIAKernelModules() error { | ||||
| 	modules := []string{"nvidia", "nvidia-uvm", "nvidia-modeset"} | ||||
| 
 | ||||
| 	for _, module := range modules { | ||||
| 		if m.dryRun { | ||||
| 			m.logger.Infof("Running: /sbin/modprobe %s", module) | ||||
| 			continue | ||||
| 		} | ||||
| 		cmd := exec.Command("/sbin/modprobe", module) | ||||
| 
 | ||||
| 		if output, err := cmd.CombinedOutput(); err != nil { | ||||
| 			m.logger.Debugf("Failed to load kernel module %s: %v", module, string(output)) | ||||
| 			return fmt.Errorf("failed to load kernel module %s: %v", module, err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| type nvidiaDevices struct { | ||||
| 	devices.Devices | ||||
| } | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user