2023-01-19 14:24:00 +00:00
/ * *
# Copyright ( c ) NVIDIA CORPORATION . All rights reserved .
#
# Licensed under the Apache License , Version 2.0 ( the "License" ) ;
# you may not use this file except in compliance with the License .
# You may obtain a copy of the License at
#
# http : //www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing , software
# distributed under the License is distributed on an "AS IS" BASIS ,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
# See the License for the specific language governing permissions and
# limitations under the License .
* * /
package devchar
import (
"fmt"
"os"
2023-01-23 15:36:40 +00:00
"os/signal"
2023-01-19 14:24:00 +00:00
"path/filepath"
2023-01-23 15:36:40 +00:00
"strings"
"syscall"
2023-01-19 14:24:00 +00:00
2023-12-01 01:10:10 +00:00
"github.com/fsnotify/fsnotify"
"github.com/urfave/cli/v2"
2023-03-22 12:27:43 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
2023-06-12 18:46:56 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
2023-01-19 14:24:00 +00:00
)
// defaultDevCharPath is the directory in which the MAJOR:MINOR symlinks are
// created when no other path has been configured.
const defaultDevCharPath = "/dev/char"
type command struct {
2023-03-22 12:27:43 +00:00
logger logger . Interface
2023-01-19 14:24:00 +00:00
}
// config holds the values of the command line flags (or their environment
// variable equivalents) for the create-dev-char-symlinks command.
type config struct {
	// devCharPath is the directory in which the symlinks are created.
	devCharPath string
	// driverRoot is the driver root; its dev subdirectory is the one watched
	// for changes when watch is set.
	driverRoot string
	// dryRun disables the creation of symlinks.
	dryRun bool
	// watch keeps the command running and recreates the symlinks on changes.
	watch bool
	// createAll requests symlinks for all possible devices rather than only
	// for existing device nodes.
	createAll bool
	// createDeviceNodes requests creation of the NVIDIA control device nodes.
	// It is reset to false by flag validation unless createAll is set.
	createDeviceNodes bool
	// loadKernelModules requests loading of the NVIDIA kernel modules.
	// It is reset to false by flag validation unless createAll is set.
	loadKernelModules bool
}
2023-01-27 10:41:30 +00:00
// NewCommand constructs a command sub-command with the specified logger
2023-03-22 12:27:43 +00:00
func NewCommand ( logger logger . Interface ) * cli . Command {
2023-01-19 14:24:00 +00:00
c := command {
logger : logger ,
}
return c . build ( )
}
// build
func ( m command ) build ( ) * cli . Command {
cfg := config { }
// Create the 'create-dev-char-symlinks' command
c := cli . Command {
Name : "create-dev-char-symlinks" ,
2023-01-27 10:41:30 +00:00
Usage : "A utility to create symlinks to possible /dev/nv* devices in /dev/char" ,
2023-01-24 09:06:21 +00:00
Before : func ( c * cli . Context ) error {
return m . validateFlags ( c , & cfg )
} ,
2023-01-19 14:24:00 +00:00
Action : func ( c * cli . Context ) error {
return m . run ( c , & cfg )
} ,
}
c . Flags = [ ] cli . Flag {
& cli . StringFlag {
Name : "dev-char-path" ,
Usage : "The path at which the symlinks will be created. Symlinks will be created as `DEV_CHAR`/MAJOR:MINOR where MAJOR and MINOR are the major and minor numbers of a corresponding device node." ,
Value : defaultDevCharPath ,
Destination : & cfg . devCharPath ,
EnvVars : [ ] string { "DEV_CHAR_PATH" } ,
} ,
& cli . StringFlag {
Name : "driver-root" ,
Usage : "The path to the driver root. `DRIVER_ROOT`/dev is searched for NVIDIA device nodes." ,
Value : "/" ,
Destination : & cfg . driverRoot ,
2024-02-09 13:28:02 +00:00
EnvVars : [ ] string { "NVIDIA_DRIVER_ROOT" , "DRIVER_ROOT" } ,
2023-01-19 14:24:00 +00:00
} ,
2023-01-23 15:36:40 +00:00
& cli . BoolFlag {
Name : "watch" ,
Usage : "If set, the command will watch for changes to the driver root and recreate the symlinks when changes are detected." ,
Value : false ,
Destination : & cfg . watch ,
EnvVars : [ ] string { "WATCH" } ,
} ,
2023-01-24 09:06:21 +00:00
& cli . BoolFlag {
Name : "create-all" ,
Usage : "Create all possible /dev/char symlinks instead of limiting these to existing device nodes." ,
Destination : & cfg . createAll ,
EnvVars : [ ] string { "CREATE_ALL" } ,
} ,
2023-05-31 09:16:30 +00:00
& cli . BoolFlag {
Name : "load-kernel-modules" ,
Usage : "Load the NVIDIA kernel modules before creating symlinks. This is only applicable when --create-all is set." ,
Destination : & cfg . loadKernelModules ,
EnvVars : [ ] string { "LOAD_KERNEL_MODULES" } ,
} ,
2023-05-31 09:49:38 +00:00
& cli . BoolFlag {
Name : "create-device-nodes" ,
Usage : "Create the NVIDIA control device nodes in the driver root if they do not exist. This is only applicable when --create-all is set" ,
Destination : & cfg . createDeviceNodes ,
EnvVars : [ ] string { "CREATE_DEVICE_NODES" } ,
} ,
2023-01-19 14:24:00 +00:00
& cli . BoolFlag {
Name : "dry-run" ,
Usage : "If set, the command will not create any symlinks." ,
Value : false ,
Destination : & cfg . dryRun ,
EnvVars : [ ] string { "DRY_RUN" } ,
} ,
}
return & c
}
2023-01-24 09:06:21 +00:00
func ( m command ) validateFlags ( r * cli . Context , cfg * config ) error {
if cfg . createAll && cfg . watch {
return fmt . Errorf ( "create-all and watch are mutually exclusive" )
}
2023-01-23 15:36:40 +00:00
2023-05-31 09:16:30 +00:00
if cfg . loadKernelModules && ! cfg . createAll {
2023-06-06 19:46:38 +00:00
m . logger . Warning ( "load-kernel-modules is only applicable when create-all is set; ignoring" )
2023-05-31 09:16:30 +00:00
cfg . loadKernelModules = false
}
2023-05-31 09:49:38 +00:00
if cfg . createDeviceNodes && ! cfg . createAll {
2023-06-06 19:46:38 +00:00
m . logger . Warning ( "create-device-nodes is only applicable when create-all is set; ignoring" )
2023-05-31 09:49:38 +00:00
cfg . createDeviceNodes = false
}
2023-01-24 09:06:21 +00:00
return nil
}
func ( m command ) run ( c * cli . Context , cfg * config ) error {
2023-01-23 15:36:40 +00:00
var watcher * fsnotify . Watcher
var sigs chan os . Signal
if cfg . watch {
watcher , err := newFSWatcher ( filepath . Join ( cfg . driverRoot , "dev" ) )
if err != nil {
return fmt . Errorf ( "failed to create FS watcher: %v" , err )
}
defer watcher . Close ( )
sigs = newOSWatcher ( syscall . SIGHUP , syscall . SIGINT , syscall . SIGTERM , syscall . SIGQUIT )
}
2023-01-24 09:06:21 +00:00
l , err := NewSymlinkCreator (
2023-01-19 14:24:00 +00:00
WithLogger ( m . logger ) ,
WithDevCharPath ( cfg . devCharPath ) ,
WithDriverRoot ( cfg . driverRoot ) ,
WithDryRun ( cfg . dryRun ) ,
2023-01-24 09:06:21 +00:00
WithCreateAll ( cfg . createAll ) ,
2023-05-31 09:16:30 +00:00
WithLoadKernelModules ( cfg . loadKernelModules ) ,
2023-05-31 09:49:38 +00:00
WithCreateDeviceNodes ( cfg . createDeviceNodes ) ,
2023-01-19 14:24:00 +00:00
)
2023-01-24 09:06:21 +00:00
if err != nil {
return fmt . Errorf ( "failed to create symlink creator: %v" , err )
}
2023-01-23 15:36:40 +00:00
create :
2023-01-24 09:06:21 +00:00
err = l . CreateLinks ( )
2023-01-19 14:24:00 +00:00
if err != nil {
return fmt . Errorf ( "failed to create links: %v" , err )
}
2023-01-23 15:36:40 +00:00
if ! cfg . watch {
return nil
}
for {
select {
case event := <- watcher . Events :
deviceNode := filepath . Base ( event . Name )
if ! strings . HasPrefix ( deviceNode , "nvidia" ) {
continue
}
if event . Op & fsnotify . Create == fsnotify . Create {
m . logger . Infof ( "%s created, restarting." , event . Name )
goto create
}
if event . Op & fsnotify . Create == fsnotify . Remove {
m . logger . Infof ( "%s removed. Ignoring" , event . Name )
}
// Watch for any other fs errors and log them.
case err := <- watcher . Errors :
m . logger . Errorf ( "inotify: %s" , err )
// React to signals
case s := <- sigs :
switch s {
case syscall . SIGHUP :
m . logger . Infof ( "Received SIGHUP, recreating symlinks." )
goto create
default :
m . logger . Infof ( "Received signal %q, shutting down." , s )
return nil
}
}
}
2023-01-19 14:24:00 +00:00
}
type linkCreator struct {
2023-03-22 12:27:43 +00:00
logger logger . Interface
2023-05-31 09:16:30 +00:00
lister nodeLister
driverRoot string
2023-06-12 18:46:56 +00:00
devRoot string
2023-05-31 09:16:30 +00:00
devCharPath string
dryRun bool
createAll bool
2023-05-31 09:49:38 +00:00
createDeviceNodes bool
2023-05-31 09:16:30 +00:00
loadKernelModules bool
2023-01-19 14:24:00 +00:00
}
// Creator is implemented by types that can create the symlinks in /dev/char
// that point at /dev/nv* device nodes.
type Creator interface {
	// CreateLinks creates the symlinks and reports any fatal error.
	CreateLinks() error
}
// Option is a functional option: a function that adjusts a linkCreator while
// it is being constructed by NewSymlinkCreator.
type Option func(*linkCreator)
// NewSymlinkCreator creates a new linkCreator.
2023-01-24 09:06:21 +00:00
func NewSymlinkCreator ( opts ... Option ) ( Creator , error ) {
2023-01-19 14:24:00 +00:00
c := linkCreator { }
for _ , opt := range opts {
opt ( & c )
}
if c . logger == nil {
2023-03-22 12:27:43 +00:00
c . logger = logger . New ( )
2023-01-19 14:24:00 +00:00
}
if c . driverRoot == "" {
c . driverRoot = "/"
}
2023-06-12 18:46:56 +00:00
if c . devRoot == "" {
c . devRoot = "/"
}
2023-01-19 14:24:00 +00:00
if c . devCharPath == "" {
c . devCharPath = defaultDevCharPath
}
2023-01-24 09:06:21 +00:00
2023-05-31 09:49:38 +00:00
if err := c . setup ( ) ; err != nil {
return nil , err
2023-05-31 09:16:30 +00:00
}
2023-01-24 09:06:21 +00:00
if c . createAll {
2023-06-12 18:46:56 +00:00
lister , err := newAllPossible ( c . logger , c . devRoot )
2023-01-24 09:06:21 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to create all possible device lister: %v" , err )
}
c . lister = lister
} else {
2023-06-12 18:46:56 +00:00
c . lister = existing { c . logger , c . devRoot }
2023-01-19 14:24:00 +00:00
}
2023-01-24 09:06:21 +00:00
return c , nil
2023-01-19 14:24:00 +00:00
}
2023-05-31 09:49:38 +00:00
func ( m linkCreator ) setup ( ) error {
if ! m . loadKernelModules && ! m . createDeviceNodes {
return nil
}
if m . loadKernelModules {
2023-06-12 18:46:56 +00:00
modules := nvmodules . New (
nvmodules . WithLogger ( m . logger ) ,
nvmodules . WithDryRun ( m . dryRun ) ,
nvmodules . WithRoot ( m . driverRoot ) ,
)
if err := modules . LoadAll ( ) ; err != nil {
2023-05-31 09:49:38 +00:00
return fmt . Errorf ( "failed to load NVIDIA kernel modules: %v" , err )
}
}
if m . createDeviceNodes {
2023-06-12 18:46:56 +00:00
devices , err := nvdevices . New (
nvdevices . WithLogger ( m . logger ) ,
nvdevices . WithDryRun ( m . dryRun ) ,
nvdevices . WithDevRoot ( m . devRoot ) ,
)
if err != nil {
return err
}
if err := devices . CreateNVIDIAControlDevices ( ) ; err != nil {
2023-05-31 09:49:38 +00:00
return fmt . Errorf ( "failed to create NVIDIA device nodes: %v" , err )
}
}
return nil
}
2023-01-19 14:24:00 +00:00
// WithDriverRoot sets the driver root path.
2023-06-12 18:46:56 +00:00
// This is the path in which kernel modules must be loaded.
2023-01-19 14:24:00 +00:00
func WithDriverRoot ( root string ) Option {
return func ( c * linkCreator ) {
c . driverRoot = root
}
}
2023-06-12 18:46:56 +00:00
// WithDevRoot returns an Option that sets the root path under which the /dev
// directory is located.
func WithDevRoot(root string) Option {
	return func(lc *linkCreator) {
		lc.devRoot = root
	}
}
2023-01-19 14:24:00 +00:00
// WithDevCharPath returns an Option that sets the directory in which the
// symlinks are created.
func WithDevCharPath(path string) Option {
	return func(lc *linkCreator) {
		lc.devCharPath = path
	}
}
// WithDryRun returns an Option that enables or disables dry-run mode.
func WithDryRun(dryRun bool) Option {
	return func(lc *linkCreator) {
		lc.dryRun = dryRun
	}
}
// WithLogger sets the logger.
2023-03-22 12:27:43 +00:00
func WithLogger ( logger logger . Interface ) Option {
2023-01-19 14:24:00 +00:00
return func ( c * linkCreator ) {
c . logger = logger
}
}
2023-01-24 09:06:21 +00:00
// WithCreateAll returns an Option that sets whether links are created for
// all possible devices (true) or only for existing device nodes (false).
func WithCreateAll(createAll bool) Option {
	return func(c *linkCreator) {
		c.createAll = createAll
	}
}
2023-05-31 09:16:30 +00:00
// WithLoadKernelModules returns an Option that sets whether the NVIDIA kernel
// modules are loaded when the linkCreator is constructed.
func WithLoadKernelModules(loadKernelModules bool) Option {
	return func(c *linkCreator) {
		c.loadKernelModules = loadKernelModules
	}
}
2023-05-31 09:49:38 +00:00
// WithCreateDeviceNodes returns an Option that sets whether the NVIDIA
// control device nodes are created when the linkCreator is constructed.
func WithCreateDeviceNodes(createDeviceNodes bool) Option {
	return func(c *linkCreator) {
		c.createDeviceNodes = createDeviceNodes
	}
}
2023-01-24 09:06:21 +00:00
// CreateLinks creates symlinks for all NVIDIA device nodes found in the driver root.
2023-01-19 14:24:00 +00:00
func ( m linkCreator ) CreateLinks ( ) error {
deviceNodes , err := m . lister . DeviceNodes ( )
if err != nil {
return fmt . Errorf ( "failed to get device nodes: %v" , err )
}
if len ( deviceNodes ) != 0 && ! m . dryRun {
err := os . MkdirAll ( m . devCharPath , 0755 )
if err != nil {
return fmt . Errorf ( "failed to create directory %s: %v" , m . devCharPath , err )
}
}
for _ , deviceNode := range deviceNodes {
target := deviceNode . path
linkPath := filepath . Join ( m . devCharPath , deviceNode . devCharName ( ) )
m . logger . Infof ( "Creating link %s => %s" , linkPath , target )
if m . dryRun {
continue
}
err = os . Symlink ( target , linkPath )
if err != nil {
2023-06-06 19:46:38 +00:00
m . logger . Warningf ( "Could not create symlink: %v" , err )
2023-01-19 14:24:00 +00:00
}
}
return nil
}
// deviceNode describes a device node by its path and its major and minor
// device numbers.
type deviceNode struct {
	// path is the location of the device node; symlinks point at it.
	path string
	// major is the major device number.
	major uint32
	// minor is the minor device number.
	minor uint32
}

// devCharName returns the name of the symlink for the device node, formatted
// as MAJOR:MINOR in decimal.
func (d deviceNode) devCharName() string {
	major, minor := d.major, d.minor
	return fmt.Sprintf("%d:%d", major, minor)
}
2023-01-23 15:36:40 +00:00
// newFSWatcher creates an fsnotify watcher and registers each of the given
// paths with it. If any path cannot be added the watcher is closed and the
// error is returned.
func newFSWatcher(files ...string) (*fsnotify.Watcher, error) {
	w, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, err
	}

	for i := 0; i < len(files); i++ {
		if addErr := w.Add(files[i]); addErr != nil {
			// Release the watcher before reporting the failure.
			w.Close()
			return nil, addErr
		}
	}

	return w, nil
}
// newOSWatcher returns a channel with a buffer of one on which the given
// signals are delivered via signal.Notify.
func newOSWatcher(sigs ...os.Signal) chan os.Signal {
	const bufferSize = 1

	notifications := make(chan os.Signal, bufferSize)
	signal.Notify(notifications, sigs...)

	return notifications
}