mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 00:08:11 +00:00
Merge branch 'create-device-nodes' into 'main'
Add nvidia-ctk system create-device-nodes command See merge request nvidia/container-toolkit/container-toolkit!362
This commit is contained in:
commit
7c5283bb97
107
cmd/nvidia-ctk/system/create-device-nodes/create-device-nodes.go
Normal file
107
cmd/nvidia-ctk/system/create-device-nodes/create-device-nodes.go
Normal file
@ -0,0 +1,107 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package createdevicenodes
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// options holds the values bound to the command-line flags of the
// create-device-nodes sub-command.
type options struct {
	// driverRoot is bound to --driver-root (env DRIVER_ROOT); device nodes
	// are created below this path.
	driverRoot string

	// dryRun is bound to --dry-run (env DRY_RUN).
	dryRun bool

	// control is bound to --control-devices and selects creation of the
	// control device nodes.
	control bool
}
|
||||
|
||||
// NewCommand constructs a command sub-command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
opts := options{}
|
||||
|
||||
c := cli.Command{
|
||||
Name: "create-device-nodes",
|
||||
Usage: "A utility to create NVIDIA device ndoes",
|
||||
Before: func(c *cli.Context) error {
|
||||
return m.validateFlags(c, &opts)
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &opts)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "driver-root",
|
||||
Usage: "the path to the driver root. Device nodes will be created at `DRIVER_ROOT`/dev",
|
||||
Value: "/",
|
||||
Destination: &opts.driverRoot,
|
||||
EnvVars: []string{"DRIVER_ROOT"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "control-devices",
|
||||
Usage: "create all control device nodes: nvidiactl, nvidia-modeset, nvidia-uvm, nvidia-uvm-tools",
|
||||
Destination: &opts.control,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "dry-run",
|
||||
Usage: "if set, the command will not create any symlinks.",
|
||||
Value: false,
|
||||
Destination: &opts.dryRun,
|
||||
EnvVars: []string{"DRY_RUN"},
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) validateFlags(r *cli.Context, opts *options) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, opts *options) error {
|
||||
s, err := system.New(
|
||||
system.WithLogger(m.logger),
|
||||
system.WithDryRun(opts.dryRun),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create library: %v", err)
|
||||
}
|
||||
|
||||
if opts.control {
|
||||
m.logger.Infof("Creating control device nodes at %s", opts.driverRoot)
|
||||
if err := s.CreateNVIDIAControlDeviceNodesAt(opts.driverRoot); err != nil {
|
||||
return fmt.Errorf("failed to create control device nodes: %v", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
@ -18,6 +18,7 @@ package system
|
||||
|
||||
import (
|
||||
devchar "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks"
|
||||
devicenodes "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-device-nodes"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
@ -43,6 +44,7 @@ func (m command) build() *cli.Command {
|
||||
|
||||
system.Subcommands = []*cli.Command{
|
||||
devchar.NewCommand(m.logger),
|
||||
devicenodes.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &system
|
||||
|
36
internal/system/options.go
Normal file
36
internal/system/options.go
Normal file
@ -0,0 +1,36 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package system
|
||||
|
||||
import "github.com/sirupsen/logrus"
|
||||
|
||||
// Option is a functional option for the system command
|
||||
type Option func(*Interface)
|
||||
|
||||
// WithLogger sets the logger for the system command
|
||||
func WithLogger(logger *logrus.Logger) Option {
|
||||
return func(i *Interface) {
|
||||
i.logger = logger
|
||||
}
|
||||
}
|
||||
|
||||
// WithDryRun sets the dry run flag
|
||||
func WithDryRun(dryRun bool) Option {
|
||||
return func(i *Interface) {
|
||||
i.dryRun = dryRun
|
||||
}
|
||||
}
|
149
internal/system/system.go
Normal file
149
internal/system/system.go
Normal file
@ -0,0 +1,149 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package system
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Interface is the interface for the system command
|
||||
type Interface struct {
|
||||
logger *logrus.Logger
|
||||
dryRun bool
|
||||
|
||||
nvidiaDevices nvidiaDevices
|
||||
}
|
||||
|
||||
// New constructs a system command with the specified options
|
||||
func New(opts ...Option) (*Interface, error) {
|
||||
i := &Interface{
|
||||
logger: logrus.StandardLogger(),
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(i)
|
||||
}
|
||||
|
||||
devices, err := devices.GetNVIDIADevices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create devices info: %v", err)
|
||||
}
|
||||
i.nvidiaDevices = nvidiaDevices{devices}
|
||||
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// CreateNVIDIAControlDeviceNodesAt creates the NVIDIA control device nodes associated with the NVIDIA driver at the specified root.
|
||||
func (m *Interface) CreateNVIDIAControlDeviceNodesAt(root string) error {
|
||||
controlNodes := []string{"/dev/nvidiactl", "/dev/nvidia-modeset", "/dev/nvidia-uvm", "/dev/nvidia-uvm-tools"}
|
||||
|
||||
for _, node := range controlNodes {
|
||||
path := filepath.Join(root, node)
|
||||
err := m.CreateNVIDIADeviceNode(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create device node %s: %v", path, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateNVIDIADeviceNode creates a specified device node associated with the NVIDIA driver.
|
||||
func (m *Interface) CreateNVIDIADeviceNode(path string) error {
|
||||
node := filepath.Base(path)
|
||||
if !strings.HasPrefix(node, "nvidia") {
|
||||
return fmt.Errorf("invalid device node %q", node)
|
||||
}
|
||||
|
||||
major, err := m.nvidiaDevices.Major(node)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determine major: %v", err)
|
||||
}
|
||||
|
||||
minor, err := m.nvidiaDevices.Minor(node)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determine minor: %v", err)
|
||||
}
|
||||
|
||||
return m.createDeviceNode(path, int(major), int(minor))
|
||||
}
|
||||
|
||||
func (m *Interface) createDeviceNode(path string, major int, minor int) error {
|
||||
if m.dryRun {
|
||||
m.logger.Infof("Running: mknod --mode=0666 %s c %d %d", path, major, minor)
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
m.logger.Infof("Skipping: %s already exists", path)
|
||||
return nil
|
||||
} else if !os.IsNotExist(err) {
|
||||
return fmt.Errorf("failed to stat %s: %v", path, err)
|
||||
}
|
||||
|
||||
err := unix.Mknod(path, unix.S_IFCHR, int(unix.Mkdev(uint32(major), uint32(minor))))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return unix.Chmod(path, 0666)
|
||||
}
|
||||
|
||||
type nvidiaDevices struct {
|
||||
devices.Devices
|
||||
}
|
||||
|
||||
// Major returns the major number for the specified NVIDIA device node.
|
||||
// If the device node is not supported, an error is returned.
|
||||
func (n *nvidiaDevices) Major(node string) (int64, error) {
|
||||
var valid bool
|
||||
var major devices.Major
|
||||
switch node {
|
||||
case "nvidia-uvm", "nvidia-uvm-tools":
|
||||
major, valid = n.Get(devices.NVIDIAUVM)
|
||||
case "nvidia-modeset", "nvidiactl":
|
||||
major, valid = n.Get(devices.NVIDIAGPU)
|
||||
}
|
||||
|
||||
if !valid {
|
||||
return 0, fmt.Errorf("invalid device node %q", node)
|
||||
}
|
||||
|
||||
return int64(major), nil
|
||||
}
|
||||
|
||||
// Minor returns the minor number for the specified NVIDIA device node.
|
||||
// If the device node is not supported, an error is returned.
|
||||
func (n *nvidiaDevices) Minor(node string) (int64, error) {
|
||||
switch node {
|
||||
case "nvidia-modeset":
|
||||
return devices.NVIDIAModesetMinor, nil
|
||||
case "nvidia-uvm-tools":
|
||||
return devices.NVIDIAUVMToolsMinor, nil
|
||||
case "nvidia-uvm":
|
||||
return devices.NVIDIAUVMMinor, nil
|
||||
case "nvidiactl":
|
||||
return devices.NVIDIACTLMinor, nil
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("invalid device node %q", node)
|
||||
}
|
@ -23,6 +23,7 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
@ -608,6 +609,16 @@ func generateCDISpec(opts *options, nvidiaCTKPath string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Infof("Creating control device nodes at %v", opts.DriverRootCtrPath)
|
||||
s, err := system.New()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create library: %v", err)
|
||||
}
|
||||
if err := s.CreateNVIDIAControlDeviceNodesAt(opts.DriverRootCtrPath); err != nil {
|
||||
return fmt.Errorf("failed to create control device nodes: %v", err)
|
||||
}
|
||||
|
||||
log.Info("Generating CDI spec for management containers")
|
||||
cdilib, err := nvcdi.New(
|
||||
nvcdi.WithMode(nvcdi.ModeManagement),
|
||||
nvcdi.WithDriverRoot(opts.DriverRootCtrPath),
|
||||
|
Loading…
Reference in New Issue
Block a user