mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-10 15:25:33 +00:00
Create device nodes in JIT-CDI mode
Some checks failed
Some checks failed
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
76b6d4d38f
commit
1cfaef4b01
@ -45,7 +45,7 @@ func New(opts ...Option) Devices {
|
||||
type Option func(*builder)
|
||||
|
||||
// WithDeviceToMajor specifies an explicit device name to major number map.
|
||||
func WithDeviceToMajor(deviceToMajor map[string]int) Option {
|
||||
func WithDeviceToMajor(deviceToMajor map[string]uint32) Option {
|
||||
return func(b *builder) {
|
||||
b.asMap = make(devices)
|
||||
for name, major := range deviceToMajor {
|
||||
|
@ -45,7 +45,7 @@ const (
|
||||
type Name string
|
||||
|
||||
// Major represents a device major as specified under /proc/devices
|
||||
type Major int
|
||||
type Major uint32
|
||||
|
||||
// Devices represents the set of devices under /proc/devices
|
||||
//
|
||||
@ -130,8 +130,8 @@ func nvidiaDeviceFrom(reader io.Reader) (Devices, error) {
|
||||
return nvidiaDevices, nil
|
||||
}
|
||||
|
||||
func devicesFrom(reader io.Reader) map[string]int {
|
||||
allDevices := make(map[string]int)
|
||||
func devicesFrom(reader io.Reader) map[string]uint32 {
|
||||
allDevices := make(map[string]uint32)
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
device, major, err := processProcDeviceLine(scanner.Text())
|
||||
@ -143,11 +143,11 @@ func devicesFrom(reader io.Reader) map[string]int {
|
||||
return allDevices
|
||||
}
|
||||
|
||||
func processProcDeviceLine(line string) (string, int, error) {
|
||||
func processProcDeviceLine(line string) (string, uint32, error) {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
|
||||
var name string
|
||||
var major int
|
||||
var major uint32
|
||||
|
||||
n, _ := fmt.Sscanf(trimmed, "%d %s", &major, &name)
|
||||
if n == 2 {
|
||||
|
@ -22,12 +22,15 @@ import (
|
||||
|
||||
"tags.cncf.io/container-device-interface/pkg/parser"
|
||||
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
)
|
||||
@ -198,12 +201,14 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, drive
|
||||
logger.Warningf("Ignoring error(s) loading kernel modules: %v", err)
|
||||
}
|
||||
|
||||
identifiers := []string{}
|
||||
var identifiers []string
|
||||
for _, device := range devices {
|
||||
_, _, id := parser.ParseDevice(device)
|
||||
identifiers = append(identifiers, id)
|
||||
}
|
||||
|
||||
tryCreateDeviceNodes(logger, driver, identifiers...)
|
||||
|
||||
deviceSpecs, err := cdilib.GetDeviceSpecsByID(identifiers...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get CDI device specs: %w", err)
|
||||
@ -221,3 +226,27 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, drive
|
||||
spec.WithClass("gpu"),
|
||||
)
|
||||
}
|
||||
|
||||
func tryCreateDeviceNodes(logger logger.Interface, driver *root.Driver, identifiers ...string) {
|
||||
devices, err := nvdevices.New(
|
||||
nvdevices.WithLogger(logger),
|
||||
nvdevices.WithDevRoot(driver.Root),
|
||||
)
|
||||
if err != nil {
|
||||
logger.Warningf("Failed to create devices library: %v", err)
|
||||
return
|
||||
}
|
||||
if err := devices.CreateNVIDIAControlDevices(); err != nil {
|
||||
logger.Warningf("Failed to create control devices: %v", err)
|
||||
}
|
||||
if err := devices.CreateNVIDIACapsControlDeviceNodes(); err != nil {
|
||||
logger.Warningf("Failed to create nvidia-caps control devices: %v", err)
|
||||
}
|
||||
|
||||
for _, id := range identifiers {
|
||||
identifier := device.Identifier(id)
|
||||
if err := devices.CreateDeviceNodes(identifier); err != nil {
|
||||
logger.Warningf("Error creating device nodes for %v: %v", identifier, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ const (
|
||||
)
|
||||
|
||||
// MigMinor represents the minor number of a MIG device
|
||||
type MigMinor int
|
||||
type MigMinor uint32
|
||||
|
||||
// MigCap represents the path to a MIG cap file.
|
||||
// These are listed in /proc/driver/nvidia-caps/mig-minors and have one of the
|
||||
@ -144,7 +144,7 @@ func processMigMinorsLine(line string) (MigCap, MigMinor, error) {
|
||||
return "", 0, fmt.Errorf("invalid MIG minors line: '%v'", line)
|
||||
}
|
||||
|
||||
minor, err := strconv.Atoi(parts[1])
|
||||
minor, err := strconv.ParseUint(parts[1], 10, 32)
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("error reading MIG minor from '%v': %v", line, err)
|
||||
}
|
||||
|
@ -4,9 +4,8 @@
|
||||
package oci
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that SpecMock does implement Spec.
|
||||
|
@ -59,7 +59,7 @@ func (m *Interface) CreateNVIDIACapsControlDeviceNodes() error {
|
||||
continue
|
||||
}
|
||||
deviceNodePath := migMinor.DevicePath()
|
||||
if err := m.createDeviceNode(deviceNodePath, int(capsMajor), int(migMinor)); err != nil {
|
||||
if err := m.createDeviceNode(deviceNodePath, capsMajor, uint32(migMinor)); err != nil {
|
||||
errs = errors.Join(errs, fmt.Errorf("failed to create nvidia-caps device node %v: %w", deviceNodePath, err))
|
||||
}
|
||||
}
|
||||
@ -82,12 +82,12 @@ func (m *Interface) createControlDeviceNode(node controlDeviceNode) error {
|
||||
return fmt.Errorf("failed to determine minor: %w", err)
|
||||
}
|
||||
|
||||
return m.createDeviceNode(node.path(), int(major), int(minor))
|
||||
return m.createDeviceNode(node.path(), major, minor)
|
||||
}
|
||||
|
||||
// controlDeviceNodeMajor returns the major number for the specified NVIDIA control device node.
|
||||
// If the device node is not supported, an error is returned.
|
||||
func (m *Interface) controlDeviceNodeMajor(node controlDeviceNode) (int64, error) {
|
||||
func (m *Interface) controlDeviceNodeMajor(node controlDeviceNode) (devices.Major, error) {
|
||||
var valid bool
|
||||
var major devices.Major
|
||||
switch node {
|
||||
@ -98,7 +98,7 @@ func (m *Interface) controlDeviceNodeMajor(node controlDeviceNode) (int64, error
|
||||
}
|
||||
|
||||
if valid {
|
||||
return int64(major), nil
|
||||
return major, nil
|
||||
}
|
||||
|
||||
return 0, errInvalidDeviceNode
|
||||
@ -106,7 +106,7 @@ func (m *Interface) controlDeviceNodeMajor(node controlDeviceNode) (int64, error
|
||||
|
||||
// controlDeviceNodeMinor returns the minor number for the specified NVIDIA control device node.
|
||||
// If the device node is not supported, an error is returned.
|
||||
func (m *Interface) controlDeviceNodeMinor(node controlDeviceNode) (int64, error) {
|
||||
func (m *Interface) controlDeviceNodeMinor(node controlDeviceNode) (uint32, error) {
|
||||
switch node {
|
||||
case "nvidia-modeset":
|
||||
return devices.NVIDIAModesetMinor, nil
|
||||
|
@ -89,25 +89,25 @@ func New(opts ...Option) (*Interface, error) {
|
||||
func (m *Interface) CreateDeviceNodes(id device.Identifier) error {
|
||||
switch {
|
||||
case id.IsGpuIndex():
|
||||
index, err := strconv.Atoi(string(id))
|
||||
index, err := strconv.ParseUint(string(id), 10, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid GPU index: %v", id)
|
||||
}
|
||||
return m.createGPUDeviceNode(index)
|
||||
return m.createGPUDeviceNode(uint32(index))
|
||||
case id.IsMigIndex():
|
||||
indices := strings.Split(string(id), ":")
|
||||
if len(indices) != 2 {
|
||||
return fmt.Errorf("invalid MIG index %v", id)
|
||||
}
|
||||
gpuIndex, err := strconv.Atoi(indices[0])
|
||||
gpuIndex, err := strconv.ParseUint(indices[0], 10, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid parent index %v: %w", indices[0], err)
|
||||
}
|
||||
if err := m.createGPUDeviceNode(gpuIndex); err != nil {
|
||||
if err := m.createGPUDeviceNode(uint32(gpuIndex)); err != nil {
|
||||
return fmt.Errorf("failed to create parent device node: %w", err)
|
||||
}
|
||||
|
||||
return m.createMigDeviceNodes(gpuIndex)
|
||||
return m.createMigDeviceNodes(uint32(gpuIndex))
|
||||
case id.IsGpuUUID(), id.IsMigUUID(), id == "all":
|
||||
return m.createAllGPUDeviceNodes()
|
||||
default:
|
||||
@ -117,7 +117,7 @@ func (m *Interface) CreateDeviceNodes(id device.Identifier) error {
|
||||
|
||||
// createDeviceNode creates the specified device node with the require major and minor numbers.
|
||||
// If a devRoot is configured, this is prepended to the path.
|
||||
func (m *Interface) createDeviceNode(path string, major int, minor int) error {
|
||||
func (m *Interface) createDeviceNode(path string, major devices.Major, minor uint32) error {
|
||||
path = filepath.Join(m.devRoot, path)
|
||||
return m.Mknode(path, major, minor)
|
||||
return m.Mknode(path, uint32(major), minor)
|
||||
}
|
||||
|
@ -26,28 +26,28 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
|
||||
)
|
||||
|
||||
func (m *Interface) createGPUDeviceNode(gpuIndex int) error {
|
||||
func (m *Interface) createGPUDeviceNode(gpuIndex uint32) error {
|
||||
major, exists := m.Get(devices.NVIDIAGPU)
|
||||
if !exists {
|
||||
return fmt.Errorf("failed to determine device major; nvidia kernel module may not be loaded")
|
||||
}
|
||||
|
||||
deviceNodePath := fmt.Sprintf("/dev/nvidia%d", gpuIndex)
|
||||
if err := m.createDeviceNode(deviceNodePath, int(major), gpuIndex); err != nil {
|
||||
if err := m.createDeviceNode(deviceNodePath, major, uint32(gpuIndex)); err != nil {
|
||||
return fmt.Errorf("failed to create device node %v: %w", deviceNodePath, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Interface) createMigDeviceNodes(gpuIndex int) error {
|
||||
func (m *Interface) createMigDeviceNodes(gpuIndex uint32) error {
|
||||
capsMajor, exists := m.Get("nvidia-caps")
|
||||
if !exists {
|
||||
return nil
|
||||
}
|
||||
var errs error
|
||||
for _, capsDeviceMinor := range m.migCaps.FilterForGPU(gpuIndex) {
|
||||
for _, capsDeviceMinor := range m.migCaps.FilterForGPU(int(gpuIndex)) {
|
||||
capDevicePath := capsDeviceMinor.DevicePath()
|
||||
err := m.createDeviceNode(capDevicePath, int(capsMajor), int(capsDeviceMinor))
|
||||
err := m.createDeviceNode(capDevicePath, capsMajor, uint32(capsDeviceMinor))
|
||||
errs = errors.Join(errs, fmt.Errorf("failed to create %v: %w", capDevicePath, err))
|
||||
}
|
||||
return errs
|
||||
@ -62,13 +62,13 @@ func (m *Interface) createAllGPUDeviceNodes() error {
|
||||
return fmt.Errorf("failed to get GPU information from PCI: %w", err)
|
||||
}
|
||||
|
||||
count := len(gpus)
|
||||
count := uint32(len(gpus))
|
||||
if count == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var errs error
|
||||
for gpuIndex := 0; gpuIndex < count; gpuIndex++ {
|
||||
for gpuIndex := uint32(0); gpuIndex < count; gpuIndex++ {
|
||||
errs = errors.Join(errs, m.createGPUDeviceNode(gpuIndex))
|
||||
errs = errors.Join(errs, m.createMigDeviceNodes(gpuIndex))
|
||||
}
|
||||
|
@ -25,16 +25,18 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
)
|
||||
|
||||
//go:generate moq -stub -out mknod_mock.go . mknoder
|
||||
type mint uint32
|
||||
|
||||
//go:generate moq -fmt=goimports -rm -stub -out mknod_mock.go . mknoder
|
||||
type mknoder interface {
|
||||
Mknode(string, int, int) error
|
||||
Mknode(string, uint32, uint32) error
|
||||
}
|
||||
|
||||
type mknodLogger struct {
|
||||
logger.Interface
|
||||
}
|
||||
|
||||
func (m *mknodLogger) Mknode(path string, major, minor int) error {
|
||||
func (m *mknodLogger) Mknode(path string, major uint32, minor uint32) error {
|
||||
m.Infof("Running: mknod --mode=0666 %s c %d %d", path, major, minor)
|
||||
return nil
|
||||
}
|
||||
@ -43,7 +45,7 @@ type mknodUnix struct {
|
||||
logger logger.Interface
|
||||
}
|
||||
|
||||
func (m *mknodUnix) Mknode(path string, major, minor int) error {
|
||||
func (m *mknodUnix) Mknode(path string, major uint32, minor uint32) error {
|
||||
// TODO: Ensure that the existing device node has the correct properties.
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
m.logger.Infof("Skipping: %s already exists", path)
|
||||
@ -52,7 +54,7 @@ func (m *mknodUnix) Mknode(path string, major, minor int) error {
|
||||
return fmt.Errorf("failed to stat %s: %v", path, err)
|
||||
}
|
||||
|
||||
err := unix.Mknod(path, unix.S_IFCHR, int(unix.Mkdev(uint32(major), uint32(minor))))
|
||||
err := unix.Mknod(path, unix.S_IFCHR, int(unix.Mkdev(major, minor)))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ var _ mknoder = &mknoderMock{}
|
||||
//
|
||||
// // make and configure a mocked mknoder
|
||||
// mockedmknoder := &mknoderMock{
|
||||
// MknodeFunc: func(s string, n1 int, n2 int) error {
|
||||
// MknodeFunc: func(s string, v1 uint32, v2 uint32) error {
|
||||
// panic("mock out the Mknode method")
|
||||
// },
|
||||
// }
|
||||
@ -28,7 +28,7 @@ var _ mknoder = &mknoderMock{}
|
||||
// }
|
||||
type mknoderMock struct {
|
||||
// MknodeFunc mocks the Mknode method.
|
||||
MknodeFunc func(s string, n1 int, n2 int) error
|
||||
MknodeFunc func(s string, v1 uint32, v2 uint32) error
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
@ -36,25 +36,25 @@ type mknoderMock struct {
|
||||
Mknode []struct {
|
||||
// S is the s argument value.
|
||||
S string
|
||||
// N1 is the n1 argument value.
|
||||
N1 int
|
||||
// N2 is the n2 argument value.
|
||||
N2 int
|
||||
// V1 is the v1 argument value.
|
||||
V1 uint32
|
||||
// V2 is the v2 argument value.
|
||||
V2 uint32
|
||||
}
|
||||
}
|
||||
lockMknode sync.RWMutex
|
||||
}
|
||||
|
||||
// Mknode calls MknodeFunc.
|
||||
func (mock *mknoderMock) Mknode(s string, n1 int, n2 int) error {
|
||||
func (mock *mknoderMock) Mknode(s string, v1 uint32, v2 uint32) error {
|
||||
callInfo := struct {
|
||||
S string
|
||||
N1 int
|
||||
N2 int
|
||||
V1 uint32
|
||||
V2 uint32
|
||||
}{
|
||||
S: s,
|
||||
N1: n1,
|
||||
N2: n2,
|
||||
V1: v1,
|
||||
V2: v2,
|
||||
}
|
||||
mock.lockMknode.Lock()
|
||||
mock.calls.Mknode = append(mock.calls.Mknode, callInfo)
|
||||
@ -65,7 +65,7 @@ func (mock *mknoderMock) Mknode(s string, n1 int, n2 int) error {
|
||||
)
|
||||
return errOut
|
||||
}
|
||||
return mock.MknodeFunc(s, n1, n2)
|
||||
return mock.MknodeFunc(s, v1, v2)
|
||||
}
|
||||
|
||||
// MknodeCalls gets all the calls that were made to Mknode.
|
||||
@ -74,13 +74,13 @@ func (mock *mknoderMock) Mknode(s string, n1 int, n2 int) error {
|
||||
// len(mockedmknoder.MknodeCalls())
|
||||
func (mock *mknoderMock) MknodeCalls() []struct {
|
||||
S string
|
||||
N1 int
|
||||
N2 int
|
||||
V1 uint32
|
||||
V2 uint32
|
||||
} {
|
||||
var calls []struct {
|
||||
S string
|
||||
N1 int
|
||||
N2 int
|
||||
V1 uint32
|
||||
V2 uint32
|
||||
}
|
||||
mock.lockMknode.RLock()
|
||||
calls = mock.calls.Mknode
|
||||
|
@ -4,9 +4,8 @@
|
||||
package nvcdi
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that nvmlUUIDerMock does implement nvmlUUIDer.
|
||||
|
Loading…
Reference in New Issue
Block a user