TOFIX
Some checks failed
CI Pipeline / code-scanning (push) Has been cancelled
CI Pipeline / variables (push) Has been cancelled
CI Pipeline / golang (push) Has been cancelled
CI Pipeline / image (push) Has been cancelled
CI Pipeline / e2e-test (push) Has been cancelled

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2025-03-12 12:43:39 +02:00
parent d4b331fbbb
commit 699608902b
No known key found for this signature in database
5 changed files with 39 additions and 30 deletions

View File

@ -25,7 +25,7 @@ import (
)
func TestNvidiaDevices(t *testing.T) {
perDriverDeviceMaps := map[string]map[string]int{
perDriverDeviceMaps := map[string]map[string]uint32{
"pre550": {
"nvidia-frontend": 195,
"nvidia-nvlink": 234,
@ -100,7 +100,7 @@ func TestProcessDeviceFileLine(t *testing.T) {
testCases := []struct {
line string
name string
major int
major uint32
err bool
}{
{"", "", 0, true},

View File

@ -20,7 +20,6 @@ import (
"errors"
"fmt"
"path/filepath"
"strconv"
"strings"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
@ -89,25 +88,25 @@ func New(opts ...Option) (*Interface, error) {
func (m *Interface) CreateDeviceNodes(id device.Identifier) error {
switch {
case id.IsGpuIndex():
index, err := strconv.ParseUint(string(id), 10, 32)
gpuIndex, err := toIndex(string(id))
if err != nil {
return fmt.Errorf("invalid GPU index: %v", id)
}
return m.createGPUDeviceNode(uint32(index))
return m.createGPUDeviceNode(gpuIndex)
case id.IsMigIndex():
indices := strings.Split(string(id), ":")
if len(indices) != 2 {
return fmt.Errorf("invalid MIG index %v", id)
}
gpuIndex, err := strconv.ParseUint(indices[0], 10, 32)
gpuIndex, err := toIndex(indices[0])
if err != nil {
return fmt.Errorf("invalid parent index %v: %w", indices[0], err)
}
if err := m.createGPUDeviceNode(uint32(gpuIndex)); err != nil {
if err := m.createGPUDeviceNode(gpuIndex); err != nil {
return fmt.Errorf("failed to create parent device node: %w", err)
}
return m.createMigDeviceNodes(uint32(gpuIndex))
return m.createMigDeviceNodes(gpuIndex)
case id.IsGpuUUID(), id.IsMigUUID(), id == "all":
return m.createAllGPUDeviceNodes()
default:

View File

@ -30,13 +30,13 @@ func TestCreateControlDevices(t *testing.T) {
logger, _ := testlog.NewNullLogger()
nvidiaDevices := devices.New(
devices.WithDeviceToMajor(map[string]int{
devices.WithDeviceToMajor(map[string]uint32{
"nvidia-frontend": 195,
"nvidia-uvm": 243,
}),
)
nvidia550Devices := devices.New(
devices.WithDeviceToMajor(map[string]int{
devices.WithDeviceToMajor(map[string]uint32{
"nvidia": 195,
"nvidia-uvm": 243,
}),
@ -52,8 +52,8 @@ func TestCreateControlDevices(t *testing.T) {
expectedError error
expectedCalls []struct {
S string
N1 int
N2 int
V1 uint32
V2 uint32
}
}{
{
@ -63,8 +63,8 @@ func TestCreateControlDevices(t *testing.T) {
mknodeError: nil,
expectedCalls: []struct {
S string
N1 int
N2 int
V1 uint32
V2 uint32
}{
{"/dev/nvidiactl", 195, 255},
{"/dev/nvidia-modeset", 195, 254},
@ -79,8 +79,8 @@ func TestCreateControlDevices(t *testing.T) {
mknodeError: nil,
expectedCalls: []struct {
S string
N1 int
N2 int
V1 uint32
V2 uint32
}{
{"/dev/nvidiactl", 195, 255},
{"/dev/nvidia-modeset", 195, 254},
@ -95,8 +95,8 @@ func TestCreateControlDevices(t *testing.T) {
mknodeError: nil,
expectedCalls: []struct {
S string
N1 int
N2 int
V1 uint32
V2 uint32
}{
{"/some/root/dev/nvidiactl", 195, 255},
{"/some/root/dev/nvidia-modeset", 195, 254},
@ -112,8 +112,8 @@ func TestCreateControlDevices(t *testing.T) {
// We expect the first call to this to fail, and the rest to be skipped
expectedCalls: []struct {
S string
N1 int
N2 int
V1 uint32
V2 uint32
}{
{"/dev/nvidiactl", 195, 255},
},
@ -132,7 +132,7 @@ func TestCreateControlDevices(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
mknode := &mknoderMock{
MknodeFunc: func(string, int, int) error {
MknodeFunc: func(string, uint32, uint32) error {
return tc.mknodeError
},
}

View File

@ -20,32 +20,44 @@ import (
"errors"
"fmt"
"path/filepath"
"strconv"
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
)
func (m *Interface) createGPUDeviceNode(gpuIndex uint32) error {
// gpuIndex is the package-local type used to carry a GPU index. It is declared
// on top of nvcaps.Index, so a value converts directly to nvcaps.Index, and
// toIndex produces one from a decimal string.
type gpuIndex nvcaps.Index
// toIndex converts the base-10 string index into a gpuIndex.
//
// The string must hold an unsigned integer that fits in 32 bits. On any parse
// failure the strconv error is returned unchanged together with a zero index.
func toIndex(index string) (gpuIndex, error) {
	parsed, parseErr := strconv.ParseUint(index, 10, 32)
	if parseErr == nil {
		return gpuIndex(parsed), nil
	}
	return 0, parseErr
}
// createGPUDeviceNode creates the device node for a single GPU.
//
// The device major is looked up with m.Get(devices.NVIDIAGPU); when that
// lookup reports no entry, an error is returned and nothing is created. The
// node path is "/dev/nvidia" followed by the index, and the index (converted
// to uint32) is passed as the last argument to m.createDeviceNode. A failure
// from createDeviceNode is wrapped with the node path and returned.
func (m *Interface) createGPUDeviceNode(gpu gpuIndex) error {
major, exists := m.Get(devices.NVIDIAGPU)
if !exists {
return fmt.Errorf("failed to determine device major; nvidia kernel module may not be loaded")
}
// NOTE(review): the next four lines are a diff rendering: the first pair
// uses the old parameter name gpuIndex, the second pair is its replacement
// using the parameter gpu declared in the signature above.
deviceNodePath := fmt.Sprintf("/dev/nvidia%d", gpuIndex)
if err := m.createDeviceNode(deviceNodePath, major, uint32(gpuIndex)); err != nil {
deviceNodePath := fmt.Sprintf("/dev/nvidia%d", gpu)
if err := m.createDeviceNode(deviceNodePath, major, uint32(gpu)); err != nil {
return fmt.Errorf("failed to create device node %v: %w", deviceNodePath, err)
}
return nil
}
func (m *Interface) createMigDeviceNodes(gpuIndex uint32) error {
func (m *Interface) createMigDeviceNodes(gpu gpuIndex) error {
capsMajor, exists := m.Get("nvidia-caps")
if !exists {
return nil
}
var errs error
for _, capsDeviceMinor := range m.migCaps.FilterForGPU(int(gpuIndex)) {
for _, capsDeviceMinor := range m.migCaps.FilterForGPU(nvcaps.Index(gpu)) {
capDevicePath := capsDeviceMinor.DevicePath()
err := m.createDeviceNode(capDevicePath, capsMajor, uint32(capsDeviceMinor))
errs = errors.Join(errs, fmt.Errorf("failed to create %v: %w", capDevicePath, err))
@ -62,13 +74,13 @@ func (m *Interface) createAllGPUDeviceNodes() error {
return fmt.Errorf("failed to get GPU information from PCI: %w", err)
}
count := uint32(len(gpus))
count := gpuIndex(len(gpus))
if count == 0 {
return nil
}
var errs error
for gpuIndex := uint32(0); gpuIndex < count; gpuIndex++ {
for gpuIndex := gpuIndex(0); gpuIndex < count; gpuIndex++ {
errs = errors.Join(errs, m.createGPUDeviceNode(gpuIndex))
errs = errors.Join(errs, m.createMigDeviceNodes(gpuIndex))
}

View File

@ -25,8 +25,6 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
type mint uint32
//go:generate moq -fmt=goimports -rm -stub -out mknod_mock.go . mknoder
type mknoder interface {
Mknode(string, uint32, uint32) error