mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-11 17:11:39 +00:00
TOFIX
Some checks failed
Some checks failed
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
d4b331fbbb
commit
699608902b
@ -25,7 +25,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestNvidiaDevices(t *testing.T) {
|
func TestNvidiaDevices(t *testing.T) {
|
||||||
perDriverDeviceMaps := map[string]map[string]int{
|
perDriverDeviceMaps := map[string]map[string]uint32{
|
||||||
"pre550": {
|
"pre550": {
|
||||||
"nvidia-frontend": 195,
|
"nvidia-frontend": 195,
|
||||||
"nvidia-nvlink": 234,
|
"nvidia-nvlink": 234,
|
||||||
@ -100,7 +100,7 @@ func TestProcessDeviceFileLine(t *testing.T) {
|
|||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
line string
|
line string
|
||||||
name string
|
name string
|
||||||
major int
|
major uint32
|
||||||
err bool
|
err bool
|
||||||
}{
|
}{
|
||||||
{"", "", 0, true},
|
{"", "", 0, true},
|
||||||
|
@ -20,7 +20,6 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
||||||
@ -89,25 +88,25 @@ func New(opts ...Option) (*Interface, error) {
|
|||||||
func (m *Interface) CreateDeviceNodes(id device.Identifier) error {
|
func (m *Interface) CreateDeviceNodes(id device.Identifier) error {
|
||||||
switch {
|
switch {
|
||||||
case id.IsGpuIndex():
|
case id.IsGpuIndex():
|
||||||
index, err := strconv.ParseUint(string(id), 10, 32)
|
gpuIndex, err := toIndex(string(id))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("invalid GPU index: %v", id)
|
return fmt.Errorf("invalid GPU index: %v", id)
|
||||||
}
|
}
|
||||||
return m.createGPUDeviceNode(uint32(index))
|
return m.createGPUDeviceNode(gpuIndex)
|
||||||
case id.IsMigIndex():
|
case id.IsMigIndex():
|
||||||
indices := strings.Split(string(id), ":")
|
indices := strings.Split(string(id), ":")
|
||||||
if len(indices) != 2 {
|
if len(indices) != 2 {
|
||||||
return fmt.Errorf("invalid MIG index %v", id)
|
return fmt.Errorf("invalid MIG index %v", id)
|
||||||
}
|
}
|
||||||
gpuIndex, err := strconv.ParseUint(indices[0], 10, 32)
|
gpuIndex, err := toIndex(indices[0])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("invalid parent index %v: %w", indices[0], err)
|
return fmt.Errorf("invalid parent index %v: %w", indices[0], err)
|
||||||
}
|
}
|
||||||
if err := m.createGPUDeviceNode(uint32(gpuIndex)); err != nil {
|
if err := m.createGPUDeviceNode(gpuIndex); err != nil {
|
||||||
return fmt.Errorf("failed to create parent device node: %w", err)
|
return fmt.Errorf("failed to create parent device node: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return m.createMigDeviceNodes(uint32(gpuIndex))
|
return m.createMigDeviceNodes(gpuIndex)
|
||||||
case id.IsGpuUUID(), id.IsMigUUID(), id == "all":
|
case id.IsGpuUUID(), id.IsMigUUID(), id == "all":
|
||||||
return m.createAllGPUDeviceNodes()
|
return m.createAllGPUDeviceNodes()
|
||||||
default:
|
default:
|
||||||
|
@ -30,13 +30,13 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
logger, _ := testlog.NewNullLogger()
|
logger, _ := testlog.NewNullLogger()
|
||||||
|
|
||||||
nvidiaDevices := devices.New(
|
nvidiaDevices := devices.New(
|
||||||
devices.WithDeviceToMajor(map[string]int{
|
devices.WithDeviceToMajor(map[string]uint32{
|
||||||
"nvidia-frontend": 195,
|
"nvidia-frontend": 195,
|
||||||
"nvidia-uvm": 243,
|
"nvidia-uvm": 243,
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
nvidia550Devices := devices.New(
|
nvidia550Devices := devices.New(
|
||||||
devices.WithDeviceToMajor(map[string]int{
|
devices.WithDeviceToMajor(map[string]uint32{
|
||||||
"nvidia": 195,
|
"nvidia": 195,
|
||||||
"nvidia-uvm": 243,
|
"nvidia-uvm": 243,
|
||||||
}),
|
}),
|
||||||
@ -52,8 +52,8 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
expectedError error
|
expectedError error
|
||||||
expectedCalls []struct {
|
expectedCalls []struct {
|
||||||
S string
|
S string
|
||||||
N1 int
|
V1 uint32
|
||||||
N2 int
|
V2 uint32
|
||||||
}
|
}
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
@ -63,8 +63,8 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
mknodeError: nil,
|
mknodeError: nil,
|
||||||
expectedCalls: []struct {
|
expectedCalls: []struct {
|
||||||
S string
|
S string
|
||||||
N1 int
|
V1 uint32
|
||||||
N2 int
|
V2 uint32
|
||||||
}{
|
}{
|
||||||
{"/dev/nvidiactl", 195, 255},
|
{"/dev/nvidiactl", 195, 255},
|
||||||
{"/dev/nvidia-modeset", 195, 254},
|
{"/dev/nvidia-modeset", 195, 254},
|
||||||
@ -79,8 +79,8 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
mknodeError: nil,
|
mknodeError: nil,
|
||||||
expectedCalls: []struct {
|
expectedCalls: []struct {
|
||||||
S string
|
S string
|
||||||
N1 int
|
V1 uint32
|
||||||
N2 int
|
V2 uint32
|
||||||
}{
|
}{
|
||||||
{"/dev/nvidiactl", 195, 255},
|
{"/dev/nvidiactl", 195, 255},
|
||||||
{"/dev/nvidia-modeset", 195, 254},
|
{"/dev/nvidia-modeset", 195, 254},
|
||||||
@ -95,8 +95,8 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
mknodeError: nil,
|
mknodeError: nil,
|
||||||
expectedCalls: []struct {
|
expectedCalls: []struct {
|
||||||
S string
|
S string
|
||||||
N1 int
|
V1 uint32
|
||||||
N2 int
|
V2 uint32
|
||||||
}{
|
}{
|
||||||
{"/some/root/dev/nvidiactl", 195, 255},
|
{"/some/root/dev/nvidiactl", 195, 255},
|
||||||
{"/some/root/dev/nvidia-modeset", 195, 254},
|
{"/some/root/dev/nvidia-modeset", 195, 254},
|
||||||
@ -112,8 +112,8 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
// We expect the first call to this to fail, and the rest to be skipped
|
// We expect the first call to this to fail, and the rest to be skipped
|
||||||
expectedCalls: []struct {
|
expectedCalls: []struct {
|
||||||
S string
|
S string
|
||||||
N1 int
|
V1 uint32
|
||||||
N2 int
|
V2 uint32
|
||||||
}{
|
}{
|
||||||
{"/dev/nvidiactl", 195, 255},
|
{"/dev/nvidiactl", 195, 255},
|
||||||
},
|
},
|
||||||
@ -132,7 +132,7 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
mknode := &mknoderMock{
|
mknode := &mknoderMock{
|
||||||
MknodeFunc: func(string, int, int) error {
|
MknodeFunc: func(string, uint32, uint32) error {
|
||||||
return tc.mknodeError
|
return tc.mknodeError
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -20,32 +20,44 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
|
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (m *Interface) createGPUDeviceNode(gpuIndex uint32) error {
|
type gpuIndex nvcaps.Index
|
||||||
|
|
||||||
|
func toIndex(index string) (gpuIndex, error) {
|
||||||
|
i, err := strconv.ParseUint(index, 10, 32)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return gpuIndex(i), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Interface) createGPUDeviceNode(gpu gpuIndex) error {
|
||||||
major, exists := m.Get(devices.NVIDIAGPU)
|
major, exists := m.Get(devices.NVIDIAGPU)
|
||||||
if !exists {
|
if !exists {
|
||||||
return fmt.Errorf("failed to determine device major; nvidia kernel module may not be loaded")
|
return fmt.Errorf("failed to determine device major; nvidia kernel module may not be loaded")
|
||||||
}
|
}
|
||||||
|
|
||||||
deviceNodePath := fmt.Sprintf("/dev/nvidia%d", gpuIndex)
|
deviceNodePath := fmt.Sprintf("/dev/nvidia%d", gpu)
|
||||||
if err := m.createDeviceNode(deviceNodePath, major, uint32(gpuIndex)); err != nil {
|
if err := m.createDeviceNode(deviceNodePath, major, uint32(gpu)); err != nil {
|
||||||
return fmt.Errorf("failed to create device node %v: %w", deviceNodePath, err)
|
return fmt.Errorf("failed to create device node %v: %w", deviceNodePath, err)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Interface) createMigDeviceNodes(gpuIndex uint32) error {
|
func (m *Interface) createMigDeviceNodes(gpu gpuIndex) error {
|
||||||
capsMajor, exists := m.Get("nvidia-caps")
|
capsMajor, exists := m.Get("nvidia-caps")
|
||||||
if !exists {
|
if !exists {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
var errs error
|
var errs error
|
||||||
for _, capsDeviceMinor := range m.migCaps.FilterForGPU(int(gpuIndex)) {
|
for _, capsDeviceMinor := range m.migCaps.FilterForGPU(nvcaps.Index(gpu)) {
|
||||||
capDevicePath := capsDeviceMinor.DevicePath()
|
capDevicePath := capsDeviceMinor.DevicePath()
|
||||||
err := m.createDeviceNode(capDevicePath, capsMajor, uint32(capsDeviceMinor))
|
err := m.createDeviceNode(capDevicePath, capsMajor, uint32(capsDeviceMinor))
|
||||||
errs = errors.Join(errs, fmt.Errorf("failed to create %v: %w", capDevicePath, err))
|
errs = errors.Join(errs, fmt.Errorf("failed to create %v: %w", capDevicePath, err))
|
||||||
@ -62,13 +74,13 @@ func (m *Interface) createAllGPUDeviceNodes() error {
|
|||||||
return fmt.Errorf("failed to get GPU information from PCI: %w", err)
|
return fmt.Errorf("failed to get GPU information from PCI: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
count := uint32(len(gpus))
|
count := gpuIndex(len(gpus))
|
||||||
if count == 0 {
|
if count == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var errs error
|
var errs error
|
||||||
for gpuIndex := uint32(0); gpuIndex < count; gpuIndex++ {
|
for gpuIndex := gpuIndex(0); gpuIndex < count; gpuIndex++ {
|
||||||
errs = errors.Join(errs, m.createGPUDeviceNode(gpuIndex))
|
errs = errors.Join(errs, m.createGPUDeviceNode(gpuIndex))
|
||||||
errs = errors.Join(errs, m.createMigDeviceNodes(gpuIndex))
|
errs = errors.Join(errs, m.createMigDeviceNodes(gpuIndex))
|
||||||
}
|
}
|
||||||
|
@ -25,8 +25,6 @@ import (
|
|||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
type mint uint32
|
|
||||||
|
|
||||||
//go:generate moq -fmt=goimports -rm -stub -out mknod_mock.go . mknoder
|
//go:generate moq -fmt=goimports -rm -stub -out mknod_mock.go . mknoder
|
||||||
type mknoder interface {
|
type mknoder interface {
|
||||||
Mknode(string, uint32, uint32) error
|
Mknode(string, uint32, uint32) error
|
||||||
|
Loading…
Reference in New Issue
Block a user