mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-03 19:37:58 +00:00
add fallback logic when retrieving major number of the nvidia control device
Signed-off-by: Tariq Ibrahim <tibrahim@nvidia.com>
This commit is contained in:
parent
2f3600af9a
commit
f414ac2865
@ -33,7 +33,7 @@ const (
|
|||||||
NVIDIAModesetMinor = 254
|
NVIDIAModesetMinor = 254
|
||||||
|
|
||||||
NVIDIAFrontend = Name("nvidia-frontend")
|
NVIDIAFrontend = Name("nvidia-frontend")
|
||||||
NVIDIAGPU = NVIDIAFrontend
|
NVIDIAGPU = Name("nvidia")
|
||||||
NVIDIACaps = Name("nvidia-caps")
|
NVIDIACaps = Name("nvidia-caps")
|
||||||
NVIDIAUVM = Name("nvidia-uvm")
|
NVIDIAUVM = Name("nvidia-uvm")
|
||||||
|
|
||||||
@ -65,10 +65,25 @@ func (d devices) Exists(name Name) bool {
|
|||||||
return exists
|
return exists
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get a Device from Devices
|
// Get returns the Major of the named Device from Devices. It falls back to alternative names so that device name changes in /proc/devices are handled.
|
||||||
|
// For example, for GPU drivers 550.40.x or greater, the GPU device has been renamed from "nvidia-frontend" to "nvidia".
|
||||||
func (d devices) Get(name Name) (Major, bool) {
|
func (d devices) Get(name Name) (Major, bool) {
|
||||||
device, exists := d[name]
|
for _, n := range name.getWithFallback() {
|
||||||
return device, exists
|
device, exists := d[n]
|
||||||
|
if exists {
|
||||||
|
return device, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// getWithFallback returns a prioritised list of device names for a specific name.
|
||||||
|
// This allows multiple names to be associated with a single name to support various driver versions.
|
||||||
|
func (n Name) getWithFallback() []Name {
|
||||||
|
if n == NVIDIAGPU || n == NVIDIAFrontend {
|
||||||
|
return []Name{NVIDIAGPU, NVIDIAFrontend}
|
||||||
|
}
|
||||||
|
return []Name{n}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetNVIDIADevices returns the set of NVIDIA Devices on the machine
|
// GetNVIDIADevices returns the set of NVIDIA Devices on the machine
|
||||||
|
@ -41,6 +41,11 @@ func TestNvidiaDevices(t *testing.T) {
|
|||||||
}
|
}
|
||||||
_, exists := nvidiaDevices.Get("bogus")
|
_, exists := nvidiaDevices.Get("bogus")
|
||||||
require.False(t, exists, "Unexpected 'bogus' device found")
|
require.False(t, exists, "Unexpected 'bogus' device found")
|
||||||
|
|
||||||
|
// assert that nvidia and nvidia-frontend can be used interchangeably and have the same device major number
|
||||||
|
m, exists := nvidiaDevices.Get("nvidia")
|
||||||
|
require.True(t, exists)
|
||||||
|
require.Equal(t, devices["nvidia-frontend"], m)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestProcessDeviceFile(t *testing.T) {
|
func TestProcessDeviceFile(t *testing.T) {
|
||||||
|
@ -31,11 +31,25 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
|
|
||||||
nvidiaDevices := &devices.DevicesMock{
|
nvidiaDevices := &devices.DevicesMock{
|
||||||
GetFunc: func(name devices.Name) (devices.Major, bool) {
|
GetFunc: func(name devices.Name) (devices.Major, bool) {
|
||||||
devices := map[devices.Name]devices.Major{
|
devs := map[devices.Name]devices.Major{
|
||||||
"nvidia-frontend": 195,
|
"nvidia-frontend": 195,
|
||||||
"nvidia-uvm": 243,
|
"nvidia-uvm": 243,
|
||||||
}
|
}
|
||||||
return devices[name], true
|
|
||||||
|
// devs550_40 represents the device map from the nvidia gpu drivers >= 550.40.x
|
||||||
|
devs550_40 := map[devices.Name]devices.Major{
|
||||||
|
"nvidia": 195,
|
||||||
|
"nvidia-uvm": 243,
|
||||||
|
}
|
||||||
|
|
||||||
|
d, ok := devs[name]
|
||||||
|
if ok {
|
||||||
|
return d, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// if device d is not found, fall back to the second mock device map
|
||||||
|
d, ok = devs550_40[name]
|
||||||
|
return d, ok
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -46,6 +60,7 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
root string
|
root string
|
||||||
devices devices.Devices
|
devices devices.Devices
|
||||||
mknodeError error
|
mknodeError error
|
||||||
|
hasError bool
|
||||||
expectedError error
|
expectedError error
|
||||||
expectedCalls []struct {
|
expectedCalls []struct {
|
||||||
S string
|
S string
|
||||||
@ -58,6 +73,7 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
root: "",
|
root: "",
|
||||||
devices: nvidiaDevices,
|
devices: nvidiaDevices,
|
||||||
mknodeError: nil,
|
mknodeError: nil,
|
||||||
|
hasError: false,
|
||||||
expectedCalls: []struct {
|
expectedCalls: []struct {
|
||||||
S string
|
S string
|
||||||
N1 int
|
N1 int
|
||||||
@ -73,6 +89,7 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
description: "some root specified",
|
description: "some root specified",
|
||||||
root: "/some/root",
|
root: "/some/root",
|
||||||
devices: nvidiaDevices,
|
devices: nvidiaDevices,
|
||||||
|
hasError: false,
|
||||||
mknodeError: nil,
|
mknodeError: nil,
|
||||||
expectedCalls: []struct {
|
expectedCalls: []struct {
|
||||||
S string
|
S string
|
||||||
@ -88,6 +105,7 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "mknod error returns error",
|
description: "mknod error returns error",
|
||||||
devices: nvidiaDevices,
|
devices: nvidiaDevices,
|
||||||
|
hasError: true,
|
||||||
mknodeError: mknodeError,
|
mknodeError: mknodeError,
|
||||||
expectedError: mknodeError,
|
expectedError: mknodeError,
|
||||||
// We expect the first call to this to fail, and the rest to be skipped
|
// We expect the first call to this to fail, and the rest to be skipped
|
||||||
@ -106,8 +124,24 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
return 0, false
|
return 0, false
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
hasError: true,
|
||||||
expectedError: errInvalidDeviceNode,
|
expectedError: errInvalidDeviceNode,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
description: "nvidia device renamed from nvidia-frontend to nvidia",
|
||||||
|
devices: nvidiaDevices,
|
||||||
|
hasError: false,
|
||||||
|
expectedCalls: []struct {
|
||||||
|
S string
|
||||||
|
N1 int
|
||||||
|
N2 int
|
||||||
|
}{
|
||||||
|
{"/dev/nvidiactl", 195, 255},
|
||||||
|
{"/dev/nvidia-modeset", 195, 254},
|
||||||
|
{"/dev/nvidia-uvm", 243, 0},
|
||||||
|
{"/dev/nvidia-uvm-tools", 243, 1},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
@ -126,9 +160,12 @@ func TestCreateControlDevices(t *testing.T) {
|
|||||||
d.mknoder = mknode
|
d.mknoder = mknode
|
||||||
|
|
||||||
err := d.CreateNVIDIAControlDevices()
|
err := d.CreateNVIDIAControlDevices()
|
||||||
require.ErrorIs(t, err, tc.expectedError)
|
if tc.hasError {
|
||||||
|
require.ErrorContains(t, err, tc.expectedError.Error())
|
||||||
|
} else {
|
||||||
|
require.Nil(t, err)
|
||||||
|
}
|
||||||
require.EqualValues(t, tc.expectedCalls, mknode.MknodeCalls())
|
require.EqualValues(t, tc.expectedCalls, mknode.MknodeCalls())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user