Allow multiple device name strategies to be specified

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2023-03-21 15:51:36 +02:00
parent a8d48808d7
commit 52da12cf9a
14 changed files with 127 additions and 64 deletions

View File

@@ -48,8 +48,8 @@ type Interface interface {
GetCommonEdits() (*cdi.ContainerEdits, error)
GetAllDeviceSpecs() ([]specs.Device, error)
GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error)
GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error)
GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error)
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error)
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error)
GetDeviceSpecsByID(...string) ([]specs.Device, error)
}

View File

@@ -34,23 +34,26 @@ import (
)
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
edits, err := l.GetGPUDeviceEdits(d)
if err != nil {
return nil, fmt.Errorf("failed to get edits for device: %v", err)
}
name, err := l.deviceNamer.GetDeviceName(i, convert{d})
var deviceSpecs []specs.Device
names, err := l.deviceNamers.GetDeviceNames(i, convert{d})
if err != nil {
return nil, fmt.Errorf("failed to get device name: %v", err)
}
spec := specs.Device{
Name: name,
ContainerEdits: *edits.ContainerEdits,
for _, name := range names {
spec := specs.Device{
Name: name,
ContainerEdits: *edits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, spec)
}
return &spec, nil
return deviceSpecs, nil
}
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.

View File

@@ -68,7 +68,7 @@ func (l *gdslib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
}
// GetGPUDeviceSpecs is unsupported for the gdslib specs
func (l *gdslib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
func (l *gdslib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
}
@@ -78,7 +78,7 @@ func (l *gdslib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
}
// GetMIGDeviceSpecs is unsupported for the gdslib specs
func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
}

View File

@@ -58,16 +58,20 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
return nil, fmt.Errorf("failed to create container edits for CSV files: %v", err)
}
name, err := l.deviceNamer.GetDeviceName(0, uuidUnsupported{})
names, err := l.deviceNamers.GetDeviceNames(0, uuidIgnored{})
if err != nil {
return nil, fmt.Errorf("failed to get device name: %v", err)
}
deviceSpec := specs.Device{
Name: name,
ContainerEdits: *e.ContainerEdits,
var deviceSpecs []specs.Device
for _, name := range names {
deviceSpec := specs.Device{
Name: name,
ContainerEdits: *e.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, deviceSpec)
}
return []specs.Device{deviceSpec}, nil
return deviceSpecs, nil
}
// GetCommonEdits generates a CDI specification that can be used for ANY devices
@@ -82,7 +86,7 @@ func (l *csvlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
}
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported for CSV files")
}
@@ -92,7 +96,7 @@ func (l *csvlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
}
// GetMIGDeviceSpecs returns the CDI device specs for the full MIG represented by 'device'.
func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported for CSV files")
}

View File

@@ -208,11 +208,11 @@ func (l *nvmllib) getEditsForMIGDevice(nvmlDevice nvml.Device) (*cdi.ContainerEd
func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
var deviceSpecs []specs.Device
err := l.devicelib.VisitDevices(func(i int, d device.Device) error {
deviceSpec, err := l.GetGPUDeviceSpecs(i, d)
specsForDevice, err := l.GetGPUDeviceSpecs(i, d)
if err != nil {
return err
}
deviceSpecs = append(deviceSpecs, *deviceSpec)
deviceSpecs = append(deviceSpecs, specsForDevice...)
return nil
})
@@ -225,11 +225,11 @@ func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
func (l *nvmllib) getMigDeviceSpecs() ([]specs.Device, error) {
var deviceSpecs []specs.Device
err := l.devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
deviceSpec, err := l.GetMIGDeviceSpecs(i, d, j, mig)
specsForDevice, err := l.GetMIGDeviceSpecs(i, d, j, mig)
if err != nil {
return err
}
deviceSpecs = append(deviceSpecs, *deviceSpec)
deviceSpecs = append(deviceSpecs, specsForDevice...)
return nil
})

View File

@@ -68,7 +68,7 @@ func (l *wsllib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
}
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported on WSL")
}
@@ -78,7 +78,7 @@ func (l *wsllib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
}
// GetMIGDeviceSpecs returns the CDI device specs for the full MIG represented by 'device'.
func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported on WSL")
}

View File

@@ -44,7 +44,7 @@ type nvcdilib struct {
nvmllib nvml.Interface
mode string
devicelib device.Interface
deviceNamer DeviceNamer
deviceNamers DeviceNamers
driverRoot string
devRoot string
nvidiaCTKPath string
@@ -75,8 +75,9 @@ func New(opts ...Option) (Interface, error) {
if l.logger == nil {
l.logger = logger.New()
}
if l.deviceNamer == nil {
l.deviceNamer, _ = NewDeviceNamer(DeviceNameStrategyIndex)
if len(l.deviceNamers) == 0 {
indexNamer, _ := NewDeviceNamer(DeviceNameStrategyIndex)
l.deviceNamers = []DeviceNamer{indexNamer}
}
if l.driverRoot == "" {
l.driverRoot = "/"

View File

@@ -175,7 +175,7 @@ func (m *managementlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, e
}
// GetGPUDeviceSpecs is unsupported for the managementlib specs
func (m *managementlib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
func (m *managementlib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
}
@@ -185,7 +185,7 @@ func (m *managementlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi
}
// GetMIGDeviceSpecs is unsupported for the managementlib specs
func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
}

View File

@@ -31,23 +31,25 @@ import (
)
// GetMIGDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
func (l *nvmllib) GetMIGDeviceSpecs(i int, d device.Device, j int, mig device.MigDevice) (*specs.Device, error) {
func (l *nvmllib) GetMIGDeviceSpecs(i int, d device.Device, j int, mig device.MigDevice) ([]specs.Device, error) {
edits, err := l.GetMIGDeviceEdits(d, mig)
if err != nil {
return nil, fmt.Errorf("failed to get edits for device: %v", err)
}
name, err := l.deviceNamer.GetMigDeviceName(i, convert{d}, j, convert{mig})
names, err := l.deviceNamers.GetMigDeviceNames(i, convert{d}, j, convert{mig})
if err != nil {
return nil, fmt.Errorf("failed to get device name: %v", err)
}
spec := specs.Device{
Name: name,
ContainerEdits: *edits.ContainerEdits,
var deviceSpecs []specs.Device
for _, name := range names {
spec := specs.Device{
Name: name,
ContainerEdits: *edits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, spec)
}
return &spec, nil
return deviceSpecs, nil
}
// GetMIGDeviceEdits returns the CDI edits for the MIG device represented by 'mig' on 'parent'.

View File

@@ -68,7 +68,7 @@ func (l *mofedlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
}
// GetGPUDeviceSpecs is unsupported for the mofedlib specs
func (l *mofedlib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
func (l *mofedlib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
}
@@ -78,7 +78,7 @@ func (l *mofedlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Cont
}
// GetMIGDeviceSpecs is unsupported for the mofedlib specs
func (l *mofedlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
func (l *mofedlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
}

View File

@@ -28,6 +28,9 @@ type UUIDer interface {
GetUUID() (string, error)
}
// DeviceNamers represents a list of device namers
type DeviceNamers []DeviceNamer
// DeviceNamer is an interface for getting device names
type DeviceNamer interface {
GetDeviceName(int, UUIDer) (string, error)
@@ -102,6 +105,12 @@ type convert struct {
nvmlUUIDer
}
type uuidIgnored struct{}
func (m uuidIgnored) GetUUID() (string, error) {
return "", nil
}
type uuidUnsupported struct{}
func (m convert) GetUUID() (string, error) {
@@ -120,3 +129,39 @@ var errUUIDUnsupported = errors.New("GetUUID is not supported")
func (m uuidUnsupported) GetUUID() (string, error) {
return "", errUUIDUnsupported
}
func (l DeviceNamers) GetDeviceNames(i int, d UUIDer) ([]string, error) {
var names []string
for _, namer := range l {
name, err := namer.GetDeviceName(i, d)
if err != nil {
return nil, err
}
if name == "" {
continue
}
names = append(names, name)
}
if len(names) == 0 {
return nil, errors.New("no names defined")
}
return names, nil
}
func (l DeviceNamers) GetMigDeviceNames(i int, d UUIDer, j int, mig UUIDer) ([]string, error) {
var names []string
for _, namer := range l {
name, err := namer.GetMigDeviceName(i, d, j, mig)
if err != nil {
return nil, err
}
if name == "" {
continue
}
names = append(names, name)
}
if len(names) == 0 {
return nil, errors.New("no names defined")
}
return names, nil
}

View File

@@ -34,10 +34,10 @@ func WithDeviceLib(devicelib device.Interface) Option {
}
}
// WithDeviceNamer sets the device namer for the library
func WithDeviceNamer(namer DeviceNamer) Option {
// WithDeviceNamers sets the device namer for the library
func WithDeviceNamers(namers ...DeviceNamer) Option {
return func(l *nvcdilib) {
l.deviceNamer = namer
l.deviceNamers = namers
}
}