Allow multiple device name strategies to be specified

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2023-03-21 15:51:36 +02:00
parent a8d48808d7
commit 52da12cf9a
14 changed files with 127 additions and 64 deletions

View File

@ -2,6 +2,8 @@
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed. * Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`. * Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
* Allow multiple device naming strategies for the `nvidia-ctk cdi generate` command. This allows a single
CDI spec to be generated that includes GPUs by index and UUID.
## v1.15.0-rc.3 ## v1.15.0-rc.3
* Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied. * Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied.

View File

@ -44,7 +44,7 @@ type command struct {
type options struct { type options struct {
output string output string
format string format string
deviceNameStrategy string deviceNameStrategies cli.StringSlice
driverRoot string driverRoot string
devRoot string devRoot string
nvidiaCTKPath string nvidiaCTKPath string
@ -109,11 +109,11 @@ func (m command) build() *cli.Command {
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.", Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
Destination: &opts.devRoot, Destination: &opts.devRoot,
}, },
&cli.StringFlag{ &cli.StringSliceFlag{
Name: "device-name-strategy", Name: "device-name-strategy",
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]", Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
Value: nvcdi.DeviceNameStrategyIndex, Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex),
Destination: &opts.deviceNameStrategy, Destination: &opts.deviceNameStrategies,
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "driver-root", Name: "driver-root",
@ -185,10 +185,12 @@ func (m command) validateFlags(c *cli.Context, opts *options) error {
return fmt.Errorf("invalid discovery mode: %v", opts.mode) return fmt.Errorf("invalid discovery mode: %v", opts.mode)
} }
_, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy) for _, strategy := range opts.deviceNameStrategies.Value() {
_, err := nvcdi.NewDeviceNamer(strategy)
if err != nil { if err != nil {
return err return err
} }
}
opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath) opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath)
@ -241,10 +243,14 @@ func formatFromFilename(filename string) string {
} }
func (m command) generateSpec(opts *options) (spec.Interface, error) { func (m command) generateSpec(opts *options) (spec.Interface, error) {
deviceNamer, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy) var deviceNamers []nvcdi.DeviceNamer
for _, strategy := range opts.deviceNameStrategies.Value() {
deviceNamer, err := nvcdi.NewDeviceNamer(strategy)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create device namer: %v", err) return nil, fmt.Errorf("failed to create device namer: %v", err)
} }
deviceNamers = append(deviceNamers, deviceNamer)
}
cdilib, err := nvcdi.New( cdilib, err := nvcdi.New(
nvcdi.WithLogger(m.logger), nvcdi.WithLogger(m.logger),
@ -252,7 +258,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
nvcdi.WithDevRoot(opts.devRoot), nvcdi.WithDevRoot(opts.devRoot),
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath), nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
nvcdi.WithLdconfigPath(opts.ldconfigPath), nvcdi.WithLdconfigPath(opts.ldconfigPath),
nvcdi.WithDeviceNamer(deviceNamer), nvcdi.WithDeviceNamers(deviceNamers...),
nvcdi.WithMode(opts.mode), nvcdi.WithMode(opts.mode),
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()), nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
nvcdi.WithCSVFiles(opts.csv.files.Value()), nvcdi.WithCSVFiles(opts.csv.files.Value()),

View File

@ -48,8 +48,8 @@ type Interface interface {
GetCommonEdits() (*cdi.ContainerEdits, error) GetCommonEdits() (*cdi.ContainerEdits, error)
GetAllDeviceSpecs() ([]specs.Device, error) GetAllDeviceSpecs() ([]specs.Device, error)
GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error)
GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error)
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error)
GetDeviceSpecsByID(...string) ([]specs.Device, error) GetDeviceSpecsByID(...string) ([]specs.Device, error)
} }

View File

@ -34,23 +34,26 @@ import (
) )
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'. // GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) { func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
edits, err := l.GetGPUDeviceEdits(d) edits, err := l.GetGPUDeviceEdits(d)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get edits for device: %v", err) return nil, fmt.Errorf("failed to get edits for device: %v", err)
} }
name, err := l.deviceNamer.GetDeviceName(i, convert{d}) var deviceSpecs []specs.Device
names, err := l.deviceNamers.GetDeviceNames(i, convert{d})
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get device name: %v", err) return nil, fmt.Errorf("failed to get device name: %v", err)
} }
for _, name := range names {
spec := specs.Device{ spec := specs.Device{
Name: name, Name: name,
ContainerEdits: *edits.ContainerEdits, ContainerEdits: *edits.ContainerEdits,
} }
deviceSpecs = append(deviceSpecs, spec)
}
return &spec, nil return deviceSpecs, nil
} }
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'. // GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.

View File

@ -68,7 +68,7 @@ func (l *gdslib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
} }
// GetGPUDeviceSpecs is unsupported for the gdslib specs // GetGPUDeviceSpecs is unsupported for the gdslib specs
func (l *gdslib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) { func (l *gdslib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported") return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
} }
@ -78,7 +78,7 @@ func (l *gdslib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
} }
// GetMIGDeviceSpecs is unsupported for the gdslib specs // GetMIGDeviceSpecs is unsupported for the gdslib specs
func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported") return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
} }

View File

@ -58,16 +58,20 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
return nil, fmt.Errorf("failed to create container edits for CSV files: %v", err) return nil, fmt.Errorf("failed to create container edits for CSV files: %v", err)
} }
name, err := l.deviceNamer.GetDeviceName(0, uuidUnsupported{}) names, err := l.deviceNamers.GetDeviceNames(0, uuidIgnored{})
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get device name: %v", err) return nil, fmt.Errorf("failed to get device name: %v", err)
} }
var deviceSpecs []specs.Device
for _, name := range names {
deviceSpec := specs.Device{ deviceSpec := specs.Device{
Name: name, Name: name,
ContainerEdits: *e.ContainerEdits, ContainerEdits: *e.ContainerEdits,
} }
return []specs.Device{deviceSpec}, nil deviceSpecs = append(deviceSpecs, deviceSpec)
}
return deviceSpecs, nil
} }
// GetCommonEdits generates a CDI specification that can be used for ANY devices // GetCommonEdits generates a CDI specification that can be used for ANY devices
@ -82,7 +86,7 @@ func (l *csvlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
} }
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'. // GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) { func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported for CSV files") return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported for CSV files")
} }
@ -92,7 +96,7 @@ func (l *csvlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
} }
// GetMIGDeviceSpecs returns the CDI device specs for the full MIG represented by 'device'. // GetMIGDeviceSpecs returns the CDI device specs for the full MIG represented by 'device'.
func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported for CSV files") return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported for CSV files")
} }

View File

@ -208,11 +208,11 @@ func (l *nvmllib) getEditsForMIGDevice(nvmlDevice nvml.Device) (*cdi.ContainerEd
func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) { func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
var deviceSpecs []specs.Device var deviceSpecs []specs.Device
err := l.devicelib.VisitDevices(func(i int, d device.Device) error { err := l.devicelib.VisitDevices(func(i int, d device.Device) error {
deviceSpec, err := l.GetGPUDeviceSpecs(i, d) specsForDevice, err := l.GetGPUDeviceSpecs(i, d)
if err != nil { if err != nil {
return err return err
} }
deviceSpecs = append(deviceSpecs, *deviceSpec) deviceSpecs = append(deviceSpecs, specsForDevice...)
return nil return nil
}) })
@ -225,11 +225,11 @@ func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
func (l *nvmllib) getMigDeviceSpecs() ([]specs.Device, error) { func (l *nvmllib) getMigDeviceSpecs() ([]specs.Device, error) {
var deviceSpecs []specs.Device var deviceSpecs []specs.Device
err := l.devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error { err := l.devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
deviceSpec, err := l.GetMIGDeviceSpecs(i, d, j, mig) specsForDevice, err := l.GetMIGDeviceSpecs(i, d, j, mig)
if err != nil { if err != nil {
return err return err
} }
deviceSpecs = append(deviceSpecs, *deviceSpec) deviceSpecs = append(deviceSpecs, specsForDevice...)
return nil return nil
}) })

View File

@ -68,7 +68,7 @@ func (l *wsllib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
} }
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'. // GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) { func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported on WSL") return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported on WSL")
} }
@ -78,7 +78,7 @@ func (l *wsllib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
} }
// GetMIGDeviceSpecs returns the CDI device specs for the full MIG represented by 'device'. // GetMIGDeviceSpecs returns the CDI device specs for the full MIG represented by 'device'.
func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported on WSL") return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported on WSL")
} }

View File

@ -44,7 +44,7 @@ type nvcdilib struct {
nvmllib nvml.Interface nvmllib nvml.Interface
mode string mode string
devicelib device.Interface devicelib device.Interface
deviceNamer DeviceNamer deviceNamers DeviceNamers
driverRoot string driverRoot string
devRoot string devRoot string
nvidiaCTKPath string nvidiaCTKPath string
@ -75,8 +75,9 @@ func New(opts ...Option) (Interface, error) {
if l.logger == nil { if l.logger == nil {
l.logger = logger.New() l.logger = logger.New()
} }
if l.deviceNamer == nil { if len(l.deviceNamers) == 0 {
l.deviceNamer, _ = NewDeviceNamer(DeviceNameStrategyIndex) indexNamer, _ := NewDeviceNamer(DeviceNameStrategyIndex)
l.deviceNamers = []DeviceNamer{indexNamer}
} }
if l.driverRoot == "" { if l.driverRoot == "" {
l.driverRoot = "/" l.driverRoot = "/"

View File

@ -175,7 +175,7 @@ func (m *managementlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, e
} }
// GetGPUDeviceSpecs is unsupported for the managementlib specs // GetGPUDeviceSpecs is unsupported for the managementlib specs
func (m *managementlib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) { func (m *managementlib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported") return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
} }
@ -185,7 +185,7 @@ func (m *managementlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi
} }
// GetMIGDeviceSpecs is unsupported for the managementlib specs // GetMIGDeviceSpecs is unsupported for the managementlib specs
func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported") return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
} }

View File

@ -31,23 +31,25 @@ import (
) )
// GetMIGDeviceSpecs returns the CDI device specs for the MIG device represented by 'mig'. // GetMIGDeviceSpecs returns the CDI device specs for the MIG device represented by 'mig'.
func (l *nvmllib) GetMIGDeviceSpecs(i int, d device.Device, j int, mig device.MigDevice) (*specs.Device, error) { func (l *nvmllib) GetMIGDeviceSpecs(i int, d device.Device, j int, mig device.MigDevice) ([]specs.Device, error) {
edits, err := l.GetMIGDeviceEdits(d, mig) edits, err := l.GetMIGDeviceEdits(d, mig)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get edits for device: %v", err) return nil, fmt.Errorf("failed to get edits for device: %v", err)
} }
name, err := l.deviceNamer.GetMigDeviceName(i, convert{d}, j, convert{mig}) names, err := l.deviceNamers.GetMigDeviceNames(i, convert{d}, j, convert{mig})
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get device name: %v", err) return nil, fmt.Errorf("failed to get device name: %v", err)
} }
var deviceSpecs []specs.Device
for _, name := range names {
spec := specs.Device{ spec := specs.Device{
Name: name, Name: name,
ContainerEdits: *edits.ContainerEdits, ContainerEdits: *edits.ContainerEdits,
} }
deviceSpecs = append(deviceSpecs, spec)
return &spec, nil }
return deviceSpecs, nil
} }
// GetMIGDeviceEdits returns the CDI edits for the MIG device represented by 'mig' on 'parent'. // GetMIGDeviceEdits returns the CDI edits for the MIG device represented by 'mig' on 'parent'.

View File

@ -68,7 +68,7 @@ func (l *mofedlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
} }
// GetGPUDeviceSpecs is unsupported for the mofedlib specs // GetGPUDeviceSpecs is unsupported for the mofedlib specs
func (l *mofedlib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) { func (l *mofedlib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported") return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
} }
@ -78,7 +78,7 @@ func (l *mofedlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Cont
} }
// GetMIGDeviceSpecs is unsupported for the mofedlib specs // GetMIGDeviceSpecs is unsupported for the mofedlib specs
func (l *mofedlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) { func (l *mofedlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported") return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
} }

View File

@ -28,6 +28,9 @@ type UUIDer interface {
GetUUID() (string, error) GetUUID() (string, error)
} }
// DeviceNamers represents a list of device namers.
// Its methods query every namer in the list and collect the generated names.
type DeviceNamers []DeviceNamer
// DeviceNamer is an interface for getting device names // DeviceNamer is an interface for getting device names
type DeviceNamer interface { type DeviceNamer interface {
GetDeviceName(int, UUIDer) (string, error) GetDeviceName(int, UUIDer) (string, error)
@ -102,6 +105,12 @@ type convert struct {
nvmlUUIDer nvmlUUIDer
} }
// uuidIgnored is a UUIDer implementation that reports an empty UUID without
// an error.
type uuidIgnored struct{}

// GetUUID always returns an empty string and a nil error.
func (uuidIgnored) GetUUID() (string, error) {
	const noUUID = ""
	return noUUID, nil
}
type uuidUnsupported struct{} type uuidUnsupported struct{}
func (m convert) GetUUID() (string, error) { func (m convert) GetUUID() (string, error) {
@ -120,3 +129,39 @@ var errUUIDUnsupported = errors.New("GetUUID is not supported")
func (m uuidUnsupported) GetUUID() (string, error) { func (m uuidUnsupported) GetUUID() (string, error) {
return "", errUUIDUnsupported return "", errUUIDUnsupported
} }
// GetDeviceNames returns the names generated for the device with index i by
// each of the namers in the list, in namer order.
//
// Empty names are skipped. A name generated by more than one namer is only
// returned once (first occurrence wins) since device names are expected to be
// unique within a CDI spec. The first error returned by a namer aborts the
// call, and an error is returned if no names are generated at all.
func (l DeviceNamers) GetDeviceNames(i int, d UUIDer) ([]string, error) {
	var names []string
	seen := make(map[string]struct{}, len(l))
	for _, namer := range l {
		name, err := namer.GetDeviceName(i, d)
		if err != nil {
			return nil, err
		}
		if name == "" {
			continue
		}
		// Skip names that were already generated by an earlier namer, e.g.
		// when the same strategy is specified more than once.
		if _, ok := seen[name]; ok {
			continue
		}
		seen[name] = struct{}{}
		names = append(names, name)
	}
	if len(names) == 0 {
		return nil, errors.New("no names defined")
	}
	return names, nil
}
// GetMigDeviceNames returns the names generated for the MIG device with index
// j on the parent device with index i by each of the namers in the list, in
// namer order.
//
// Empty names are skipped. A name generated by more than one namer is only
// returned once (first occurrence wins) since device names are expected to be
// unique within a CDI spec. The first error returned by a namer aborts the
// call, and an error is returned if no names are generated at all.
func (l DeviceNamers) GetMigDeviceNames(i int, d UUIDer, j int, mig UUIDer) ([]string, error) {
	var names []string
	seen := make(map[string]struct{}, len(l))
	for _, namer := range l {
		name, err := namer.GetMigDeviceName(i, d, j, mig)
		if err != nil {
			return nil, err
		}
		if name == "" {
			continue
		}
		// Skip names that were already generated by an earlier namer, e.g.
		// when the same strategy is specified more than once.
		if _, ok := seen[name]; ok {
			continue
		}
		seen[name] = struct{}{}
		names = append(names, name)
	}
	if len(names) == 0 {
		return nil, errors.New("no names defined")
	}
	return names, nil
}

View File

@ -34,10 +34,10 @@ func WithDeviceLib(devicelib device.Interface) Option {
} }
} }
// WithDeviceNamer sets the device namer for the library // WithDeviceNamers sets the device namers for the library
func WithDeviceNamer(namer DeviceNamer) Option { func WithDeviceNamers(namers ...DeviceNamer) Option {
return func(l *nvcdilib) { return func(l *nvcdilib) {
l.deviceNamer = namer l.deviceNamers = namers
} }
} }