mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-21 15:57:49 +00:00
Allow multiple device name strategies to be specified
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
a8d48808d7
commit
52da12cf9a
@ -2,6 +2,8 @@
|
||||
|
||||
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
|
||||
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
|
||||
* Allow multiple device naming strategies for `nvidia-ctk cdi generate` command. This allows a single
|
||||
CDI spec to be generated that includes GPUs by index and UUID.
|
||||
|
||||
## v1.15.0-rc.3
|
||||
* Fix bug in `nvidia-ctk hook update-ldcache` where default `--ldconfig-path` value was not applied.
|
||||
|
@ -42,16 +42,16 @@ type command struct {
|
||||
}
|
||||
|
||||
type options struct {
|
||||
output string
|
||||
format string
|
||||
deviceNameStrategy string
|
||||
driverRoot string
|
||||
devRoot string
|
||||
nvidiaCTKPath string
|
||||
ldconfigPath string
|
||||
mode string
|
||||
vendor string
|
||||
class string
|
||||
output string
|
||||
format string
|
||||
deviceNameStrategies cli.StringSlice
|
||||
driverRoot string
|
||||
devRoot string
|
||||
nvidiaCTKPath string
|
||||
ldconfigPath string
|
||||
mode string
|
||||
vendor string
|
||||
class string
|
||||
|
||||
librarySearchPaths cli.StringSlice
|
||||
|
||||
@ -109,11 +109,11 @@ func (m command) build() *cli.Command {
|
||||
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
|
||||
Destination: &opts.devRoot,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "device-name-strategy",
|
||||
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
|
||||
Value: nvcdi.DeviceNameStrategyIndex,
|
||||
Destination: &opts.deviceNameStrategy,
|
||||
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
|
||||
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex),
|
||||
Destination: &opts.deviceNameStrategies,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "driver-root",
|
||||
@ -185,9 +185,11 @@ func (m command) validateFlags(c *cli.Context, opts *options) error {
|
||||
return fmt.Errorf("invalid discovery mode: %v", opts.mode)
|
||||
}
|
||||
|
||||
_, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
|
||||
if err != nil {
|
||||
return err
|
||||
for _, strategy := range opts.deviceNameStrategies.Value() {
|
||||
_, err := nvcdi.NewDeviceNamer(strategy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
opts.nvidiaCTKPath = config.ResolveNVIDIACTKPath(m.logger, opts.nvidiaCTKPath)
|
||||
@ -241,9 +243,13 @@ func formatFromFilename(filename string) string {
|
||||
}
|
||||
|
||||
func (m command) generateSpec(opts *options) (spec.Interface, error) {
|
||||
deviceNamer, err := nvcdi.NewDeviceNamer(opts.deviceNameStrategy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device namer: %v", err)
|
||||
var deviceNamers []nvcdi.DeviceNamer
|
||||
for _, strategy := range opts.deviceNameStrategies.Value() {
|
||||
deviceNamer, err := nvcdi.NewDeviceNamer(strategy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create device namer: %v", err)
|
||||
}
|
||||
deviceNamers = append(deviceNamers, deviceNamer)
|
||||
}
|
||||
|
||||
cdilib, err := nvcdi.New(
|
||||
@ -252,7 +258,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
|
||||
nvcdi.WithDevRoot(opts.devRoot),
|
||||
nvcdi.WithNVIDIACTKPath(opts.nvidiaCTKPath),
|
||||
nvcdi.WithLdconfigPath(opts.ldconfigPath),
|
||||
nvcdi.WithDeviceNamer(deviceNamer),
|
||||
nvcdi.WithDeviceNamers(deviceNamers...),
|
||||
nvcdi.WithMode(opts.mode),
|
||||
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
|
||||
nvcdi.WithCSVFiles(opts.csv.files.Value()),
|
||||
|
@ -48,8 +48,8 @@ type Interface interface {
|
||||
GetCommonEdits() (*cdi.ContainerEdits, error)
|
||||
GetAllDeviceSpecs() ([]specs.Device, error)
|
||||
GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
|
||||
GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error)
|
||||
GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error)
|
||||
GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error)
|
||||
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error)
|
||||
GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error)
|
||||
GetDeviceSpecsByID(...string) ([]specs.Device, error)
|
||||
}
|
||||
|
@ -34,23 +34,26 @@ import (
|
||||
)
|
||||
|
||||
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
|
||||
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
|
||||
func (l *nvmllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
|
||||
edits, err := l.GetGPUDeviceEdits(d)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get edits for device: %v", err)
|
||||
}
|
||||
|
||||
name, err := l.deviceNamer.GetDeviceName(i, convert{d})
|
||||
var deviceSpecs []specs.Device
|
||||
names, err := l.deviceNamers.GetDeviceNames(i, convert{d})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get device name: %v", err)
|
||||
}
|
||||
|
||||
spec := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: *edits.ContainerEdits,
|
||||
for _, name := range names {
|
||||
spec := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: *edits.ContainerEdits,
|
||||
}
|
||||
deviceSpecs = append(deviceSpecs, spec)
|
||||
}
|
||||
|
||||
return &spec, nil
|
||||
return deviceSpecs, nil
|
||||
}
|
||||
|
||||
// GetGPUDeviceEdits returns the CDI edits for the full GPU represented by 'device'.
|
||||
|
@ -68,7 +68,7 @@ func (l *gdslib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
|
||||
}
|
||||
|
||||
// GetGPUDeviceSpecs is unsupported for the gdslib specs
|
||||
func (l *gdslib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
|
||||
func (l *gdslib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
|
||||
}
|
||||
|
||||
@ -78,7 +78,7 @@ func (l *gdslib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
|
||||
}
|
||||
|
||||
// GetMIGDeviceSpecs is unsupported for the gdslib specs
|
||||
func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
|
||||
func (l *gdslib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
|
||||
}
|
||||
|
||||
|
@ -58,16 +58,20 @@ func (l *csvlib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("failed to create container edits for CSV files: %v", err)
|
||||
}
|
||||
|
||||
name, err := l.deviceNamer.GetDeviceName(0, uuidUnsupported{})
|
||||
names, err := l.deviceNamers.GetDeviceNames(0, uuidIgnored{})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get device name: %v", err)
|
||||
}
|
||||
|
||||
deviceSpec := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: *e.ContainerEdits,
|
||||
var deviceSpecs []specs.Device
|
||||
for _, name := range names {
|
||||
deviceSpec := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: *e.ContainerEdits,
|
||||
}
|
||||
deviceSpecs = append(deviceSpecs, deviceSpec)
|
||||
}
|
||||
return []specs.Device{deviceSpec}, nil
|
||||
|
||||
return deviceSpecs, nil
|
||||
}
|
||||
|
||||
// GetCommonEdits generates a CDI specification that can be used for ANY devices
|
||||
@ -82,7 +86,7 @@ func (l *csvlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
|
||||
}
|
||||
|
||||
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
|
||||
func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
|
||||
func (l *csvlib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported for CSV files")
|
||||
}
|
||||
|
||||
@ -92,7 +96,7 @@ func (l *csvlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
|
||||
}
|
||||
|
||||
// GetMIGDeviceSpecs returns the CDI device specs for the full MIG represented by 'device'.
|
||||
func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
|
||||
func (l *csvlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported for CSV files")
|
||||
}
|
||||
|
||||
|
@ -208,11 +208,11 @@ func (l *nvmllib) getEditsForMIGDevice(nvmlDevice nvml.Device) (*cdi.ContainerEd
|
||||
func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
|
||||
var deviceSpecs []specs.Device
|
||||
err := l.devicelib.VisitDevices(func(i int, d device.Device) error {
|
||||
deviceSpec, err := l.GetGPUDeviceSpecs(i, d)
|
||||
specsForDevice, err := l.GetGPUDeviceSpecs(i, d)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
deviceSpecs = append(deviceSpecs, *deviceSpec)
|
||||
deviceSpecs = append(deviceSpecs, specsForDevice...)
|
||||
|
||||
return nil
|
||||
})
|
||||
@ -225,11 +225,11 @@ func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
|
||||
func (l *nvmllib) getMigDeviceSpecs() ([]specs.Device, error) {
|
||||
var deviceSpecs []specs.Device
|
||||
err := l.devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
|
||||
deviceSpec, err := l.GetMIGDeviceSpecs(i, d, j, mig)
|
||||
specsForDevice, err := l.GetMIGDeviceSpecs(i, d, j, mig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
deviceSpecs = append(deviceSpecs, *deviceSpec)
|
||||
deviceSpecs = append(deviceSpecs, specsForDevice...)
|
||||
|
||||
return nil
|
||||
})
|
||||
|
@ -68,7 +68,7 @@ func (l *wsllib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
|
||||
}
|
||||
|
||||
// GetGPUDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
|
||||
func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) (*specs.Device, error) {
|
||||
func (l *wsllib) GetGPUDeviceSpecs(i int, d device.Device) ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported on WSL")
|
||||
}
|
||||
|
||||
@ -78,7 +78,7 @@ func (l *wsllib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Contai
|
||||
}
|
||||
|
||||
// GetMIGDeviceSpecs returns the CDI device specs for the full MIG represented by 'device'.
|
||||
func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
|
||||
func (l *wsllib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported on WSL")
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,7 @@ type nvcdilib struct {
|
||||
nvmllib nvml.Interface
|
||||
mode string
|
||||
devicelib device.Interface
|
||||
deviceNamer DeviceNamer
|
||||
deviceNamers DeviceNamers
|
||||
driverRoot string
|
||||
devRoot string
|
||||
nvidiaCTKPath string
|
||||
@ -75,8 +75,9 @@ func New(opts ...Option) (Interface, error) {
|
||||
if l.logger == nil {
|
||||
l.logger = logger.New()
|
||||
}
|
||||
if l.deviceNamer == nil {
|
||||
l.deviceNamer, _ = NewDeviceNamer(DeviceNameStrategyIndex)
|
||||
if len(l.deviceNamers) == 0 {
|
||||
indexNamer, _ := NewDeviceNamer(DeviceNameStrategyIndex)
|
||||
l.deviceNamers = []DeviceNamer{indexNamer}
|
||||
}
|
||||
if l.driverRoot == "" {
|
||||
l.driverRoot = "/"
|
||||
|
@ -175,7 +175,7 @@ func (m *managementlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, e
|
||||
}
|
||||
|
||||
// GetGPUDeviceSpecs is unsupported for the managementlib specs
|
||||
func (m *managementlib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
|
||||
func (m *managementlib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
|
||||
}
|
||||
|
||||
@ -185,7 +185,7 @@ func (m *managementlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi
|
||||
}
|
||||
|
||||
// GetMIGDeviceSpecs is unsupported for the managementlib specs
|
||||
func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
|
||||
func (m *managementlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
|
||||
}
|
||||
|
||||
|
@ -31,23 +31,25 @@ import (
|
||||
)
|
||||
|
||||
// GetMIGDeviceSpecs returns the CDI device specs for the full GPU represented by 'device'.
|
||||
func (l *nvmllib) GetMIGDeviceSpecs(i int, d device.Device, j int, mig device.MigDevice) (*specs.Device, error) {
|
||||
func (l *nvmllib) GetMIGDeviceSpecs(i int, d device.Device, j int, mig device.MigDevice) ([]specs.Device, error) {
|
||||
edits, err := l.GetMIGDeviceEdits(d, mig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get edits for device: %v", err)
|
||||
}
|
||||
|
||||
name, err := l.deviceNamer.GetMigDeviceName(i, convert{d}, j, convert{mig})
|
||||
names, err := l.deviceNamers.GetMigDeviceNames(i, convert{d}, j, convert{mig})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get device name: %v", err)
|
||||
}
|
||||
|
||||
spec := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: *edits.ContainerEdits,
|
||||
var deviceSpecs []specs.Device
|
||||
for _, name := range names {
|
||||
spec := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: *edits.ContainerEdits,
|
||||
}
|
||||
deviceSpecs = append(deviceSpecs, spec)
|
||||
}
|
||||
|
||||
return &spec, nil
|
||||
return deviceSpecs, nil
|
||||
}
|
||||
|
||||
// GetMIGDeviceEdits returns the CDI edits for the MIG device represented by 'mig' on 'parent'.
|
||||
|
@ -68,7 +68,7 @@ func (l *mofedlib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
|
||||
}
|
||||
|
||||
// GetGPUDeviceSpecs is unsupported for the mofedlib specs
|
||||
func (l *mofedlib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
|
||||
func (l *mofedlib) GetGPUDeviceSpecs(int, device.Device) ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetGPUDeviceSpecs is not supported")
|
||||
}
|
||||
|
||||
@ -78,7 +78,7 @@ func (l *mofedlib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.Cont
|
||||
}
|
||||
|
||||
// GetMIGDeviceSpecs is unsupported for the mofedlib specs
|
||||
func (l *mofedlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
|
||||
func (l *mofedlib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) ([]specs.Device, error) {
|
||||
return nil, fmt.Errorf("GetMIGDeviceSpecs is not supported")
|
||||
}
|
||||
|
||||
|
@ -28,6 +28,9 @@ type UUIDer interface {
|
||||
GetUUID() (string, error)
|
||||
}
|
||||
|
||||
// DeviceNamers represents a list of device namers
|
||||
type DeviceNamers []DeviceNamer
|
||||
|
||||
// DeviceNamer is an interface for getting device names
|
||||
type DeviceNamer interface {
|
||||
GetDeviceName(int, UUIDer) (string, error)
|
||||
@ -102,6 +105,12 @@ type convert struct {
|
||||
nvmlUUIDer
|
||||
}
|
||||
|
||||
type uuidIgnored struct{}
|
||||
|
||||
func (m uuidIgnored) GetUUID() (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
type uuidUnsupported struct{}
|
||||
|
||||
func (m convert) GetUUID() (string, error) {
|
||||
@ -120,3 +129,39 @@ var errUUIDUnsupported = errors.New("GetUUID is not supported")
|
||||
func (m uuidUnsupported) GetUUID() (string, error) {
|
||||
return "", errUUIDUnsupported
|
||||
}
|
||||
|
||||
func (l DeviceNamers) GetDeviceNames(i int, d UUIDer) ([]string, error) {
|
||||
var names []string
|
||||
for _, namer := range l {
|
||||
name, err := namer.GetDeviceName(i, d)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
names = append(names, name)
|
||||
}
|
||||
if len(names) == 0 {
|
||||
return nil, errors.New("no names defined")
|
||||
}
|
||||
return names, nil
|
||||
}
|
||||
|
||||
func (l DeviceNamers) GetMigDeviceNames(i int, d UUIDer, j int, mig UUIDer) ([]string, error) {
|
||||
var names []string
|
||||
for _, namer := range l {
|
||||
name, err := namer.GetMigDeviceName(i, d, j, mig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
names = append(names, name)
|
||||
}
|
||||
if len(names) == 0 {
|
||||
return nil, errors.New("no names defined")
|
||||
}
|
||||
return names, nil
|
||||
}
|
||||
|
@ -34,10 +34,10 @@ func WithDeviceLib(devicelib device.Interface) Option {
|
||||
}
|
||||
}
|
||||
|
||||
// WithDeviceNamer sets the device namer for the library
|
||||
func WithDeviceNamer(namer DeviceNamer) Option {
|
||||
// WithDeviceNamers sets the device namer for the library
|
||||
func WithDeviceNamers(namers ...DeviceNamer) Option {
|
||||
return func(l *nvcdilib) {
|
||||
l.deviceNamer = namer
|
||||
l.deviceNamers = namers
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user