Add nvcdi package with basic CDI generation API

This change adds an nvcdi package that exposes a basic API for
CDI spec generation. This is used from the nvidia-ctk cdi generate
command and can be consumed by DRA implementations and the device plugin.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2022-12-02 14:17:52 +01:00
parent 179133c8ad
commit 5b110fba2d
11 changed files with 456 additions and 189 deletions

View File

@@ -1,63 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// NewCommonDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
// This includes driver libraries and meta devices, for example.
func NewCommonDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
metaDevices := discover.NewDeviceDiscoverer(
logger,
lookup.NewCharDeviceLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
),
driverRoot,
[]string{
"/dev/nvidia-modeset",
"/dev/nvidia-uvm-tools",
"/dev/nvidia-uvm",
"/dev/nvidiactl",
},
)
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(logger, driverRoot)
if err != nil {
return nil, fmt.Errorf("error constructing discoverer for graphics mounts: %v", err)
}
driverFiles, err := NewDriverDiscoverer(logger, driverRoot, nvidiaCTKPath, nvmllib)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
}
d := discover.Merge(
metaDevices,
graphicsMounts,
driverFiles,
)
return d, nil
}

View File

@@ -17,9 +17,12 @@
package generate
import (
"fmt"
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/sirupsen/logrus"
)
@@ -33,6 +36,16 @@ type deviceFolderPermissions struct {
var _ discover.Discover = (*deviceFolderPermissions)(nil)
// GetDeviceFolderPermissionHookEdits gets the edits required for device folder permissions discoverer
func GetDeviceFolderPermissionHookEdits(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, deviceSpecs []specs.Device) (*cdi.ContainerEdits, error) {
deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(logger, driverRoot, nvidiaCTKPath, deviceSpecs)
if err != nil {
return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err)
}
return edits.FromDiscoverer(deviceFolderPermissionHooks)
}
// NewDeviceFolderPermissionHookDiscoverer creates a discoverer that can be used to update the permissions for the parent folders of nested device nodes from the specified set of device specs.
// This works around an issue with rootless podman when using crun as a low-level runtime.
// See https://github.com/containers/crun/issues/1047

View File

@@ -1,162 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
// The supplied NVML Library is used to query the expected driver version.
func NewDriverDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) {
version, r := nvmllib.SystemGetDriverVersion()
if r != nvml.SUCCESS {
return nil, fmt.Errorf("failed to determine driver version: %v", r)
}
libraries, err := NewDriverLibraryDiscoverer(logger, driverRoot, nvidiaCTKPath, version)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
}
ipcs, err := discover.NewIPCDiscoverer(logger, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
}
firmwares := NewDriverFirmwareDiscoverer(logger, driverRoot, version)
binaries := NewDriverBinariesDiscoverer(logger, driverRoot)
d := discover.Merge(
libraries,
ipcs,
firmwares,
binaries,
)
return d, nil
}
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
func NewDriverLibraryDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, version string) (discover.Discover, error) {
libraryPaths, err := getVersionLibs(logger, driverRoot, version)
if err != nil {
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
}
libraries := discover.NewMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
),
driverRoot,
libraryPaths,
)
cfg := &discover.Config{
DriverRoot: driverRoot,
NvidiaCTKPath: nvidiaCTKPath,
}
hooks, _ := discover.NewLDCacheUpdateHook(logger, libraries, cfg)
d := discover.Merge(
libraries,
hooks,
)
return d, nil
}
// NewDriverFirmwareDiscoverer creates a discoverer for GSP firmware associated with the specified driver version.
func NewDriverFirmwareDiscoverer(logger *logrus.Logger, driverRoot string, version string) discover.Discover {
gspFirmwarePath := filepath.Join("/lib/firmware/nvidia", version, "gsp.bin")
return discover.NewMounts(
logger,
lookup.NewFileLocator(
lookup.WithLogger(logger),
lookup.WithRoot(driverRoot),
),
driverRoot,
[]string{gspFirmwarePath},
)
}
// NewDriverBinariesDiscoverer creates a discoverer for GSP firmware associated with the GPU driver.
func NewDriverBinariesDiscoverer(logger *logrus.Logger, driverRoot string) discover.Discover {
return discover.NewMounts(
logger,
lookup.NewExecutableLocator(logger, driverRoot),
driverRoot,
[]string{
"nvidia-smi", /* System management interface */
"nvidia-debugdump", /* GPU coredump utility */
"nvidia-persistenced", /* Persistence mode utility */
"nvidia-cuda-mps-control", /* Multi process service CLI */
"nvidia-cuda-mps-server", /* Multi process service server */
},
)
}
// getVersionLibs checks the LDCache for libraries ending in the specified driver version.
// Although the ldcache at the specified driverRoot is queried, the paths are returned relative to this driverRoot.
// This allows the standard mount location logic to be used for resolving the mounts.
func getVersionLibs(logger *logrus.Logger, driverRoot string, version string) ([]string, error) {
logger.Infof("Using driver version %v", version)
cache, err := ldcache.New(logger, driverRoot)
if err != nil {
return nil, fmt.Errorf("failed to load ldcache: %v", err)
}
libs32, libs64 := cache.List()
var libs []string
for _, l := range libs64 {
if strings.HasSuffix(l, version) {
logger.Infof("found 64-bit driver lib: %v", l)
libs = append(libs, l)
}
}
for _, l := range libs32 {
if strings.HasSuffix(l, version) {
logger.Infof("found 32-bit driver lib: %v", l)
libs = append(libs, l)
}
}
if driverRoot == "/" || driverRoot == "" {
return libs, nil
}
var relative []string
for _, l := range libs {
relative = append(relative, strings.TrimPrefix(l, driverRoot))
}
return relative, nil
}

View File

@@ -1,160 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/drm"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// byPathHookDiscoverer discovers the entities required for injecting by-path DRM device links
type byPathHookDiscoverer struct {
logger *logrus.Logger
driverRoot string
nvidiaCTKPath string
pciBusID string
}
var _ discover.Discover = (*byPathHookDiscoverer)(nil)
// NewFullGPUDiscoverer creates a discoverer for the full GPU defined by the specified device.
func NewFullGPUDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, d device.Device) (discover.Discover, error) {
// TODO: The functionality to get device paths should be integrated into the go-nvlib/pkg/device.Device interface.
// This will allow reuse here and in other code where the paths are queried such as the NVIDIA device plugin.
minor, ret := d.GetMinorNumber()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
}
path := fmt.Sprintf("/dev/nvidia%d", minor)
pciInfo, ret := d.GetPciInfo()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting PCI info for device: %v", ret)
}
pciBusID := getBusID(pciInfo)
drmDeviceNodes, err := drm.GetDeviceNodesByBusID(pciBusID)
if err != nil {
return nil, fmt.Errorf("failed to determine DRM devices for %v: %v", pciBusID, err)
}
deviceNodePaths := append([]string{path}, drmDeviceNodes...)
deviceNodes := discover.NewCharDeviceDiscoverer(
logger,
deviceNodePaths,
driverRoot,
)
byPathHooks := &byPathHookDiscoverer{
logger: logger,
driverRoot: driverRoot,
nvidiaCTKPath: nvidiaCTKPath,
pciBusID: pciBusID,
}
dd := discover.Merge(
deviceNodes,
byPathHooks,
)
return dd, nil
}
// Devices returns the empty list for the by-path hook discoverer
func (d *byPathHookDiscoverer) Devices() ([]discover.Device, error) {
return nil, nil
}
// Hooks returns the hooks for the GPU device.
// The following hooks are detected:
// 1. A hook to create /dev/dri/by-path symlinks
func (d *byPathHookDiscoverer) Hooks() ([]discover.Hook, error) {
links, err := d.deviceNodeLinks()
if err != nil {
return nil, fmt.Errorf("failed to discover DRA device links: %v", err)
}
if len(links) == 0 {
return nil, nil
}
var args []string
for _, l := range links {
args = append(args, "--link", l)
}
hook := discover.CreateNvidiaCTKHook(
d.nvidiaCTKPath,
"create-symlinks",
args...,
)
return []discover.Hook{hook}, nil
}
// Mounts returns an empty slice for a full GPU
func (d *byPathHookDiscoverer) Mounts() ([]discover.Mount, error) {
return nil, nil
}
func (d *byPathHookDiscoverer) deviceNodeLinks() ([]string, error) {
candidates := []string{
fmt.Sprintf("/dev/dri/by-path/pci-%s-card", d.pciBusID),
fmt.Sprintf("/dev/dri/by-path/pci-%s-render", d.pciBusID),
}
var links []string
for _, c := range candidates {
linkPath := filepath.Join(d.driverRoot, c)
device, err := os.Readlink(linkPath)
if err != nil {
d.logger.Warningf("Failed to evaluate symlink %v; ignoring", linkPath)
continue
}
d.logger.Debugf("adding device symlink %v -> %v", linkPath, device)
links = append(links, fmt.Sprintf("%v::%v", device, linkPath))
}
return links, nil
}
// getBusID provides a utility function that returns the string representation of the bus ID.
func getBusID(p nvml.PciInfo) string {
var bytes []byte
for _, b := range p.BusId {
if byte(b) == '\x00' {
break
}
bytes = append(bytes, byte(b))
}
id := strings.ToLower(string(bytes))
if id != "0000" {
id = strings.TrimPrefix(id, "0000")
}
return id
}

View File

@@ -23,8 +23,8 @@ import (
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/sirupsen/logrus"
@@ -90,7 +90,7 @@ func (m command) build() *cli.Command {
&cli.StringFlag{
Name: "device-name-strategy",
Usage: "Specify the strategy for generating device names. One of [index | uuid | type-index]",
Value: deviceNameStrategyIndex,
Value: nvcdi.DeviceNameStrategyIndex,
Destination: &cfg.deviceNameStrategy,
},
&cli.StringFlag{
@@ -117,7 +117,7 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error {
return fmt.Errorf("invalid output format: %v", cfg.format)
}
_, err := newDeviceNamer(cfg.deviceNameStrategy)
_, err := nvcdi.NewDeviceNamer(cfg.deviceNameStrategy)
if err != nil {
return err
}
@@ -126,16 +126,7 @@ func (m command) validateFlags(r *cli.Context, cfg *config) error {
}
func (m command) run(c *cli.Context, cfg *config) error {
deviceNamer, err := newDeviceNamer(cfg.deviceNameStrategy)
if err != nil {
return fmt.Errorf("failed to create device namer: %v", err)
}
spec, err := m.generateSpec(
cfg.driverRoot,
discover.FindNvidiaCTK(m.logger, cfg.nvidiaCTKPath),
deviceNamer,
)
spec, err := m.generateSpec(cfg)
if err != nil {
return fmt.Errorf("failed to generate CDI spec: %v", err)
}
@@ -214,7 +205,12 @@ func writeToOutput(format string, data []byte, output io.Writer) error {
return nil
}
func (m command) generateSpec(driverRoot string, nvidiaCTKPath string, namer deviceNamer) (*specs.Spec, error) {
func (m command) generateSpec(cfg *config) (*specs.Spec, error) {
deviceNamer, err := nvcdi.NewDeviceNamer(cfg.deviceNameStrategy)
if err != nil {
return nil, fmt.Errorf("failed to create device namer: %v", err)
}
nvmllib := nvml.New()
if r := nvmllib.Init(); r != nvml.SUCCESS {
return nil, r
@@ -223,7 +219,16 @@ func (m command) generateSpec(driverRoot string, nvidiaCTKPath string, namer dev
devicelib := device.New(device.WithNvml(nvmllib))
deviceSpecs, err := m.generateDeviceSpecs(devicelib, driverRoot, nvidiaCTKPath, namer)
cdilib := nvcdi.New(
nvcdi.WithLogger(m.logger),
nvcdi.WithDriverRoot(cfg.driverRoot),
nvcdi.WithNVIDIACTKPath(cfg.nvidiaCTKPath),
nvcdi.WithDeviceNamer(deviceNamer),
nvcdi.WithDeviceLib(devicelib),
nvcdi.WithNvmlLib(nvmllib),
)
deviceSpecs, err := cdilib.GetAllDeviceSpecs()
if err != nil {
return nil, fmt.Errorf("failed to create device CDI specs: %v", err)
}
@@ -232,20 +237,16 @@ func (m command) generateSpec(driverRoot string, nvidiaCTKPath string, namer dev
deviceSpecs = append(deviceSpecs, allDevice)
common, err := NewCommonDiscoverer(m.logger, driverRoot, nvidiaCTKPath, nvmllib)
commonEdits, err := cdilib.GetCommonEdits()
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
return nil, fmt.Errorf("failed to create edits common for entities: %v", err)
}
deviceFolderPermissionEdits, err := GetDeviceFolderPermissionHookEdits(m.logger, cfg.driverRoot, cfg.nvidiaCTKPath, deviceSpecs)
if err != nil {
return nil, fmt.Errorf("failed to generated edits for device folder permissions: %v", err)
}
deviceFolderPermissionHooks, err := NewDeviceFolderPermissionHookDiscoverer(m.logger, driverRoot, nvidiaCTKPath, deviceSpecs)
if err != nil {
return nil, fmt.Errorf("failed to generated permission hooks for device nodes: %v", err)
}
commonEdits, err := edits.FromDiscoverer(discover.Merge(common, deviceFolderPermissionHooks))
if err != nil {
return nil, fmt.Errorf("failed to create container edits for common entities: %v", err)
}
commonEdits.Append(deviceFolderPermissionEdits)
// We construct the spec and determine the minimum required version based on the specification.
spec := specs.Spec{
@@ -266,73 +267,6 @@ func (m command) generateSpec(driverRoot string, nvidiaCTKPath string, namer dev
return &spec, nil
}
func (m command) generateDeviceSpecs(devicelib device.Interface, driverRoot string, nvidiaCTKPath string, namer deviceNamer) ([]specs.Device, error) {
var deviceSpecs []specs.Device
err := devicelib.VisitDevices(func(i int, d device.Device) error {
isMigEnabled, err := d.IsMigEnabled()
if err != nil {
return fmt.Errorf("failed to check whether device is MIG device: %v", err)
}
if isMigEnabled {
return nil
}
device, err := NewFullGPUDiscoverer(m.logger, driverRoot, nvidiaCTKPath, d)
if err != nil {
return fmt.Errorf("failed to create device: %v", err)
}
deviceEdits, err := edits.FromDiscoverer(device)
if err != nil {
return fmt.Errorf("failed to create container edits for device: %v", err)
}
deviceName, err := namer.GetDeviceName(i, d)
if err != nil {
return fmt.Errorf("failed to get device name: %v", err)
}
deviceSpec := specs.Device{
Name: deviceName,
ContainerEdits: *deviceEdits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, deviceSpec)
return nil
})
if err != nil {
return nil, fmt.Errorf("failed to generate CDI spec for GPU devices: %v", err)
}
err = devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
device, err := NewMigDeviceDiscoverer(m.logger, "", d, mig)
if err != nil {
return fmt.Errorf("failed to create MIG device: %v", err)
}
deviceEdits, err := edits.FromDiscoverer(device)
if err != nil {
return fmt.Errorf("failed to create container edits for MIG device: %v", err)
}
deviceName, err := namer.GetMigDeviceName(i, j, mig)
if err != nil {
return fmt.Errorf("failed to get device name: %v", err)
}
deviceSpec := specs.Device{
Name: deviceName,
ContainerEdits: *deviceEdits.ContainerEdits,
}
deviceSpecs = append(deviceSpecs, deviceSpec)
return nil
})
if err != nil {
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
}
return deviceSpecs, nil
}
// createAllDevice creates an 'all' device which combines the edits from the previous devices
func createAllDevice(deviceSpecs []specs.Device) specs.Device {
edits := edits.NewContainerEdits()

View File

@@ -1,75 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"github.com/sirupsen/logrus"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
// NewMigDeviceDiscoverer creates a discoverer for the specified mig device and its parent.
func NewMigDeviceDiscoverer(logger *logrus.Logger, driverRoot string, parent device.Device, d device.MigDevice) (discover.Discover, error) {
minor, ret := parent.GetMinorNumber()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
}
parentPath := fmt.Sprintf("/dev/nvidia%d", minor)
migCaps, err := nvcaps.NewMigCaps()
if err != nil {
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
}
gi, ret := d.GetGpuInstanceId()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
}
ci, ret := d.GetComputeInstanceId()
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
}
giCap := nvcaps.NewGPUInstanceCap(minor, gi)
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
if err != nil {
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
}
ciCap := nvcaps.NewComputeInstanceCap(minor, gi, ci)
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
if err != nil {
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
}
deviceNodes := discover.NewCharDeviceDiscoverer(
logger,
[]string{
parentPath,
giCapDevicePath,
ciCapDevicePath,
},
driverRoot,
)
return deviceNodes, nil
}

View File

@@ -1,84 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package generate
import (
"fmt"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
)
type deviceNamer interface {
GetDeviceName(int, device.Device) (string, error)
GetMigDeviceName(int, int, device.MigDevice) (string, error)
}
const (
deviceNameStrategyIndex = "index"
deviceNameStrategyTypeIndex = "type-index"
deviceNameStrategyUUID = "uuid"
)
type deviceNameIndex struct {
gpuPrefix string
migPrefix string
}
type deviceNameUUID struct{}
// newDeviceNamer creates a Device Namer based on the supplied strategy.
// This namer can be used to construct the names for MIG and GPU devices when generating the CDI spec.
func newDeviceNamer(strategy string) (deviceNamer, error) {
switch strategy {
case deviceNameStrategyIndex:
return deviceNameIndex{}, nil
case deviceNameStrategyTypeIndex:
return deviceNameIndex{gpuPrefix: "gpu", migPrefix: "mig"}, nil
case deviceNameStrategyUUID:
return deviceNameUUID{}, nil
}
return nil, fmt.Errorf("invalid device name strategy: %v", strategy)
}
// GetDeviceName returns the name for the specified device based on the naming strategy
func (s deviceNameIndex) GetDeviceName(i int, d device.Device) (string, error) {
return fmt.Sprintf("%s%d", s.gpuPrefix, i), nil
}
// GetMigDeviceName returns the name for the specified device based on the naming strategy
func (s deviceNameIndex) GetMigDeviceName(i int, j int, d device.MigDevice) (string, error) {
return fmt.Sprintf("%s%d:%d", s.migPrefix, i, j), nil
}
// GetDeviceName returns the name for the specified device based on the naming strategy
func (s deviceNameUUID) GetDeviceName(i int, d device.Device) (string, error) {
uuid, ret := d.GetUUID()
if ret != nvml.SUCCESS {
return "", fmt.Errorf("failed to get device UUID: %v", ret)
}
return uuid, nil
}
// GetMigDeviceName returns the name for the specified device based on the naming strategy
func (s deviceNameUUID) GetMigDeviceName(i int, j int, d device.MigDevice) (string, error) {
uuid, ret := d.GetUUID()
if ret != nvml.SUCCESS {
return "", fmt.Errorf("failed to get device UUID: %v", ret)
}
return uuid, nil
}