Update to go-nvlib v0.3.0 and go-nvml v0.12.0-4

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2024-04-18 15:01:03 +02:00
parent ddeeca392c
commit 2019cd6f0a
56 changed files with 28425 additions and 5824 deletions

View File

@@ -17,10 +17,10 @@
package device
import (
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
)
// Interface provides the API to the 'device' package
// Interface provides the API to the 'device' package.
type Interface interface {
AssertValidMigProfileFormat(profile string) error
GetDevices() ([]Device, error)
@@ -46,7 +46,7 @@ type devicelib struct {
var _ Interface = &devicelib{}
// New creates a new instance of the 'device' interface
// New creates a new instance of the 'device' interface.
func New(opts ...Option) Interface {
d := &devicelib{}
for _, opt := range opts {
@@ -68,21 +68,21 @@ func New(opts ...Option) Interface {
return d
}
// WithNvml provides an Option to set the NVML library used by the 'device' interface
// WithNvml provides an Option to set the NVML library used by the 'device' interface.
func WithNvml(nvml nvml.Interface) Option {
	// The returned closure captures the supplied NVML implementation and
	// stores it on whichever devicelib the option is applied to.
	setNvml := func(lib *devicelib) {
		lib.nvml = nvml
	}
	return setNvml
}
// WithVerifySymbols provides an option to toggle whether to verify select symbols exist in dynamic libraries before calling them
// WithVerifySymbols provides an option to toggle whether to verify select symbols exist in dynamic libraries before calling them.
func WithVerifySymbols(verify bool) Option {
	setVerify := func(lib *devicelib) {
		// The setting is stored as a pointer to the captured argument;
		// presumably a nil pointer means "not configured" (confirm against
		// the code that reads verifySymbols).
		lib.verifySymbols = &verify
	}
	return setVerify
}
// WithSkippedDevices provides an Option to set devices to be skipped by model name
// WithSkippedDevices provides an Option to set devices to be skipped by model name.
func WithSkippedDevices(names ...string) Option {
return func(d *devicelib) {
if d.skippedDevices == nil {
@@ -94,5 +94,5 @@ func WithSkippedDevices(names ...string) Option {
}
}
// Option defines a function for passing options to the New() call
// Option defines a function for passing options to the New() call.
type Option func(*devicelib)

View File

@@ -19,10 +19,10 @@ package device
import (
"fmt"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
)
// Device defines the set of extended functions associated with a device.Device
// Device defines the set of extended functions associated with a device.Device.
type Device interface {
nvml.Device
GetArchitectureAsString() (string, error)
@@ -44,12 +44,12 @@ type device struct {
var _ Device = &device{}
// NewDevice builds a new Device from an nvml.Device
// NewDevice builds a new Device from an nvml.Device.
func (d *devicelib) NewDevice(dev nvml.Device) (Device, error) {
	// Delegate to the unexported constructor and expose its concrete result
	// through the Device interface.
	built, err := d.newDevice(dev)
	return built, err
}
// NewDeviceByUUID builds a new Device from a UUID
// NewDeviceByUUID builds a new Device from a UUID.
func (d *devicelib) NewDeviceByUUID(uuid string) (Device, error) {
dev, ret := d.nvml.DeviceGetHandleByUUID(uuid)
if ret != nvml.SUCCESS {
@@ -58,12 +58,12 @@ func (d *devicelib) NewDeviceByUUID(uuid string) (Device, error) {
return d.newDevice(dev)
}
// newDevice creates a device from an nvml.Device
// newDevice creates a device from an nvml.Device.
func (d *devicelib) newDevice(dev nvml.Device) (*device, error) {
	// Positional literal: the NVML handle, the owning devicelib, and a nil
	// third field left unset at construction time.
	wrapped := &device{dev, d, nil}
	return wrapped, nil
}
// GetArchitectureAsString returns the Device architecture as a string
// GetArchitectureAsString returns the Device architecture as a string.
func (d *device) GetArchitectureAsString() (string, error) {
arch, ret := d.GetArchitecture()
if ret != nvml.SUCCESS {
@@ -92,7 +92,7 @@ func (d *device) GetArchitectureAsString() (string, error) {
return "", fmt.Errorf("error interpreting device architecture as string: %v", arch)
}
// GetBrandAsString returns the Device architecture as a string
// GetBrandAsString returns the Device brand as a string.
func (d *device) GetBrandAsString() (string, error) {
brand, ret := d.GetBrand()
if ret != nvml.SUCCESS {
@@ -140,7 +140,7 @@ func (d *device) GetBrandAsString() (string, error) {
return "", fmt.Errorf("error interpreting device brand as string: %v", brand)
}
// GetCudaComputeCapabilityAsString returns the Device's CUDA compute capability as a version string
// GetCudaComputeCapabilityAsString returns the Device's CUDA compute capability as a version string.
func (d *device) GetCudaComputeCapabilityAsString() (string, error) {
major, minor, ret := d.GetCudaComputeCapability()
if ret != nvml.SUCCESS {
@@ -149,7 +149,7 @@ func (d *device) GetCudaComputeCapabilityAsString() (string, error) {
return fmt.Sprintf("%d.%d", major, minor), nil
}
// IsMigCapable checks if a device is capable of having MIG paprtitions created on it
// IsMigCapable checks if a device is capable of having MIG partitions created on it.
func (d *device) IsMigCapable() (bool, error) {
if !d.lib.hasSymbol("nvmlDeviceGetMigMode") {
return false, nil
@@ -166,7 +166,7 @@ func (d *device) IsMigCapable() (bool, error) {
return true, nil
}
// IsMigEnabled checks if a device has MIG mode currently enabled on it
// IsMigEnabled checks if a device has MIG mode currently enabled on it.
func (d *device) IsMigEnabled() (bool, error) {
if !d.lib.hasSymbol("nvmlDeviceGetMigMode") {
return false, nil
@@ -183,7 +183,7 @@ func (d *device) IsMigEnabled() (bool, error) {
return (mode == nvml.DEVICE_MIG_ENABLE), nil
}
// VisitMigDevices walks a top-level device and invokes a callback function for each MIG device configured on it
// VisitMigDevices walks a top-level device and invokes a callback function for each MIG device configured on it.
func (d *device) VisitMigDevices(visit func(int, MigDevice) error) error {
capable, err := d.IsMigCapable()
if err != nil {
@@ -221,7 +221,7 @@ func (d *device) VisitMigDevices(visit func(int, MigDevice) error) error {
return nil
}
// VisitMigProfiles walks a top-level device and invokes a callback function for each unique MIG Profile that can be configured on it
// VisitMigProfiles walks a top-level device and invokes a callback function for each unique MIG Profile that can be configured on it.
func (d *device) VisitMigProfiles(visit func(MigProfile) error) error {
capable, err := d.IsMigCapable()
if err != nil {
@@ -283,7 +283,7 @@ func (d *device) VisitMigProfiles(visit func(MigProfile) error) error {
return nil
}
// GetMigDevices gets the set of MIG devices associated with a top-level device
// GetMigDevices gets the set of MIG devices associated with a top-level device.
func (d *device) GetMigDevices() ([]MigDevice, error) {
var migs []MigDevice
err := d.VisitMigDevices(func(j int, m MigDevice) error {
@@ -296,7 +296,7 @@ func (d *device) GetMigDevices() ([]MigDevice, error) {
return migs, nil
}
// GetMigProfiles gets the set of unique MIG profiles associated with a top-level device
// GetMigProfiles gets the set of unique MIG profiles associated with a top-level device.
func (d *device) GetMigProfiles() ([]MigProfile, error) {
// Return the cached list if available
if d.migProfiles != nil {
@@ -313,7 +313,7 @@ func (d *device) GetMigProfiles() ([]MigProfile, error) {
return nil, err
}
// And cache it before returning
// And cache it before returning.
d.migProfiles = profiles
return profiles, nil
}
@@ -332,7 +332,7 @@ func (d *device) isSkipped() (bool, error) {
return false, nil
}
// VisitDevices visits each top-level device and invokes a callback function for it
// VisitDevices visits each top-level device and invokes a callback function for it.
func (d *devicelib) VisitDevices(visit func(int, Device) error) error {
count, ret := d.nvml.DeviceGetCount()
if ret != nvml.SUCCESS {
@@ -365,7 +365,7 @@ func (d *devicelib) VisitDevices(visit func(int, Device) error) error {
return nil
}
// VisitMigDevices walks a top-level device and invokes a callback function for each MIG device configured on it
// VisitMigDevices walks all top-level devices and invokes a callback function for each MIG device configured on them.
func (d *devicelib) VisitMigDevices(visit func(int, Device, int, MigDevice) error) error {
err := d.VisitDevices(func(i int, dev Device) error {
err := dev.VisitMigDevices(func(j int, mig MigDevice) error {
@@ -386,7 +386,7 @@ func (d *devicelib) VisitMigDevices(visit func(int, Device, int, MigDevice) erro
return nil
}
// VisitMigProfiles walks a top-level device and invokes a callback function for each unique MIG profile found on them
// VisitMigProfiles walks all top-level devices and invokes a callback function for each unique MIG profile found on them.
func (d *devicelib) VisitMigProfiles(visit func(MigProfile) error) error {
visited := make(map[string]bool)
err := d.VisitDevices(func(i int, dev Device) error {
@@ -414,7 +414,7 @@ func (d *devicelib) VisitMigProfiles(visit func(MigProfile) error) error {
return nil
}
// GetDevices gets the set of all top-level devices
// GetDevices gets the set of all top-level devices.
func (d *devicelib) GetDevices() ([]Device, error) {
var devs []Device
err := d.VisitDevices(func(i int, dev Device) error {
@@ -427,7 +427,7 @@ func (d *devicelib) GetDevices() ([]Device, error) {
return devs, nil
}
// GetMigDevices gets the set of MIG devices across all top-level devices
// GetMigDevices gets the set of MIG devices across all top-level devices.
func (d *devicelib) GetMigDevices() ([]MigDevice, error) {
var migs []MigDevice
err := d.VisitMigDevices(func(i int, dev Device, j int, m MigDevice) error {
@@ -440,7 +440,7 @@ func (d *devicelib) GetMigDevices() ([]MigDevice, error) {
return migs, nil
}
// GetMigProfiles gets the set of unique MIG profiles across all top-level devices
// GetMigProfiles gets the set of unique MIG profiles across all top-level devices.
func (d *devicelib) GetMigProfiles() ([]MigProfile, error) {
// Return the cached list if available
if d.migProfiles != nil {
@@ -457,7 +457,7 @@ func (d *devicelib) GetMigProfiles() ([]MigProfile, error) {
return nil, err
}
// And cache it before returning
// And cache it before returning.
d.migProfiles = profiles
return profiles, nil
}
@@ -469,5 +469,5 @@ func (d *devicelib) hasSymbol(symbol string) bool {
return true
}
return d.nvml.Lookup(symbol) == nil
return d.nvml.Extensions().LookupSymbol(symbol) == nil
}

View File

@@ -27,7 +27,7 @@ import (
// This includes a device index or UUID.
type Identifier string
// IsGpuIndex checks if an identifier is a full GPU index
// IsGpuIndex checks if an identifier is a full GPU index.
func (i Identifier) IsGpuIndex() bool {
if _, err := strconv.ParseUint(string(i), 10, 0); err != nil {
return false
@@ -35,7 +35,7 @@ func (i Identifier) IsGpuIndex() bool {
return true
}
// IsMigIndex checks if an identifier is a MIG index
// IsMigIndex checks if an identifier is a MIG index.
func (i Identifier) IsMigIndex() bool {
split := strings.Split(string(i), ":")
if len(split) != 2 {
@@ -49,13 +49,13 @@ func (i Identifier) IsMigIndex() bool {
return true
}
// IsUUID checks if an identifier is a UUID
// IsUUID checks if an identifier is a UUID.
func (i Identifier) IsUUID() bool {
	// A GPU UUID is checked first; the MIG check only runs when that fails.
	if i.IsGpuUUID() {
		return true
	}
	return i.IsMigUUID()
}
// IsGpuUUID checks if an identifier is a GPU UUID
// A GPU UUID must be of the form GPU-b1028956-cfa2-0990-bf4a-5da9abb51763
// IsGpuUUID checks if an identifier is a GPU UUID.
// A GPU UUID must be of the form GPU-b1028956-cfa2-0990-bf4a-5da9abb51763.
func (i Identifier) IsGpuUUID() bool {
if !strings.HasPrefix(string(i), "GPU-") {
return false
@@ -64,7 +64,7 @@ func (i Identifier) IsGpuUUID() bool {
return err == nil
}
// IsMigUUID checks if an identifier is a MIG UUID
// IsMigUUID checks if an identifier is a MIG UUID.
// A MIG UUID can be of one of two forms:
// - MIG-b1028956-cfa2-0990-bf4a-5da9abb51763
// - MIG-GPU-b1028956-cfa2-0990-bf4a-5da9abb51763/3/0

View File

@@ -19,10 +19,10 @@ package device
import (
"fmt"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
)
// MigDevice defines the set of extended functions associated with a MIG device
// MigDevice defines the set of extended functions associated with a MIG device.
type MigDevice interface {
nvml.Device
GetProfile() (MigProfile, error)
@@ -36,7 +36,7 @@ type migdevice struct {
var _ MigDevice = &migdevice{}
// NewMigDevice builds a new MigDevice from an nvml.Device
// NewMigDevice builds a new MigDevice from an nvml.Device.
func (d *devicelib) NewMigDevice(handle nvml.Device) (MigDevice, error) {
isMig, ret := handle.IsMigDeviceHandle()
if ret != nvml.SUCCESS {
@@ -48,7 +48,7 @@ func (d *devicelib) NewMigDevice(handle nvml.Device) (MigDevice, error) {
return &migdevice{handle, d, nil}, nil
}
// NewMigDeviceByUUID builds a new MigDevice from a UUID
// NewMigDeviceByUUID builds a new MigDevice from a UUID.
func (d *devicelib) NewMigDeviceByUUID(uuid string) (MigDevice, error) {
dev, ret := d.nvml.DeviceGetHandleByUUID(uuid)
if ret != nvml.SUCCESS {
@@ -57,7 +57,7 @@ func (d *devicelib) NewMigDeviceByUUID(uuid string) (MigDevice, error) {
return d.NewMigDevice(dev)
}
// GetProfile returns the MIG profile associated with a MIG device
// GetProfile returns the MIG profile associated with a MIG device.
func (m *migdevice) GetProfile() (MigProfile, error) {
if m.profile != nil {
return m.profile, nil

View File

@@ -23,7 +23,7 @@ import (
"strconv"
"strings"
"github.com/NVIDIA/go-nvlib/pkg/nvml"
"github.com/NVIDIA/go-nvml/pkg/nvml"
)
const (
@@ -40,7 +40,7 @@ type MigProfile interface {
Matches(profile string) bool
}
// MigProfileInfo holds all info associated with a specific MIG profile
// MigProfileInfo holds all info associated with a specific MIG profile.
type MigProfileInfo struct {
C int
G int
@@ -119,13 +119,13 @@ func (d *devicelib) NewMigProfile(giProfileID, ciProfileID, ciEngProfileID int,
return p, nil
}
// AssertValidMigProfileFormat checks if the string is in the proper format to represent a MIG profile
// AssertValidMigProfileFormat checks if the string is in the proper format to represent a MIG profile.
func (d *devicelib) AssertValidMigProfileFormat(profile string) error {
	// Only the parse outcome matters here; the parsed values are discarded.
	if _, _, _, _, err := parseMigProfile(profile); err != nil {
		return err
	}
	return nil
}
// ParseMigProfile converts a string representation of a MigProfile into an object
// ParseMigProfile converts a string representation of a MigProfile into an object.
func (d *devicelib) ParseMigProfile(profile string) (MigProfile, error) {
profiles, err := d.GetMigProfiles()
if err != nil {
@@ -141,7 +141,7 @@ func (d *devicelib) ParseMigProfile(profile string) (MigProfile, error) {
return nil, fmt.Errorf("unable to parse profile string into a valid profile")
}
// String returns the string representation of a Profile
// String returns the string representation of a Profile.
func (p MigProfileInfo) String() string {
var suffix string
if len(p.Attributes) > 0 {
@@ -153,12 +153,12 @@ func (p MigProfileInfo) String() string {
return fmt.Sprintf("%dc.%dg.%dgb%s", p.C, p.G, p.GB, suffix)
}
// GetInfo returns detailed info about a Profile
// GetInfo returns detailed info about a Profile.
func (p MigProfileInfo) GetInfo() MigProfileInfo {
	// The value receiver is already a copy of the caller's struct, so it
	// can be handed back directly.
	info := p
	return info
}
// Equals checks if two Profiles are identical or not
// Equals checks if two Profiles are identical or not.
func (p MigProfileInfo) Equals(other MigProfile) bool {
o := other.GetInfo()
if p.C != o.C {
@@ -182,7 +182,7 @@ func (p MigProfileInfo) Equals(other MigProfile) bool {
return true
}
// Matches checks if a MigProfile matches the string passed in
// Matches checks if a MigProfile matches the string passed in.
func (p MigProfileInfo) Matches(profile string) bool {
c, g, gb, attrs, err := parseMigProfile(profile)
if err != nil {
@@ -211,26 +211,26 @@ func (p MigProfileInfo) Matches(profile string) bool {
}
func parseMigProfile(profile string) (int, int, int, []string, error) {
// If we are handed the empty string, we cannot parse it
// If we are handed the empty string, we cannot parse it.
if profile == "" {
return -1, -1, -1, nil, fmt.Errorf("profile is the empty string")
}
// Split by + to separate out attributes
// Split by + to separate out attributes.
split := strings.SplitN(profile, "+", 2)
// Check to make sure the c, g, and gb values match
// Check to make sure the c, g, and gb values match.
c, g, gb, err := parseMigProfileFields(split[0])
if err != nil {
return -1, -1, -1, nil, fmt.Errorf("cannot parse fields of '%v': %v", profile, err)
}
// If we have no attributes we are done
// If we have no attributes we are done.
if len(split) == 1 {
return c, g, gb, nil, nil
}
// Make sure we have the same set of attributes
// Make sure we have the same set of attributes.
attrs, err := parseMigProfileAttributes(split[1])
if err != nil {
return -1, -1, -1, nil, fmt.Errorf("cannot parse attributes of '%v': %v", profile, err)