mirror of
https://github.com/clearml/go-nvlib
synced 2025-03-15 16:11:48 +00:00
Merge branch 'add-arch' into 'main'
Add ability to query device architecture and CUDA compute capability. See merge request nvidia/cloud-native/go-nvlib!34
This commit is contained in:
commit
13b333d4a6
@ -26,6 +26,8 @@ import (
|
|||||||
// Device defines the set of extended functions associated with a device.Device
|
// Device defines the set of extended functions associated with a device.Device
|
||||||
type Device interface {
|
type Device interface {
|
||||||
nvml.Device
|
nvml.Device
|
||||||
|
GetArchitectureAsString() (string, error)
|
||||||
|
GetCudaComputeCapabilityAsString() (string, error)
|
||||||
GetMigDevices() ([]MigDevice, error)
|
GetMigDevices() ([]MigDevice, error)
|
||||||
GetMigProfiles() ([]MigProfile, error)
|
GetMigProfiles() ([]MigProfile, error)
|
||||||
IsMigCapable() (bool, error)
|
IsMigCapable() (bool, error)
|
||||||
@ -61,6 +63,44 @@ func (d *devicelib) newDevice(dev nvml.Device) (*device, error) {
|
|||||||
return &device{dev, d, nil}, nil
|
return &device{dev, d, nil}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetArchitectureAsString returns the Device architecture as a string
|
||||||
|
func (d *device) GetArchitectureAsString() (string, error) {
|
||||||
|
arch, ret := d.GetArchitecture()
|
||||||
|
if ret != nvml.SUCCESS {
|
||||||
|
return "", fmt.Errorf("error getting device architecture: %v", ret)
|
||||||
|
}
|
||||||
|
switch arch {
|
||||||
|
case nvml.DEVICE_ARCH_KEPLER:
|
||||||
|
return "Kepler", nil
|
||||||
|
case nvml.DEVICE_ARCH_MAXWELL:
|
||||||
|
return "Maxwell", nil
|
||||||
|
case nvml.DEVICE_ARCH_PASCAL:
|
||||||
|
return "Pascal", nil
|
||||||
|
case nvml.DEVICE_ARCH_VOLTA:
|
||||||
|
return "Volta", nil
|
||||||
|
case nvml.DEVICE_ARCH_TURING:
|
||||||
|
return "Turing", nil
|
||||||
|
case nvml.DEVICE_ARCH_AMPERE:
|
||||||
|
return "Ampere", nil
|
||||||
|
case nvml.DEVICE_ARCH_ADA:
|
||||||
|
return "Ada", nil
|
||||||
|
case nvml.DEVICE_ARCH_HOPPER:
|
||||||
|
return "Hopper", nil
|
||||||
|
case nvml.DEVICE_ARCH_UNKNOWN:
|
||||||
|
return "Unknown", nil
|
||||||
|
}
|
||||||
|
return "", fmt.Errorf("error interpreting device architecture as string: %v", arch)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCudaComputeCapabilityAsString returns the Device's CUDA compute capability as a version string
|
||||||
|
func (d *device) GetCudaComputeCapabilityAsString() (string, error) {
|
||||||
|
major, minor, ret := d.GetCudaComputeCapability()
|
||||||
|
if ret != nvml.SUCCESS {
|
||||||
|
return "", fmt.Errorf("error getting CUDA compute capability: %v", ret)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%d.%d", major, minor), nil
|
||||||
|
}
|
||||||
|
|
||||||
// IsMigCapable checks if a device is capable of having MIG partitions created on it
|
// IsMigCapable checks if a device is capable of having MIG partitions created on it
|
||||||
func (d *device) IsMigCapable() (bool, error) {
|
func (d *device) IsMigCapable() (bool, error) {
|
||||||
err := d.lib.nvmlLookupSymbol("nvmlDeviceGetMigMode")
|
err := d.lib.nvmlLookupSymbol("nvmlDeviceGetMigMode")
|
||||||
|
@ -49,6 +49,19 @@ const (
|
|||||||
ERROR_UNKNOWN = Return(nvml.ERROR_UNKNOWN)
|
ERROR_UNKNOWN = Return(nvml.ERROR_UNKNOWN)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Device architecture constants
|
||||||
|
const (
|
||||||
|
DEVICE_ARCH_KEPLER = nvml.DEVICE_ARCH_KEPLER
|
||||||
|
DEVICE_ARCH_MAXWELL = nvml.DEVICE_ARCH_MAXWELL
|
||||||
|
DEVICE_ARCH_PASCAL = nvml.DEVICE_ARCH_PASCAL
|
||||||
|
DEVICE_ARCH_VOLTA = nvml.DEVICE_ARCH_VOLTA
|
||||||
|
DEVICE_ARCH_TURING = nvml.DEVICE_ARCH_TURING
|
||||||
|
DEVICE_ARCH_AMPERE = nvml.DEVICE_ARCH_AMPERE
|
||||||
|
DEVICE_ARCH_ADA = nvml.DEVICE_ARCH_ADA
|
||||||
|
DEVICE_ARCH_HOPPER = nvml.DEVICE_ARCH_HOPPER
|
||||||
|
DEVICE_ARCH_UNKNOWN = nvml.DEVICE_ARCH_UNKNOWN
|
||||||
|
)
|
||||||
|
|
||||||
// MIG Mode constants
|
// MIG Mode constants
|
||||||
const (
|
const (
|
||||||
DEVICE_MIG_ENABLE = nvml.DEVICE_MIG_ENABLE
|
DEVICE_MIG_ENABLE = nvml.DEVICE_MIG_ENABLE
|
||||||
|
@ -150,12 +150,18 @@ func (d nvmlDevice) GetAttributes() (DeviceAttributes, Return) {
|
|||||||
return DeviceAttributes(a), Return(r)
|
return DeviceAttributes(a), Return(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetName returns the device attributes for a MIG device
|
// GetName returns the product name of a Device
|
||||||
func (d nvmlDevice) GetName() (string, Return) {
|
func (d nvmlDevice) GetName() (string, Return) {
|
||||||
n, r := nvml.Device(d).GetName()
|
n, r := nvml.Device(d).GetName()
|
||||||
return n, Return(r)
|
return n, Return(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetArchitecture returns the architecture of a Device
|
||||||
|
func (d nvmlDevice) GetArchitecture() (DeviceArchitecture, Return) {
|
||||||
|
a, r := nvml.Device(d).GetArchitecture()
|
||||||
|
return DeviceArchitecture(a), Return(r)
|
||||||
|
}
|
||||||
|
|
||||||
// RegisterEvents registers the specified event set and type with the device
|
// RegisterEvents registers the specified event set and type with the device
|
||||||
func (d nvmlDevice) RegisterEvents(EventTypes uint64, Set EventSet) Return {
|
func (d nvmlDevice) RegisterEvents(EventTypes uint64, Set EventSet) Return {
|
||||||
return Return(nvml.Device(d).RegisterEvents(EventTypes, nvml.EventSet(Set)))
|
return Return(nvml.Device(d).RegisterEvents(EventTypes, nvml.EventSet(Set)))
|
||||||
|
@ -20,6 +20,9 @@ var _ Device = &DeviceMock{}
|
|||||||
// CreateGpuInstanceWithPlacementFunc: func(gpuInstanceProfileInfo *GpuInstanceProfileInfo, gpuInstancePlacement *GpuInstancePlacement) (GpuInstance, Return) {
|
// CreateGpuInstanceWithPlacementFunc: func(gpuInstanceProfileInfo *GpuInstanceProfileInfo, gpuInstancePlacement *GpuInstancePlacement) (GpuInstance, Return) {
|
||||||
// panic("mock out the CreateGpuInstanceWithPlacement method")
|
// panic("mock out the CreateGpuInstanceWithPlacement method")
|
||||||
// },
|
// },
|
||||||
|
// GetArchitectureFunc: func() (DeviceArchitecture, Return) {
|
||||||
|
// panic("mock out the GetArchitecture method")
|
||||||
|
// },
|
||||||
// GetAttributesFunc: func() (DeviceAttributes, Return) {
|
// GetAttributesFunc: func() (DeviceAttributes, Return) {
|
||||||
// panic("mock out the GetAttributes method")
|
// panic("mock out the GetAttributes method")
|
||||||
// },
|
// },
|
||||||
@ -96,6 +99,9 @@ type DeviceMock struct {
|
|||||||
// CreateGpuInstanceWithPlacementFunc mocks the CreateGpuInstanceWithPlacement method.
|
// CreateGpuInstanceWithPlacementFunc mocks the CreateGpuInstanceWithPlacement method.
|
||||||
CreateGpuInstanceWithPlacementFunc func(gpuInstanceProfileInfo *GpuInstanceProfileInfo, gpuInstancePlacement *GpuInstancePlacement) (GpuInstance, Return)
|
CreateGpuInstanceWithPlacementFunc func(gpuInstanceProfileInfo *GpuInstanceProfileInfo, gpuInstancePlacement *GpuInstancePlacement) (GpuInstance, Return)
|
||||||
|
|
||||||
|
// GetArchitectureFunc mocks the GetArchitecture method.
|
||||||
|
GetArchitectureFunc func() (DeviceArchitecture, Return)
|
||||||
|
|
||||||
// GetAttributesFunc mocks the GetAttributes method.
|
// GetAttributesFunc mocks the GetAttributes method.
|
||||||
GetAttributesFunc func() (DeviceAttributes, Return)
|
GetAttributesFunc func() (DeviceAttributes, Return)
|
||||||
|
|
||||||
@ -171,6 +177,9 @@ type DeviceMock struct {
|
|||||||
// GpuInstancePlacement is the gpuInstancePlacement argument value.
|
// GpuInstancePlacement is the gpuInstancePlacement argument value.
|
||||||
GpuInstancePlacement *GpuInstancePlacement
|
GpuInstancePlacement *GpuInstancePlacement
|
||||||
}
|
}
|
||||||
|
// GetArchitecture holds details about calls to the GetArchitecture method.
|
||||||
|
GetArchitecture []struct {
|
||||||
|
}
|
||||||
// GetAttributes holds details about calls to the GetAttributes method.
|
// GetAttributes holds details about calls to the GetAttributes method.
|
||||||
GetAttributes []struct {
|
GetAttributes []struct {
|
||||||
}
|
}
|
||||||
@ -255,6 +264,7 @@ type DeviceMock struct {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
lockCreateGpuInstanceWithPlacement sync.RWMutex
|
lockCreateGpuInstanceWithPlacement sync.RWMutex
|
||||||
|
lockGetArchitecture sync.RWMutex
|
||||||
lockGetAttributes sync.RWMutex
|
lockGetAttributes sync.RWMutex
|
||||||
lockGetComputeInstanceId sync.RWMutex
|
lockGetComputeInstanceId sync.RWMutex
|
||||||
lockGetCudaComputeCapability sync.RWMutex
|
lockGetCudaComputeCapability sync.RWMutex
|
||||||
@ -315,6 +325,33 @@ func (mock *DeviceMock) CreateGpuInstanceWithPlacementCalls() []struct {
|
|||||||
return calls
|
return calls
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetArchitecture calls GetArchitectureFunc.
|
||||||
|
func (mock *DeviceMock) GetArchitecture() (DeviceArchitecture, Return) {
|
||||||
|
if mock.GetArchitectureFunc == nil {
|
||||||
|
panic("DeviceMock.GetArchitectureFunc: method is nil but Device.GetArchitecture was just called")
|
||||||
|
}
|
||||||
|
callInfo := struct {
|
||||||
|
}{}
|
||||||
|
mock.lockGetArchitecture.Lock()
|
||||||
|
mock.calls.GetArchitecture = append(mock.calls.GetArchitecture, callInfo)
|
||||||
|
mock.lockGetArchitecture.Unlock()
|
||||||
|
return mock.GetArchitectureFunc()
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetArchitectureCalls gets all the calls that were made to GetArchitecture.
|
||||||
|
// Check the length with:
|
||||||
|
//
|
||||||
|
// len(mockedDevice.GetArchitectureCalls())
|
||||||
|
func (mock *DeviceMock) GetArchitectureCalls() []struct {
|
||||||
|
} {
|
||||||
|
var calls []struct {
|
||||||
|
}
|
||||||
|
mock.lockGetArchitecture.RLock()
|
||||||
|
calls = mock.calls.GetArchitecture
|
||||||
|
mock.lockGetArchitecture.RUnlock()
|
||||||
|
return calls
|
||||||
|
}
|
||||||
|
|
||||||
// GetAttributes calls GetAttributesFunc.
|
// GetAttributes calls GetAttributesFunc.
|
||||||
func (mock *DeviceMock) GetAttributes() (DeviceAttributes, Return) {
|
func (mock *DeviceMock) GetAttributes() (DeviceAttributes, Return) {
|
||||||
if mock.GetAttributesFunc == nil {
|
if mock.GetAttributesFunc == nil {
|
||||||
|
@ -40,6 +40,7 @@ type Interface interface {
|
|||||||
//go:generate moq -out device_mock.go . Device
|
//go:generate moq -out device_mock.go . Device
|
||||||
type Device interface {
|
type Device interface {
|
||||||
CreateGpuInstanceWithPlacement(*GpuInstanceProfileInfo, *GpuInstancePlacement) (GpuInstance, Return)
|
CreateGpuInstanceWithPlacement(*GpuInstanceProfileInfo, *GpuInstancePlacement) (GpuInstance, Return)
|
||||||
|
GetArchitecture() (DeviceArchitecture, Return)
|
||||||
GetAttributes() (DeviceAttributes, Return)
|
GetAttributes() (DeviceAttributes, Return)
|
||||||
GetComputeInstanceId() (int, Return)
|
GetComputeInstanceId() (int, Return)
|
||||||
GetCudaComputeCapability() (int, int, Return)
|
GetCudaComputeCapability() (int, int, Return)
|
||||||
@ -136,3 +137,6 @@ type ComputeInstancePlacement nvml.ComputeInstancePlacement
|
|||||||
|
|
||||||
// DeviceAttributes stores information about MIG devices
|
// DeviceAttributes stores information about MIG devices
|
||||||
type DeviceAttributes nvml.DeviceAttributes
|
type DeviceAttributes nvml.DeviceAttributes
|
||||||
|
|
||||||
|
// DeviceArchitecture represents the hardware architecture of a GPU device
|
||||||
|
type DeviceArchitecture nvml.DeviceArchitecture
|
||||||
|
Loading…
Reference in New Issue
Block a user