Merge branch 'add-get-cuda-driver-version' into 'main'

Add additional functions to nvml interfaces

See merge request nvidia/cloud-native/go-nvlib!18
This commit is contained in:
Evan Lezar 2022-09-05 13:31:28 +00:00
commit a880a67681
10 changed files with 219 additions and 48 deletions

2
go.mod
View File

@ -3,6 +3,6 @@ module gitlab.com/nvidia/cloud-native/go-nvlib
go 1.16
require (
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220614115128-31f8b89eb740
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
github.com/stretchr/testify v1.7.0
)

4
go.sum
View File

@ -1,5 +1,5 @@
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220614115128-31f8b89eb740 h1:eOqFxx5XKIcw4U0YTIndTWWFCIJvuagS+v8IaM7XvBU=
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220614115128-31f8b89eb740/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82 h1:x751Xx1tdxkiA/sdkv2J769n21UbYKzVOpe9S/h1M3k=
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

View File

@ -115,3 +115,21 @@ func (d nvmlDevice) GetComputeInstanceId() (int, Return) {
ci, r := nvml.Device(d).GetComputeInstanceId()
return ci, Return(r)
}
// GetCudaComputeCapability reports the compute capability of the device as a
// (major, minor) version pair. The third result is the status returned by the
// underlying nvml call, converted to this package's Return type.
func (d nvmlDevice) GetCudaComputeCapability() (int, int, Return) {
	dev := nvml.Device(d)
	capMajor, capMinor, ret := dev.GetCudaComputeCapability()
	return capMajor, capMinor, Return(ret)
}
// GetAttributes returns the device attributes for a MIG device.
// Both results of the underlying nvml call are converted to this package's
// own types (DeviceAttributes and Return) before being handed back.
func (d nvmlDevice) GetAttributes() (DeviceAttributes, Return) {
	dev := nvml.Device(d)
	attrs, ret := dev.GetAttributes()
	return DeviceAttributes(attrs), Return(ret)
}
// GetName returns the name of the device as reported by NVML.
// The second result is the status of the underlying nvml call, converted to
// this package's Return type.
func (d nvmlDevice) GetName() (string, Return) {
	name, ret := nvml.Device(d).GetName()
	return name, Return(ret)
}

View File

@ -17,9 +17,15 @@ var _ Device = &DeviceMock{}
//
// // make and configure a mocked Device
// mockedDevice := &DeviceMock{
// GetAttributesFunc: func() (DeviceAttributes, Return) {
// panic("mock out the GetAttributes method")
// },
// GetComputeInstanceIdFunc: func() (int, Return) {
// panic("mock out the GetComputeInstanceId method")
// },
// GetCudaComputeCapabilityFunc: func() (int, int, Return) {
// panic("mock out the GetCudaComputeCapability method")
// },
// GetDeviceHandleFromMigDeviceHandleFunc: func() (Device, Return) {
// panic("mock out the GetDeviceHandleFromMigDeviceHandle method")
// },
@ -50,6 +56,9 @@ var _ Device = &DeviceMock{}
// GetMinorNumberFunc: func() (int, Return) {
// panic("mock out the GetMinorNumber method")
// },
// GetNameFunc: func() (string, Return) {
// panic("mock out the GetName method")
// },
// GetPciInfoFunc: func() (PciInfo, Return) {
// panic("mock out the GetPciInfo method")
// },
@ -69,9 +78,15 @@ var _ Device = &DeviceMock{}
//
// }
type DeviceMock struct {
// GetAttributesFunc mocks the GetAttributes method.
GetAttributesFunc func() (DeviceAttributes, Return)
// GetComputeInstanceIdFunc mocks the GetComputeInstanceId method.
GetComputeInstanceIdFunc func() (int, Return)
// GetCudaComputeCapabilityFunc mocks the GetCudaComputeCapability method.
GetCudaComputeCapabilityFunc func() (int, int, Return)
// GetDeviceHandleFromMigDeviceHandleFunc mocks the GetDeviceHandleFromMigDeviceHandle method.
GetDeviceHandleFromMigDeviceHandleFunc func() (Device, Return)
@ -102,6 +117,9 @@ type DeviceMock struct {
// GetMinorNumberFunc mocks the GetMinorNumber method.
GetMinorNumberFunc func() (int, Return)
// GetNameFunc mocks the GetName method.
GetNameFunc func() (string, Return)
// GetPciInfoFunc mocks the GetPciInfo method.
GetPciInfoFunc func() (PciInfo, Return)
@ -116,9 +134,15 @@ type DeviceMock struct {
// calls tracks calls to the methods.
calls struct {
// GetAttributes holds details about calls to the GetAttributes method.
GetAttributes []struct {
}
// GetComputeInstanceId holds details about calls to the GetComputeInstanceId method.
GetComputeInstanceId []struct {
}
// GetCudaComputeCapability holds details about calls to the GetCudaComputeCapability method.
GetCudaComputeCapability []struct {
}
// GetDeviceHandleFromMigDeviceHandle holds details about calls to the GetDeviceHandleFromMigDeviceHandle method.
GetDeviceHandleFromMigDeviceHandle []struct {
}
@ -155,6 +179,9 @@ type DeviceMock struct {
// GetMinorNumber holds details about calls to the GetMinorNumber method.
GetMinorNumber []struct {
}
// GetName holds details about calls to the GetName method.
GetName []struct {
}
// GetPciInfo holds details about calls to the GetPciInfo method.
GetPciInfo []struct {
}
@ -170,7 +197,9 @@ type DeviceMock struct {
Mode int
}
}
lockGetAttributes sync.RWMutex
lockGetComputeInstanceId sync.RWMutex
lockGetCudaComputeCapability sync.RWMutex
lockGetDeviceHandleFromMigDeviceHandle sync.RWMutex
lockGetGpuInstanceId sync.RWMutex
lockGetGpuInstanceProfileInfo sync.RWMutex
@ -181,12 +210,39 @@ type DeviceMock struct {
lockGetMigDeviceHandleByIndex sync.RWMutex
lockGetMigMode sync.RWMutex
lockGetMinorNumber sync.RWMutex
lockGetName sync.RWMutex
lockGetPciInfo sync.RWMutex
lockGetUUID sync.RWMutex
lockIsMigDeviceHandle sync.RWMutex
lockSetMigMode sync.RWMutex
}
// GetAttributes calls GetAttributesFunc.
// It panics when GetAttributesFunc is unset; otherwise the invocation is
// recorded under lockGetAttributes before the configured func is run.
func (mock *DeviceMock) GetAttributes() (DeviceAttributes, Return) {
	if mock.GetAttributesFunc == nil {
		panic("DeviceMock.GetAttributesFunc: method is nil but Device.GetAttributes was just called")
	}
	// The lock is released before delegating so the configured func runs unlocked.
	mock.lockGetAttributes.Lock()
	mock.calls.GetAttributes = append(mock.calls.GetAttributes, struct{}{})
	mock.lockGetAttributes.Unlock()
	return mock.GetAttributesFunc()
}
// GetAttributesCalls gets all the calls that were made to GetAttributes.
// Check the length with:
//
//	len(mockedDevice.GetAttributesCalls())
func (mock *DeviceMock) GetAttributesCalls() []struct{} {
	mock.lockGetAttributes.RLock()
	defer mock.lockGetAttributes.RUnlock()
	return mock.calls.GetAttributes
}
// GetComputeInstanceId calls GetComputeInstanceIdFunc.
func (mock *DeviceMock) GetComputeInstanceId() (int, Return) {
if mock.GetComputeInstanceIdFunc == nil {
@ -213,6 +269,32 @@ func (mock *DeviceMock) GetComputeInstanceIdCalls() []struct {
return calls
}
// GetCudaComputeCapability calls GetCudaComputeCapabilityFunc.
// It panics when GetCudaComputeCapabilityFunc is unset; otherwise the
// invocation is recorded under lockGetCudaComputeCapability before the
// configured func is run.
func (mock *DeviceMock) GetCudaComputeCapability() (int, int, Return) {
	if mock.GetCudaComputeCapabilityFunc == nil {
		panic("DeviceMock.GetCudaComputeCapabilityFunc: method is nil but Device.GetCudaComputeCapability was just called")
	}
	// The lock is released before delegating so the configured func runs unlocked.
	mock.lockGetCudaComputeCapability.Lock()
	mock.calls.GetCudaComputeCapability = append(mock.calls.GetCudaComputeCapability, struct{}{})
	mock.lockGetCudaComputeCapability.Unlock()
	return mock.GetCudaComputeCapabilityFunc()
}
// GetCudaComputeCapabilityCalls gets all the calls that were made to GetCudaComputeCapability.
// Check the length with:
//
//	len(mockedDevice.GetCudaComputeCapabilityCalls())
func (mock *DeviceMock) GetCudaComputeCapabilityCalls() []struct{} {
	mock.lockGetCudaComputeCapability.RLock()
	defer mock.lockGetCudaComputeCapability.RUnlock()
	return mock.calls.GetCudaComputeCapability
}
// GetDeviceHandleFromMigDeviceHandle calls GetDeviceHandleFromMigDeviceHandleFunc.
func (mock *DeviceMock) GetDeviceHandleFromMigDeviceHandle() (Device, Return) {
if mock.GetDeviceHandleFromMigDeviceHandleFunc == nil {
@ -488,6 +570,32 @@ func (mock *DeviceMock) GetMinorNumberCalls() []struct {
return calls
}
// GetName calls GetNameFunc.
// It panics when GetNameFunc is unset; otherwise the invocation is recorded
// under lockGetName before the configured func is run.
func (mock *DeviceMock) GetName() (string, Return) {
	if mock.GetNameFunc == nil {
		panic("DeviceMock.GetNameFunc: method is nil but Device.GetName was just called")
	}
	// The lock is released before delegating so the configured func runs unlocked.
	mock.lockGetName.Lock()
	mock.calls.GetName = append(mock.calls.GetName, struct{}{})
	mock.lockGetName.Unlock()
	return mock.GetNameFunc()
}
// GetNameCalls gets all the calls that were made to GetName.
// Check the length with:
//
//	len(mockedDevice.GetNameCalls())
func (mock *DeviceMock) GetNameCalls() []struct{} {
	mock.lockGetName.RLock()
	defer mock.lockGetName.RUnlock()
	return mock.calls.GetName
}
// GetPciInfo calls GetPciInfoFunc.
func (mock *DeviceMock) GetPciInfo() (PciInfo, Return) {
if mock.GetPciInfoFunc == nil {

View File

@ -46,7 +46,7 @@ func (n *nvmlLib) Init() Return {
if n.refcount == 0 {
errorStringFunc = nvml.ErrorString
}
n.refcount += 1
n.refcount++
return SUCCESS
}
@ -60,7 +60,7 @@ func (n *nvmlLib) Shutdown() Return {
n.Lock()
defer n.Unlock()
n.refcount -= 1
n.refcount--
if n.refcount == 0 {
errorStringFunc = defaultErrorStringFunc
}
@ -92,6 +92,12 @@ func (n *nvmlLib) SystemGetDriverVersion() (string, Return) {
return v, Return(r)
}
// SystemGetCudaDriverVersion returns the version of CUDA associated with the NVIDIA driver.
// The integer is passed through unchanged from nvml.SystemGetCudaDriverVersion;
// only the status code is converted to this package's Return type.
func (n *nvmlLib) SystemGetCudaDriverVersion() (int, Return) {
	version, ret := nvml.SystemGetCudaDriverVersion()
	return version, Return(ret)
}
// ErrorString returns the error string associated with a given return value
func (n *nvmlLib) ErrorString(ret Return) string {
return nvml.ErrorString(nvml.Return(ret))

View File

@ -35,6 +35,9 @@ var _ Interface = &InterfaceMock{}
// ShutdownFunc: func() Return {
// panic("mock out the Shutdown method")
// },
// SystemGetCudaDriverVersionFunc: func() (int, Return) {
// panic("mock out the SystemGetCudaDriverVersion method")
// },
// SystemGetDriverVersionFunc: func() (string, Return) {
// panic("mock out the SystemGetDriverVersion method")
// },
@ -63,6 +66,9 @@ type InterfaceMock struct {
// ShutdownFunc mocks the Shutdown method.
ShutdownFunc func() Return
// SystemGetCudaDriverVersionFunc mocks the SystemGetCudaDriverVersion method.
SystemGetCudaDriverVersionFunc func() (int, Return)
// SystemGetDriverVersionFunc mocks the SystemGetDriverVersion method.
SystemGetDriverVersionFunc func() (string, Return)
@ -92,17 +98,21 @@ type InterfaceMock struct {
// Shutdown holds details about calls to the Shutdown method.
Shutdown []struct {
}
// SystemGetCudaDriverVersion holds details about calls to the SystemGetCudaDriverVersion method.
SystemGetCudaDriverVersion []struct {
}
// SystemGetDriverVersion holds details about calls to the SystemGetDriverVersion method.
SystemGetDriverVersion []struct {
}
}
lockDeviceGetCount sync.RWMutex
lockDeviceGetHandleByIndex sync.RWMutex
lockDeviceGetHandleByUUID sync.RWMutex
lockErrorString sync.RWMutex
lockInit sync.RWMutex
lockShutdown sync.RWMutex
lockSystemGetDriverVersion sync.RWMutex
lockDeviceGetCount sync.RWMutex
lockDeviceGetHandleByIndex sync.RWMutex
lockDeviceGetHandleByUUID sync.RWMutex
lockErrorString sync.RWMutex
lockInit sync.RWMutex
lockShutdown sync.RWMutex
lockSystemGetCudaDriverVersion sync.RWMutex
lockSystemGetDriverVersion sync.RWMutex
}
// DeviceGetCount calls DeviceGetCountFunc.
@ -276,6 +286,32 @@ func (mock *InterfaceMock) ShutdownCalls() []struct {
return calls
}
// SystemGetCudaDriverVersion calls SystemGetCudaDriverVersionFunc.
// It panics when SystemGetCudaDriverVersionFunc is unset; otherwise the
// invocation is recorded under lockSystemGetCudaDriverVersion before the
// configured func is run.
func (mock *InterfaceMock) SystemGetCudaDriverVersion() (int, Return) {
	if mock.SystemGetCudaDriverVersionFunc == nil {
		panic("InterfaceMock.SystemGetCudaDriverVersionFunc: method is nil but Interface.SystemGetCudaDriverVersion was just called")
	}
	// The lock is released before delegating so the configured func runs unlocked.
	mock.lockSystemGetCudaDriverVersion.Lock()
	mock.calls.SystemGetCudaDriverVersion = append(mock.calls.SystemGetCudaDriverVersion, struct{}{})
	mock.lockSystemGetCudaDriverVersion.Unlock()
	return mock.SystemGetCudaDriverVersionFunc()
}
// SystemGetCudaDriverVersionCalls gets all the calls that were made to SystemGetCudaDriverVersion.
// Check the length with:
//
//	len(mockedInterface.SystemGetCudaDriverVersionCalls())
func (mock *InterfaceMock) SystemGetCudaDriverVersionCalls() []struct{} {
	mock.lockSystemGetCudaDriverVersion.RLock()
	defer mock.lockSystemGetCudaDriverVersion.RUnlock()
	return mock.calls.SystemGetCudaDriverVersion
}
// SystemGetDriverVersion calls SystemGetDriverVersionFunc.
func (mock *InterfaceMock) SystemGetDriverVersion() (string, Return) {
if mock.SystemGetDriverVersionFunc == nil {

View File

@ -20,53 +20,61 @@ import (
"github.com/NVIDIA/go-nvml/pkg/nvml"
)
//go:generate moq -out nvml_mock.go . Interface
// Interface defines the functions implemented by an NVML library
//
//go:generate moq -out nvml_mock.go . Interface
type Interface interface {
Init() Return
Shutdown() Return
DeviceGetCount() (int, Return)
DeviceGetHandleByIndex(Index int) (Device, Return)
DeviceGetHandleByUUID(UUID string) (Device, Return)
SystemGetDriverVersion() (string, Return)
ErrorString(r Return) string
Init() Return
Shutdown() Return
SystemGetCudaDriverVersion() (int, Return)
SystemGetDriverVersion() (string, Return)
}
//go:generate moq -out device_mock.go . Device
// Device defines the functions implemented by an NVML device
//
//go:generate moq -out device_mock.go . Device
type Device interface {
GetIndex() (int, Return)
GetPciInfo() (PciInfo, Return)
GetMemoryInfo() (Memory, Return)
GetUUID() (string, Return)
GetMinorNumber() (int, Return)
IsMigDeviceHandle() (bool, Return)
GetAttributes() (DeviceAttributes, Return)
GetComputeInstanceId() (int, Return)
GetCudaComputeCapability() (int, int, Return)
GetDeviceHandleFromMigDeviceHandle() (Device, Return)
SetMigMode(Mode int) (Return, Return)
GetMigMode() (int, int, Return)
GetGpuInstanceId() (int, Return)
GetGpuInstanceProfileInfo(Profile int) (GpuInstanceProfileInfo, Return)
GetGpuInstances(Info *GpuInstanceProfileInfo) ([]GpuInstance, Return)
GetIndex() (int, Return)
GetMaxMigDeviceCount() (int, Return)
GetMemoryInfo() (Memory, Return)
GetMigDeviceHandleByIndex(Index int) (Device, Return)
GetGpuInstanceId() (int, Return)
GetComputeInstanceId() (int, Return)
GetMigMode() (int, int, Return)
GetMinorNumber() (int, Return)
GetName() (string, Return)
GetPciInfo() (PciInfo, Return)
GetUUID() (string, Return)
IsMigDeviceHandle() (bool, Return)
SetMigMode(Mode int) (Return, Return)
}
//go:generate moq -out gi_mock.go . GpuInstance
// GpuInstance defines the functions implemented by a GpuInstance
//
//go:generate moq -out gi_mock.go . GpuInstance
type GpuInstance interface {
GetInfo() (GpuInstanceInfo, Return)
GetComputeInstanceProfileInfo(Profile int, EngProfile int) (ComputeInstanceProfileInfo, Return)
CreateComputeInstance(Info *ComputeInstanceProfileInfo) (ComputeInstance, Return)
GetComputeInstances(Info *ComputeInstanceProfileInfo) ([]ComputeInstance, Return)
Destroy() Return
GetComputeInstanceProfileInfo(Profile int, EngProfile int) (ComputeInstanceProfileInfo, Return)
GetComputeInstances(Info *ComputeInstanceProfileInfo) ([]ComputeInstance, Return)
GetInfo() (GpuInstanceInfo, Return)
}
//go:generate moq -out ci_mock.go . ComputeInstance
// ComputeInstance defines the functions implemented by a ComputeInstance
//
//go:generate moq -out ci_mock.go . ComputeInstance
type ComputeInstance interface {
GetInfo() (ComputeInstanceInfo, Return)
Destroy() Return
GetInfo() (ComputeInstanceInfo, Return)
}
// GpuInstanceInfo holds info about a GPU Instance
@ -92,7 +100,7 @@ type Return nvml.Return
// Memory holds info about GPU device memory
type Memory nvml.Memory
//PciInfo holds info about the PCI connections of a GPU dvice
// PciInfo holds info about the PCI connections of a GPU device
type PciInfo nvml.PciInfo
// GpuInstanceProfileInfo holds info about a GPU Instance Profile
@ -106,3 +114,6 @@ type ComputeInstanceProfileInfo nvml.ComputeInstanceProfileInfo
// ComputeInstancePlacement holds placement info about a Compute Instance
type ComputeInstancePlacement nvml.ComputeInstancePlacement
// DeviceAttributes stores information about MIG devices.
// It is a package-local named type whose underlying type is
// nvml.DeviceAttributes, so values convert directly between the two.
type DeviceAttributes nvml.DeviceAttributes

View File

@ -38,21 +38,21 @@ func DeviceGetHandleByIndex(Index int) (Device, Return) {
// nvml.DeviceGetHandleBySerial()
func DeviceGetHandleBySerial(Serial string) (Device, Return) {
var Device Device
ret := nvmlDeviceGetHandleBySerial(Serial, &Device)
ret := nvmlDeviceGetHandleBySerial(Serial + string(rune(0)), &Device)
return Device, ret
}
// nvml.DeviceGetHandleByUUID()
func DeviceGetHandleByUUID(Uuid string) (Device, Return) {
var Device Device
ret := nvmlDeviceGetHandleByUUID(Uuid, &Device)
ret := nvmlDeviceGetHandleByUUID(Uuid + string(rune(0)), &Device)
return Device, ret
}
// nvml.DeviceGetHandleByPciBusId()
func DeviceGetHandleByPciBusId(PciBusId string) (Device, Return) {
var Device Device
ret := nvmlDeviceGetHandleByPciBusId(PciBusId, &Device)
ret := nvmlDeviceGetHandleByPciBusId(PciBusId + string(rune(0)), &Device)
return Device, ret
}

View File

@ -32,13 +32,9 @@ var nvml *dl.DynamicLibrary
// nvml.Init()
func Init() Return {
lib := dl.New(nvmlLibraryName, nvmlLibraryLoadFlags)
if lib == nil {
panic(fmt.Sprintf("error instantiating DynamicLibrary for %s", nvmlLibraryName))
}
err := lib.Open()
if err != nil {
panic(fmt.Sprintf("error opening %s: %v", nvmlLibraryName, err))
return ERROR_LIBRARY_NOT_FOUND
}
nvml = lib
@ -50,13 +46,9 @@ func Init() Return {
// nvml.InitWithFlags()
func InitWithFlags(Flags uint32) Return {
lib := dl.New(nvmlLibraryName, nvmlLibraryLoadFlags)
if lib == nil {
panic(fmt.Sprintf("error instantiating DynamicLibrary for %s", nvmlLibraryName))
}
err := lib.Open()
if err != nil {
panic(fmt.Sprintf("error opening %s: %v", nvmlLibraryName, err))
return ERROR_LIBRARY_NOT_FOUND
}
nvml = lib

2
vendor/modules.txt vendored
View File

@ -1,4 +1,4 @@
# github.com/NVIDIA/go-nvml v0.11.6-0.0.20220614115128-31f8b89eb740
# github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
## explicit
github.com/NVIDIA/go-nvml/pkg/dl
github.com/NVIDIA/go-nvml/pkg/nvml