mirror of
https://github.com/clearml/go-nvlib
synced 2025-04-06 13:54:58 +00:00
Merge pull request #7 from NVIDIA/add-nvlink-functions
Add functions related to NVLink info
This commit is contained in:
commit
9fd385bace
@ -20,6 +20,11 @@ import (
|
|||||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// General untyped constants
|
||||||
|
const (
|
||||||
|
NVLINK_MAX_LINKS = nvml.NVLINK_MAX_LINKS
|
||||||
|
)
|
||||||
|
|
||||||
// Return constants
|
// Return constants
|
||||||
const (
|
const (
|
||||||
SUCCESS = Return(nvml.SUCCESS)
|
SUCCESS = Return(nvml.SUCCESS)
|
||||||
@ -131,3 +136,19 @@ const (
|
|||||||
EventTypeSingleBitEccError = nvml.EventTypeSingleBitEccError
|
EventTypeSingleBitEccError = nvml.EventTypeSingleBitEccError
|
||||||
EventTypeDoubleBitEccError = nvml.EventTypeDoubleBitEccError
|
EventTypeDoubleBitEccError = nvml.EventTypeDoubleBitEccError
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// GPU Topology enumeration
|
||||||
|
const (
|
||||||
|
TOPOLOGY_INTERNAL = GpuTopologyLevel(nvml.TOPOLOGY_INTERNAL)
|
||||||
|
TOPOLOGY_SINGLE = GpuTopologyLevel(nvml.TOPOLOGY_SINGLE)
|
||||||
|
TOPOLOGY_MULTIPLE = GpuTopologyLevel(nvml.TOPOLOGY_MULTIPLE)
|
||||||
|
TOPOLOGY_HOSTBRIDGE = GpuTopologyLevel(nvml.TOPOLOGY_HOSTBRIDGE)
|
||||||
|
TOPOLOGY_NODE = GpuTopologyLevel(nvml.TOPOLOGY_NODE)
|
||||||
|
TOPOLOGY_SYSTEM = GpuTopologyLevel(nvml.TOPOLOGY_SYSTEM)
|
||||||
|
)
|
||||||
|
|
||||||
|
// Generic enable/disable constants
|
||||||
|
const (
|
||||||
|
FEATURE_DISABLED = EnableState(nvml.FEATURE_DISABLED)
|
||||||
|
FEATURE_ENABLED = EnableState(nvml.FEATURE_ENABLED)
|
||||||
|
)
|
||||||
|
@ -178,3 +178,27 @@ func (d nvmlDevice) GetSupportedEventTypes() (uint64, Return) {
|
|||||||
e, r := nvml.Device(d).GetSupportedEventTypes()
|
e, r := nvml.Device(d).GetSupportedEventTypes()
|
||||||
return e, Return(r)
|
return e, Return(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetTopologyCommonAncestor retrieves the common ancestor for two devices.
|
||||||
|
func (d nvmlDevice) GetTopologyCommonAncestor(o Device) (GpuTopologyLevel, Return) {
|
||||||
|
other, ok := o.(nvmlDevice)
|
||||||
|
if !ok {
|
||||||
|
return 0, ERROR_INVALID_ARGUMENT
|
||||||
|
}
|
||||||
|
|
||||||
|
l, r := nvml.Device(d).GetTopologyCommonAncestor(nvml.Device(other))
|
||||||
|
return GpuTopologyLevel(l), Return(r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetNvLinkState retrieves the state of the device's NvLink for the link specified.
|
||||||
|
func (d nvmlDevice) GetNvLinkState(link int) (EnableState, Return) {
|
||||||
|
s, r := nvml.Device(d).GetNvLinkState(link)
|
||||||
|
return EnableState(s), Return(r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetNvLinkRemotePciInfo retrieves the PCI information for the remote node on a NvLink link.
|
||||||
|
// Note: pciSubSystemId is not filled in this function and is indeterminate.
|
||||||
|
func (d nvmlDevice) GetNvLinkRemotePciInfo(link int) (PciInfo, Return) {
|
||||||
|
p, r := nvml.Device(d).GetNvLinkRemotePciInfo(link)
|
||||||
|
return PciInfo(p), Return(r)
|
||||||
|
}
|
||||||
|
@ -74,12 +74,21 @@ var _ Device = &DeviceMock{}
|
|||||||
// GetNameFunc: func() (string, Return) {
|
// GetNameFunc: func() (string, Return) {
|
||||||
// panic("mock out the GetName method")
|
// panic("mock out the GetName method")
|
||||||
// },
|
// },
|
||||||
|
// GetNvLinkRemotePciInfoFunc: func(n int) (PciInfo, Return) {
|
||||||
|
// panic("mock out the GetNvLinkRemotePciInfo method")
|
||||||
|
// },
|
||||||
|
// GetNvLinkStateFunc: func(n int) (EnableState, Return) {
|
||||||
|
// panic("mock out the GetNvLinkState method")
|
||||||
|
// },
|
||||||
// GetPciInfoFunc: func() (PciInfo, Return) {
|
// GetPciInfoFunc: func() (PciInfo, Return) {
|
||||||
// panic("mock out the GetPciInfo method")
|
// panic("mock out the GetPciInfo method")
|
||||||
// },
|
// },
|
||||||
// GetSupportedEventTypesFunc: func() (uint64, Return) {
|
// GetSupportedEventTypesFunc: func() (uint64, Return) {
|
||||||
// panic("mock out the GetSupportedEventTypes method")
|
// panic("mock out the GetSupportedEventTypes method")
|
||||||
// },
|
// },
|
||||||
|
// GetTopologyCommonAncestorFunc: func(device Device) (GpuTopologyLevel, Return) {
|
||||||
|
// panic("mock out the GetTopologyCommonAncestor method")
|
||||||
|
// },
|
||||||
// GetUUIDFunc: func() (string, Return) {
|
// GetUUIDFunc: func() (string, Return) {
|
||||||
// panic("mock out the GetUUID method")
|
// panic("mock out the GetUUID method")
|
||||||
// },
|
// },
|
||||||
@ -156,12 +165,21 @@ type DeviceMock struct {
|
|||||||
// GetNameFunc mocks the GetName method.
|
// GetNameFunc mocks the GetName method.
|
||||||
GetNameFunc func() (string, Return)
|
GetNameFunc func() (string, Return)
|
||||||
|
|
||||||
|
// GetNvLinkRemotePciInfoFunc mocks the GetNvLinkRemotePciInfo method.
|
||||||
|
GetNvLinkRemotePciInfoFunc func(n int) (PciInfo, Return)
|
||||||
|
|
||||||
|
// GetNvLinkStateFunc mocks the GetNvLinkState method.
|
||||||
|
GetNvLinkStateFunc func(n int) (EnableState, Return)
|
||||||
|
|
||||||
// GetPciInfoFunc mocks the GetPciInfo method.
|
// GetPciInfoFunc mocks the GetPciInfo method.
|
||||||
GetPciInfoFunc func() (PciInfo, Return)
|
GetPciInfoFunc func() (PciInfo, Return)
|
||||||
|
|
||||||
// GetSupportedEventTypesFunc mocks the GetSupportedEventTypes method.
|
// GetSupportedEventTypesFunc mocks the GetSupportedEventTypes method.
|
||||||
GetSupportedEventTypesFunc func() (uint64, Return)
|
GetSupportedEventTypesFunc func() (uint64, Return)
|
||||||
|
|
||||||
|
// GetTopologyCommonAncestorFunc mocks the GetTopologyCommonAncestor method.
|
||||||
|
GetTopologyCommonAncestorFunc func(device Device) (GpuTopologyLevel, Return)
|
||||||
|
|
||||||
// GetUUIDFunc mocks the GetUUID method.
|
// GetUUIDFunc mocks the GetUUID method.
|
||||||
GetUUIDFunc func() (string, Return)
|
GetUUIDFunc func() (string, Return)
|
||||||
|
|
||||||
@ -247,12 +265,27 @@ type DeviceMock struct {
|
|||||||
// GetName holds details about calls to the GetName method.
|
// GetName holds details about calls to the GetName method.
|
||||||
GetName []struct {
|
GetName []struct {
|
||||||
}
|
}
|
||||||
|
// GetNvLinkRemotePciInfo holds details about calls to the GetNvLinkRemotePciInfo method.
|
||||||
|
GetNvLinkRemotePciInfo []struct {
|
||||||
|
// N is the n argument value.
|
||||||
|
N int
|
||||||
|
}
|
||||||
|
// GetNvLinkState holds details about calls to the GetNvLinkState method.
|
||||||
|
GetNvLinkState []struct {
|
||||||
|
// N is the n argument value.
|
||||||
|
N int
|
||||||
|
}
|
||||||
// GetPciInfo holds details about calls to the GetPciInfo method.
|
// GetPciInfo holds details about calls to the GetPciInfo method.
|
||||||
GetPciInfo []struct {
|
GetPciInfo []struct {
|
||||||
}
|
}
|
||||||
// GetSupportedEventTypes holds details about calls to the GetSupportedEventTypes method.
|
// GetSupportedEventTypes holds details about calls to the GetSupportedEventTypes method.
|
||||||
GetSupportedEventTypes []struct {
|
GetSupportedEventTypes []struct {
|
||||||
}
|
}
|
||||||
|
// GetTopologyCommonAncestor holds details about calls to the GetTopologyCommonAncestor method.
|
||||||
|
GetTopologyCommonAncestor []struct {
|
||||||
|
// Device is the device argument value.
|
||||||
|
Device Device
|
||||||
|
}
|
||||||
// GetUUID holds details about calls to the GetUUID method.
|
// GetUUID holds details about calls to the GetUUID method.
|
||||||
GetUUID []struct {
|
GetUUID []struct {
|
||||||
}
|
}
|
||||||
@ -291,8 +324,11 @@ type DeviceMock struct {
|
|||||||
lockGetMigMode sync.RWMutex
|
lockGetMigMode sync.RWMutex
|
||||||
lockGetMinorNumber sync.RWMutex
|
lockGetMinorNumber sync.RWMutex
|
||||||
lockGetName sync.RWMutex
|
lockGetName sync.RWMutex
|
||||||
|
lockGetNvLinkRemotePciInfo sync.RWMutex
|
||||||
|
lockGetNvLinkState sync.RWMutex
|
||||||
lockGetPciInfo sync.RWMutex
|
lockGetPciInfo sync.RWMutex
|
||||||
lockGetSupportedEventTypes sync.RWMutex
|
lockGetSupportedEventTypes sync.RWMutex
|
||||||
|
lockGetTopologyCommonAncestor sync.RWMutex
|
||||||
lockGetUUID sync.RWMutex
|
lockGetUUID sync.RWMutex
|
||||||
lockIsMigDeviceHandle sync.RWMutex
|
lockIsMigDeviceHandle sync.RWMutex
|
||||||
lockRegisterEvents sync.RWMutex
|
lockRegisterEvents sync.RWMutex
|
||||||
@ -846,6 +882,70 @@ func (mock *DeviceMock) GetNameCalls() []struct {
|
|||||||
return calls
|
return calls
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetNvLinkRemotePciInfo calls GetNvLinkRemotePciInfoFunc.
|
||||||
|
func (mock *DeviceMock) GetNvLinkRemotePciInfo(n int) (PciInfo, Return) {
|
||||||
|
if mock.GetNvLinkRemotePciInfoFunc == nil {
|
||||||
|
panic("DeviceMock.GetNvLinkRemotePciInfoFunc: method is nil but Device.GetNvLinkRemotePciInfo was just called")
|
||||||
|
}
|
||||||
|
callInfo := struct {
|
||||||
|
N int
|
||||||
|
}{
|
||||||
|
N: n,
|
||||||
|
}
|
||||||
|
mock.lockGetNvLinkRemotePciInfo.Lock()
|
||||||
|
mock.calls.GetNvLinkRemotePciInfo = append(mock.calls.GetNvLinkRemotePciInfo, callInfo)
|
||||||
|
mock.lockGetNvLinkRemotePciInfo.Unlock()
|
||||||
|
return mock.GetNvLinkRemotePciInfoFunc(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetNvLinkRemotePciInfoCalls gets all the calls that were made to GetNvLinkRemotePciInfo.
|
||||||
|
// Check the length with:
|
||||||
|
//
|
||||||
|
// len(mockedDevice.GetNvLinkRemotePciInfoCalls())
|
||||||
|
func (mock *DeviceMock) GetNvLinkRemotePciInfoCalls() []struct {
|
||||||
|
N int
|
||||||
|
} {
|
||||||
|
var calls []struct {
|
||||||
|
N int
|
||||||
|
}
|
||||||
|
mock.lockGetNvLinkRemotePciInfo.RLock()
|
||||||
|
calls = mock.calls.GetNvLinkRemotePciInfo
|
||||||
|
mock.lockGetNvLinkRemotePciInfo.RUnlock()
|
||||||
|
return calls
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetNvLinkState calls GetNvLinkStateFunc.
|
||||||
|
func (mock *DeviceMock) GetNvLinkState(n int) (EnableState, Return) {
|
||||||
|
if mock.GetNvLinkStateFunc == nil {
|
||||||
|
panic("DeviceMock.GetNvLinkStateFunc: method is nil but Device.GetNvLinkState was just called")
|
||||||
|
}
|
||||||
|
callInfo := struct {
|
||||||
|
N int
|
||||||
|
}{
|
||||||
|
N: n,
|
||||||
|
}
|
||||||
|
mock.lockGetNvLinkState.Lock()
|
||||||
|
mock.calls.GetNvLinkState = append(mock.calls.GetNvLinkState, callInfo)
|
||||||
|
mock.lockGetNvLinkState.Unlock()
|
||||||
|
return mock.GetNvLinkStateFunc(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetNvLinkStateCalls gets all the calls that were made to GetNvLinkState.
|
||||||
|
// Check the length with:
|
||||||
|
//
|
||||||
|
// len(mockedDevice.GetNvLinkStateCalls())
|
||||||
|
func (mock *DeviceMock) GetNvLinkStateCalls() []struct {
|
||||||
|
N int
|
||||||
|
} {
|
||||||
|
var calls []struct {
|
||||||
|
N int
|
||||||
|
}
|
||||||
|
mock.lockGetNvLinkState.RLock()
|
||||||
|
calls = mock.calls.GetNvLinkState
|
||||||
|
mock.lockGetNvLinkState.RUnlock()
|
||||||
|
return calls
|
||||||
|
}
|
||||||
|
|
||||||
// GetPciInfo calls GetPciInfoFunc.
|
// GetPciInfo calls GetPciInfoFunc.
|
||||||
func (mock *DeviceMock) GetPciInfo() (PciInfo, Return) {
|
func (mock *DeviceMock) GetPciInfo() (PciInfo, Return) {
|
||||||
if mock.GetPciInfoFunc == nil {
|
if mock.GetPciInfoFunc == nil {
|
||||||
@ -900,6 +1000,38 @@ func (mock *DeviceMock) GetSupportedEventTypesCalls() []struct {
|
|||||||
return calls
|
return calls
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetTopologyCommonAncestor calls GetTopologyCommonAncestorFunc.
|
||||||
|
func (mock *DeviceMock) GetTopologyCommonAncestor(device Device) (GpuTopologyLevel, Return) {
|
||||||
|
if mock.GetTopologyCommonAncestorFunc == nil {
|
||||||
|
panic("DeviceMock.GetTopologyCommonAncestorFunc: method is nil but Device.GetTopologyCommonAncestor was just called")
|
||||||
|
}
|
||||||
|
callInfo := struct {
|
||||||
|
Device Device
|
||||||
|
}{
|
||||||
|
Device: device,
|
||||||
|
}
|
||||||
|
mock.lockGetTopologyCommonAncestor.Lock()
|
||||||
|
mock.calls.GetTopologyCommonAncestor = append(mock.calls.GetTopologyCommonAncestor, callInfo)
|
||||||
|
mock.lockGetTopologyCommonAncestor.Unlock()
|
||||||
|
return mock.GetTopologyCommonAncestorFunc(device)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetTopologyCommonAncestorCalls gets all the calls that were made to GetTopologyCommonAncestor.
|
||||||
|
// Check the length with:
|
||||||
|
//
|
||||||
|
// len(mockedDevice.GetTopologyCommonAncestorCalls())
|
||||||
|
func (mock *DeviceMock) GetTopologyCommonAncestorCalls() []struct {
|
||||||
|
Device Device
|
||||||
|
} {
|
||||||
|
var calls []struct {
|
||||||
|
Device Device
|
||||||
|
}
|
||||||
|
mock.lockGetTopologyCommonAncestor.RLock()
|
||||||
|
calls = mock.calls.GetTopologyCommonAncestor
|
||||||
|
mock.lockGetTopologyCommonAncestor.RUnlock()
|
||||||
|
return calls
|
||||||
|
}
|
||||||
|
|
||||||
// GetUUID calls GetUUIDFunc.
|
// GetUUID calls GetUUIDFunc.
|
||||||
func (mock *DeviceMock) GetUUID() (string, Return) {
|
func (mock *DeviceMock) GetUUID() (string, Return) {
|
||||||
if mock.GetUUIDFunc == nil {
|
if mock.GetUUIDFunc == nil {
|
||||||
|
@ -59,8 +59,11 @@ type Device interface {
|
|||||||
GetMigMode() (int, int, Return)
|
GetMigMode() (int, int, Return)
|
||||||
GetMinorNumber() (int, Return)
|
GetMinorNumber() (int, Return)
|
||||||
GetName() (string, Return)
|
GetName() (string, Return)
|
||||||
|
GetNvLinkRemotePciInfo(int) (PciInfo, Return)
|
||||||
|
GetNvLinkState(int) (EnableState, Return)
|
||||||
GetPciInfo() (PciInfo, Return)
|
GetPciInfo() (PciInfo, Return)
|
||||||
GetSupportedEventTypes() (uint64, Return)
|
GetSupportedEventTypes() (uint64, Return)
|
||||||
|
GetTopologyCommonAncestor(Device) (GpuTopologyLevel, Return)
|
||||||
GetUUID() (string, Return)
|
GetUUID() (string, Return)
|
||||||
IsMigDeviceHandle() (bool, Return)
|
IsMigDeviceHandle() (bool, Return)
|
||||||
RegisterEvents(uint64, EventSet) Return
|
RegisterEvents(uint64, EventSet) Return
|
||||||
@ -145,3 +148,9 @@ type DeviceArchitecture nvml.DeviceArchitecture
|
|||||||
|
|
||||||
// BrandType represents the brand of a GPU device
|
// BrandType represents the brand of a GPU device
|
||||||
type BrandType nvml.BrandType
|
type BrandType nvml.BrandType
|
||||||
|
|
||||||
|
// GpuTopologyLevel represents level relationships within a system between two GPUs
|
||||||
|
type GpuTopologyLevel nvml.GpuTopologyLevel
|
||||||
|
|
||||||
|
// EnableState represents a generic enable/disable enum
|
||||||
|
type EnableState nvml.EnableState
|
||||||
|
Loading…
Reference in New Issue
Block a user