Merge pull request #7 from NVIDIA/add-nvlink-functions

Add functions related to NVLink info
This commit is contained in:
Evan Lezar 2023-11-16 16:09:31 +01:00 committed by GitHub
commit 9fd385bace
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 186 additions and 0 deletions

View File

@ -20,6 +20,11 @@ import (
"github.com/NVIDIA/go-nvml/pkg/nvml" "github.com/NVIDIA/go-nvml/pkg/nvml"
) )
// General untyped constants.
//
// Values in this group are taken from the nvml package as-is, without being
// wrapped in one of this package's own types.
const (
// NVLINK_MAX_LINKS mirrors nvml.NVLINK_MAX_LINKS.
// NOTE(review): presumably the upper bound on per-device NvLink link
// indices — confirm against the NVML headers.
NVLINK_MAX_LINKS = nvml.NVLINK_MAX_LINKS
)
// Return constants // Return constants
const ( const (
SUCCESS = Return(nvml.SUCCESS) SUCCESS = Return(nvml.SUCCESS)
@ -131,3 +136,19 @@ const (
EventTypeSingleBitEccError = nvml.EventTypeSingleBitEccError EventTypeSingleBitEccError = nvml.EventTypeSingleBitEccError
EventTypeDoubleBitEccError = nvml.EventTypeDoubleBitEccError EventTypeDoubleBitEccError = nvml.EventTypeDoubleBitEccError
) )
// GPU Topology enumeration.
//
// Each constant is the identically named nvml constant converted to this
// package's GpuTopologyLevel type, so callers do not need to import nvml to
// compare topology levels.
const (
TOPOLOGY_INTERNAL = GpuTopologyLevel(nvml.TOPOLOGY_INTERNAL)
TOPOLOGY_SINGLE = GpuTopologyLevel(nvml.TOPOLOGY_SINGLE)
TOPOLOGY_MULTIPLE = GpuTopologyLevel(nvml.TOPOLOGY_MULTIPLE)
TOPOLOGY_HOSTBRIDGE = GpuTopologyLevel(nvml.TOPOLOGY_HOSTBRIDGE)
TOPOLOGY_NODE = GpuTopologyLevel(nvml.TOPOLOGY_NODE)
TOPOLOGY_SYSTEM = GpuTopologyLevel(nvml.TOPOLOGY_SYSTEM)
)
// Generic enable/disable constants.
//
// Each constant is the identically named nvml constant converted to this
// package's EnableState type.
const (
FEATURE_DISABLED = EnableState(nvml.FEATURE_DISABLED)
FEATURE_ENABLED = EnableState(nvml.FEATURE_ENABLED)
)

View File

@ -178,3 +178,27 @@ func (d nvmlDevice) GetSupportedEventTypes() (uint64, Return) {
e, r := nvml.Device(d).GetSupportedEventTypes() e, r := nvml.Device(d).GetSupportedEventTypes()
return e, Return(r) return e, Return(r)
} }
// GetTopologyCommonAncestor reports the topology level at which this device
// and o share a common ancestor.
//
// o must be an nvmlDevice; any other Device implementation is rejected with a
// zero level and ERROR_INVALID_ARGUMENT. Otherwise the query is forwarded to
// the underlying nvml.Device and its results are converted to this package's
// GpuTopologyLevel and Return types.
func (d nvmlDevice) GetTopologyCommonAncestor(o Device) (GpuTopologyLevel, Return) {
	peer, isNvml := o.(nvmlDevice)
	if !isNvml {
		// Only an nvmlDevice can be converted to the nvml.Device the library call expects.
		return GpuTopologyLevel(0), ERROR_INVALID_ARGUMENT
	}

	self := nvml.Device(d)
	level, ret := self.GetTopologyCommonAncestor(nvml.Device(peer))
	return GpuTopologyLevel(level), Return(ret)
}
// GetNvLinkState reports whether the NvLink identified by link is enabled on
// this device.
//
// The call is forwarded to the underlying nvml.Device; the resulting state and
// return code are converted to this package's EnableState and Return types.
func (d nvmlDevice) GetNvLinkState(link int) (EnableState, Return) {
	dev := nvml.Device(d)
	state, ret := dev.GetNvLinkState(link)
	return EnableState(state), Return(ret)
}
// GetNvLinkRemotePciInfo returns the PCI information of the remote node
// attached to the given NvLink link of this device.
//
// Note: pciSubSystemId is not filled in by this call and its value is
// indeterminate.
//
// The call is forwarded to the underlying nvml.Device; the results are
// converted to this package's PciInfo and Return types.
func (d nvmlDevice) GetNvLinkRemotePciInfo(link int) (PciInfo, Return) {
	dev := nvml.Device(d)
	info, ret := dev.GetNvLinkRemotePciInfo(link)
	return PciInfo(info), Return(ret)
}

View File

@ -74,12 +74,21 @@ var _ Device = &DeviceMock{}
// GetNameFunc: func() (string, Return) { // GetNameFunc: func() (string, Return) {
// panic("mock out the GetName method") // panic("mock out the GetName method")
// }, // },
// GetNvLinkRemotePciInfoFunc: func(n int) (PciInfo, Return) {
// panic("mock out the GetNvLinkRemotePciInfo method")
// },
// GetNvLinkStateFunc: func(n int) (EnableState, Return) {
// panic("mock out the GetNvLinkState method")
// },
// GetPciInfoFunc: func() (PciInfo, Return) { // GetPciInfoFunc: func() (PciInfo, Return) {
// panic("mock out the GetPciInfo method") // panic("mock out the GetPciInfo method")
// }, // },
// GetSupportedEventTypesFunc: func() (uint64, Return) { // GetSupportedEventTypesFunc: func() (uint64, Return) {
// panic("mock out the GetSupportedEventTypes method") // panic("mock out the GetSupportedEventTypes method")
// }, // },
// GetTopologyCommonAncestorFunc: func(device Device) (GpuTopologyLevel, Return) {
// panic("mock out the GetTopologyCommonAncestor method")
// },
// GetUUIDFunc: func() (string, Return) { // GetUUIDFunc: func() (string, Return) {
// panic("mock out the GetUUID method") // panic("mock out the GetUUID method")
// }, // },
@ -156,12 +165,21 @@ type DeviceMock struct {
// GetNameFunc mocks the GetName method. // GetNameFunc mocks the GetName method.
GetNameFunc func() (string, Return) GetNameFunc func() (string, Return)
// GetNvLinkRemotePciInfoFunc mocks the GetNvLinkRemotePciInfo method.
GetNvLinkRemotePciInfoFunc func(n int) (PciInfo, Return)
// GetNvLinkStateFunc mocks the GetNvLinkState method.
GetNvLinkStateFunc func(n int) (EnableState, Return)
// GetPciInfoFunc mocks the GetPciInfo method. // GetPciInfoFunc mocks the GetPciInfo method.
GetPciInfoFunc func() (PciInfo, Return) GetPciInfoFunc func() (PciInfo, Return)
// GetSupportedEventTypesFunc mocks the GetSupportedEventTypes method. // GetSupportedEventTypesFunc mocks the GetSupportedEventTypes method.
GetSupportedEventTypesFunc func() (uint64, Return) GetSupportedEventTypesFunc func() (uint64, Return)
// GetTopologyCommonAncestorFunc mocks the GetTopologyCommonAncestor method.
GetTopologyCommonAncestorFunc func(device Device) (GpuTopologyLevel, Return)
// GetUUIDFunc mocks the GetUUID method. // GetUUIDFunc mocks the GetUUID method.
GetUUIDFunc func() (string, Return) GetUUIDFunc func() (string, Return)
@ -247,12 +265,27 @@ type DeviceMock struct {
// GetName holds details about calls to the GetName method. // GetName holds details about calls to the GetName method.
GetName []struct { GetName []struct {
} }
// GetNvLinkRemotePciInfo holds details about calls to the GetNvLinkRemotePciInfo method.
GetNvLinkRemotePciInfo []struct {
// N is the n argument value.
N int
}
// GetNvLinkState holds details about calls to the GetNvLinkState method.
GetNvLinkState []struct {
// N is the n argument value.
N int
}
// GetPciInfo holds details about calls to the GetPciInfo method. // GetPciInfo holds details about calls to the GetPciInfo method.
GetPciInfo []struct { GetPciInfo []struct {
} }
// GetSupportedEventTypes holds details about calls to the GetSupportedEventTypes method. // GetSupportedEventTypes holds details about calls to the GetSupportedEventTypes method.
GetSupportedEventTypes []struct { GetSupportedEventTypes []struct {
} }
// GetTopologyCommonAncestor holds details about calls to the GetTopologyCommonAncestor method.
GetTopologyCommonAncestor []struct {
// Device is the device argument value.
Device Device
}
// GetUUID holds details about calls to the GetUUID method. // GetUUID holds details about calls to the GetUUID method.
GetUUID []struct { GetUUID []struct {
} }
@ -291,8 +324,11 @@ type DeviceMock struct {
lockGetMigMode sync.RWMutex lockGetMigMode sync.RWMutex
lockGetMinorNumber sync.RWMutex lockGetMinorNumber sync.RWMutex
lockGetName sync.RWMutex lockGetName sync.RWMutex
lockGetNvLinkRemotePciInfo sync.RWMutex
lockGetNvLinkState sync.RWMutex
lockGetPciInfo sync.RWMutex lockGetPciInfo sync.RWMutex
lockGetSupportedEventTypes sync.RWMutex lockGetSupportedEventTypes sync.RWMutex
lockGetTopologyCommonAncestor sync.RWMutex
lockGetUUID sync.RWMutex lockGetUUID sync.RWMutex
lockIsMigDeviceHandle sync.RWMutex lockIsMigDeviceHandle sync.RWMutex
lockRegisterEvents sync.RWMutex lockRegisterEvents sync.RWMutex
@ -846,6 +882,70 @@ func (mock *DeviceMock) GetNameCalls() []struct {
return calls return calls
} }
// GetNvLinkRemotePciInfo records the call and then delegates to
// GetNvLinkRemotePciInfoFunc.
//
// It panics if GetNvLinkRemotePciInfoFunc has not been set; in that case the
// call is not recorded. The recording lock is released before the delegate
// runs.
func (mock *DeviceMock) GetNvLinkRemotePciInfo(n int) (PciInfo, Return) {
	if mock.GetNvLinkRemotePciInfoFunc == nil {
		panic("DeviceMock.GetNvLinkRemotePciInfoFunc: method is nil but Device.GetNvLinkRemotePciInfo was just called")
	}

	// Append the argument snapshot under the write lock only.
	mock.lockGetNvLinkRemotePciInfo.Lock()
	mock.calls.GetNvLinkRemotePciInfo = append(mock.calls.GetNvLinkRemotePciInfo, struct {
		N int
	}{N: n})
	mock.lockGetNvLinkRemotePciInfo.Unlock()

	return mock.GetNvLinkRemotePciInfoFunc(n)
}
// GetNvLinkRemotePciInfoCalls returns the arguments of every recorded call to
// GetNvLinkRemotePciInfo, in call order. The returned slice is the mock's own
// record (not a copy) and is read under the read lock.
// Check the number of calls with:
//
//	len(mockedDevice.GetNvLinkRemotePciInfoCalls())
func (mock *DeviceMock) GetNvLinkRemotePciInfoCalls() []struct {
	N int
} {
	mock.lockGetNvLinkRemotePciInfo.RLock()
	defer mock.lockGetNvLinkRemotePciInfo.RUnlock()
	return mock.calls.GetNvLinkRemotePciInfo
}
// GetNvLinkState records the call and then delegates to GetNvLinkStateFunc.
//
// It panics if GetNvLinkStateFunc has not been set; in that case the call is
// not recorded. The recording lock is released before the delegate runs.
func (mock *DeviceMock) GetNvLinkState(n int) (EnableState, Return) {
	if mock.GetNvLinkStateFunc == nil {
		panic("DeviceMock.GetNvLinkStateFunc: method is nil but Device.GetNvLinkState was just called")
	}

	// Append the argument snapshot under the write lock only.
	mock.lockGetNvLinkState.Lock()
	mock.calls.GetNvLinkState = append(mock.calls.GetNvLinkState, struct {
		N int
	}{N: n})
	mock.lockGetNvLinkState.Unlock()

	return mock.GetNvLinkStateFunc(n)
}
// GetNvLinkStateCalls returns the arguments of every recorded call to
// GetNvLinkState, in call order. The returned slice is the mock's own record
// (not a copy) and is read under the read lock.
// Check the number of calls with:
//
//	len(mockedDevice.GetNvLinkStateCalls())
func (mock *DeviceMock) GetNvLinkStateCalls() []struct {
	N int
} {
	mock.lockGetNvLinkState.RLock()
	defer mock.lockGetNvLinkState.RUnlock()
	return mock.calls.GetNvLinkState
}
// GetPciInfo calls GetPciInfoFunc. // GetPciInfo calls GetPciInfoFunc.
func (mock *DeviceMock) GetPciInfo() (PciInfo, Return) { func (mock *DeviceMock) GetPciInfo() (PciInfo, Return) {
if mock.GetPciInfoFunc == nil { if mock.GetPciInfoFunc == nil {
@ -900,6 +1000,38 @@ func (mock *DeviceMock) GetSupportedEventTypesCalls() []struct {
return calls return calls
} }
// GetTopologyCommonAncestor records the call and then delegates to
// GetTopologyCommonAncestorFunc.
//
// It panics if GetTopologyCommonAncestorFunc has not been set; in that case
// the call is not recorded. The recording lock is released before the delegate
// runs.
func (mock *DeviceMock) GetTopologyCommonAncestor(device Device) (GpuTopologyLevel, Return) {
	if mock.GetTopologyCommonAncestorFunc == nil {
		panic("DeviceMock.GetTopologyCommonAncestorFunc: method is nil but Device.GetTopologyCommonAncestor was just called")
	}

	// Append the argument snapshot under the write lock only.
	mock.lockGetTopologyCommonAncestor.Lock()
	mock.calls.GetTopologyCommonAncestor = append(mock.calls.GetTopologyCommonAncestor, struct {
		Device Device
	}{Device: device})
	mock.lockGetTopologyCommonAncestor.Unlock()

	return mock.GetTopologyCommonAncestorFunc(device)
}
// GetTopologyCommonAncestorCalls returns the arguments of every recorded call
// to GetTopologyCommonAncestor, in call order. The returned slice is the
// mock's own record (not a copy) and is read under the read lock.
// Check the number of calls with:
//
//	len(mockedDevice.GetTopologyCommonAncestorCalls())
func (mock *DeviceMock) GetTopologyCommonAncestorCalls() []struct {
	Device Device
} {
	mock.lockGetTopologyCommonAncestor.RLock()
	defer mock.lockGetTopologyCommonAncestor.RUnlock()
	return mock.calls.GetTopologyCommonAncestor
}
// GetUUID calls GetUUIDFunc. // GetUUID calls GetUUIDFunc.
func (mock *DeviceMock) GetUUID() (string, Return) { func (mock *DeviceMock) GetUUID() (string, Return) {
if mock.GetUUIDFunc == nil { if mock.GetUUIDFunc == nil {

View File

@ -59,8 +59,11 @@ type Device interface {
GetMigMode() (int, int, Return) GetMigMode() (int, int, Return)
GetMinorNumber() (int, Return) GetMinorNumber() (int, Return)
GetName() (string, Return) GetName() (string, Return)
GetNvLinkRemotePciInfo(int) (PciInfo, Return)
GetNvLinkState(int) (EnableState, Return)
GetPciInfo() (PciInfo, Return) GetPciInfo() (PciInfo, Return)
GetSupportedEventTypes() (uint64, Return) GetSupportedEventTypes() (uint64, Return)
GetTopologyCommonAncestor(Device) (GpuTopologyLevel, Return)
GetUUID() (string, Return) GetUUID() (string, Return)
IsMigDeviceHandle() (bool, Return) IsMigDeviceHandle() (bool, Return)
RegisterEvents(uint64, EventSet) Return RegisterEvents(uint64, EventSet) Return
@ -145,3 +148,9 @@ type DeviceArchitecture nvml.DeviceArchitecture
// BrandType represents the brand of a GPU device // BrandType represents the brand of a GPU device
type BrandType nvml.BrandType type BrandType nvml.BrandType
// GpuTopologyLevel represents level relationships within a system between two GPUs.
// It is a defined type whose underlying type is nvml.GpuTopologyLevel, so
// values convert directly to and from the nvml type.
type GpuTopologyLevel nvml.GpuTopologyLevel
// EnableState represents a generic enable/disable enum.
// It is a defined type whose underlying type is nvml.EnableState, so values
// convert directly to and from the nvml type.
type EnableState nvml.EnableState