Merge branch 'update-go-nvlib' into 'main'

Update to github.com/NVIDIA/go-nvlib@9fd385bace0d2b8949cf60d9fcaab6169bde87ef

See merge request nvidia/container-toolkit/container-toolkit!495
This commit is contained in:
Evan Lezar 2023-11-20 11:25:25 +00:00
commit f6e3593a72
7 changed files with 190 additions and 4 deletions

2
go.mod
View File

@ -3,7 +3,7 @@ module github.com/NVIDIA/nvidia-container-toolkit
go 1.20
require (
github.com/NVIDIA/go-nvlib v0.0.0-20231115170030-b21432a353e1
github.com/NVIDIA/go-nvlib v0.0.0-20231116150931-9fd385bace0d
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f
github.com/fsnotify/fsnotify v1.5.4
github.com/opencontainers/runtime-spec v1.1.0

4
go.sum
View File

@ -1,6 +1,6 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/NVIDIA/go-nvlib v0.0.0-20231115170030-b21432a353e1 h1:A+2GAIeZkdID7Jc/aSJ1dq42eippP5uHvRMgO8eN3aM=
github.com/NVIDIA/go-nvlib v0.0.0-20231115170030-b21432a353e1/go.mod h1:HPFNPAYqQeoos58MKUboWsdZMu71EzSQrbmd+QBRD40=
github.com/NVIDIA/go-nvlib v0.0.0-20231116150931-9fd385bace0d h1:XxRHS7eNkZVcPpZZmUcoT4oO8FEcoYKn06sooQh5niU=
github.com/NVIDIA/go-nvlib v0.0.0-20231116150931-9fd385bace0d/go.mod h1:HPFNPAYqQeoos58MKUboWsdZMu71EzSQrbmd+QBRD40=
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f h1:FTblgO87K1vPB8tcwM5EOFpFf6UpsrlDpErPm25mFWE=
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=

View File

@ -20,6 +20,11 @@ import (
"github.com/NVIDIA/go-nvml/pkg/nvml"
)
// General untyped constants.
//
// Values in this group are re-exported from the nvml package without a type
// conversion, so each one keeps the type (or untyped-ness) of the nvml
// constant it mirrors.
const (
// NVLINK_MAX_LINKS mirrors nvml.NVLINK_MAX_LINKS.
// NOTE(review): presumably the upper bound on the link index accepted by the
// NvLink query methods — confirm against the NVML documentation.
NVLINK_MAX_LINKS = nvml.NVLINK_MAX_LINKS
)
// Return constants
const (
SUCCESS = Return(nvml.SUCCESS)
@ -131,3 +136,19 @@ const (
EventTypeSingleBitEccError = nvml.EventTypeSingleBitEccError
EventTypeDoubleBitEccError = nvml.EventTypeDoubleBitEccError
)
// GPU Topology enumeration.
//
// Each constant is the nvml constant of the same name converted to this
// package's GpuTopologyLevel type, so callers can compare topology levels
// without importing nvml directly.
const (
TOPOLOGY_INTERNAL = GpuTopologyLevel(nvml.TOPOLOGY_INTERNAL)
TOPOLOGY_SINGLE = GpuTopologyLevel(nvml.TOPOLOGY_SINGLE)
TOPOLOGY_MULTIPLE = GpuTopologyLevel(nvml.TOPOLOGY_MULTIPLE)
TOPOLOGY_HOSTBRIDGE = GpuTopologyLevel(nvml.TOPOLOGY_HOSTBRIDGE)
TOPOLOGY_NODE = GpuTopologyLevel(nvml.TOPOLOGY_NODE)
TOPOLOGY_SYSTEM = GpuTopologyLevel(nvml.TOPOLOGY_SYSTEM)
)
// Generic enable/disable constants.
//
// Each constant is the nvml constant of the same name converted to this
// package's EnableState type.
const (
FEATURE_DISABLED = EnableState(nvml.FEATURE_DISABLED)
FEATURE_ENABLED = EnableState(nvml.FEATURE_ENABLED)
)

View File

@ -178,3 +178,27 @@ func (d nvmlDevice) GetSupportedEventTypes() (uint64, Return) {
e, r := nvml.Device(d).GetSupportedEventTypes()
return e, Return(r)
}
// GetTopologyCommonAncestor retrieves the common ancestor for two devices.
//
// The peer device o must itself be an nvmlDevice so that it can be handed to
// the underlying nvml call. Any other Device implementation is rejected with
// a zero topology level and ERROR_INVALID_ARGUMENT, without calling nvml.
func (d nvmlDevice) GetTopologyCommonAncestor(o Device) (GpuTopologyLevel, Return) {
	if peer, isNvml := o.(nvmlDevice); isNvml {
		level, ret := nvml.Device(d).GetTopologyCommonAncestor(nvml.Device(peer))
		return GpuTopologyLevel(level), Return(ret)
	}
	return 0, ERROR_INVALID_ARGUMENT
}
// GetNvLinkState retrieves the state of the device's NvLink for the link specified.
//
// The call is forwarded to the wrapped nvml device; the resulting state and
// return code are converted to this package's EnableState and Return types.
func (d nvmlDevice) GetNvLinkState(link int) (EnableState, Return) {
	dev := nvml.Device(d)
	state, ret := dev.GetNvLinkState(link)
	return EnableState(state), Return(ret)
}
// GetNvLinkRemotePciInfo retrieves the PCI information for the remote node on a NvLink link.
// Note: pciSubSystemId is not filled in this function and is indeterminate.
//
// The call is forwarded to the wrapped nvml device; the resulting PCI info
// and return code are converted to this package's PciInfo and Return types.
func (d nvmlDevice) GetNvLinkRemotePciInfo(link int) (PciInfo, Return) {
	dev := nvml.Device(d)
	info, ret := dev.GetNvLinkRemotePciInfo(link)
	return PciInfo(info), Return(ret)
}

View File

@ -74,12 +74,21 @@ var _ Device = &DeviceMock{}
// GetNameFunc: func() (string, Return) {
// panic("mock out the GetName method")
// },
// GetNvLinkRemotePciInfoFunc: func(n int) (PciInfo, Return) {
// panic("mock out the GetNvLinkRemotePciInfo method")
// },
// GetNvLinkStateFunc: func(n int) (EnableState, Return) {
// panic("mock out the GetNvLinkState method")
// },
// GetPciInfoFunc: func() (PciInfo, Return) {
// panic("mock out the GetPciInfo method")
// },
// GetSupportedEventTypesFunc: func() (uint64, Return) {
// panic("mock out the GetSupportedEventTypes method")
// },
// GetTopologyCommonAncestorFunc: func(device Device) (GpuTopologyLevel, Return) {
// panic("mock out the GetTopologyCommonAncestor method")
// },
// GetUUIDFunc: func() (string, Return) {
// panic("mock out the GetUUID method")
// },
@ -156,12 +165,21 @@ type DeviceMock struct {
// GetNameFunc mocks the GetName method.
GetNameFunc func() (string, Return)
// GetNvLinkRemotePciInfoFunc mocks the GetNvLinkRemotePciInfo method.
GetNvLinkRemotePciInfoFunc func(n int) (PciInfo, Return)
// GetNvLinkStateFunc mocks the GetNvLinkState method.
GetNvLinkStateFunc func(n int) (EnableState, Return)
// GetPciInfoFunc mocks the GetPciInfo method.
GetPciInfoFunc func() (PciInfo, Return)
// GetSupportedEventTypesFunc mocks the GetSupportedEventTypes method.
GetSupportedEventTypesFunc func() (uint64, Return)
// GetTopologyCommonAncestorFunc mocks the GetTopologyCommonAncestor method.
GetTopologyCommonAncestorFunc func(device Device) (GpuTopologyLevel, Return)
// GetUUIDFunc mocks the GetUUID method.
GetUUIDFunc func() (string, Return)
@ -247,12 +265,27 @@ type DeviceMock struct {
// GetName holds details about calls to the GetName method.
GetName []struct {
}
// GetNvLinkRemotePciInfo holds details about calls to the GetNvLinkRemotePciInfo method.
GetNvLinkRemotePciInfo []struct {
// N is the n argument value.
N int
}
// GetNvLinkState holds details about calls to the GetNvLinkState method.
GetNvLinkState []struct {
// N is the n argument value.
N int
}
// GetPciInfo holds details about calls to the GetPciInfo method.
GetPciInfo []struct {
}
// GetSupportedEventTypes holds details about calls to the GetSupportedEventTypes method.
GetSupportedEventTypes []struct {
}
// GetTopologyCommonAncestor holds details about calls to the GetTopologyCommonAncestor method.
GetTopologyCommonAncestor []struct {
// Device is the device argument value.
Device Device
}
// GetUUID holds details about calls to the GetUUID method.
GetUUID []struct {
}
@ -291,8 +324,11 @@ type DeviceMock struct {
lockGetMigMode sync.RWMutex
lockGetMinorNumber sync.RWMutex
lockGetName sync.RWMutex
lockGetNvLinkRemotePciInfo sync.RWMutex
lockGetNvLinkState sync.RWMutex
lockGetPciInfo sync.RWMutex
lockGetSupportedEventTypes sync.RWMutex
lockGetTopologyCommonAncestor sync.RWMutex
lockGetUUID sync.RWMutex
lockIsMigDeviceHandle sync.RWMutex
lockRegisterEvents sync.RWMutex
@ -846,6 +882,70 @@ func (mock *DeviceMock) GetNameCalls() []struct {
return calls
}
// GetNvLinkRemotePciInfo calls GetNvLinkRemotePciInfoFunc.
//
// It panics if GetNvLinkRemotePciInfoFunc has not been set. Otherwise the
// argument is appended to the recorded calls and the call is delegated to
// GetNvLinkRemotePciInfoFunc.
func (mock *DeviceMock) GetNvLinkRemotePciInfo(n int) (PciInfo, Return) {
	if mock.GetNvLinkRemotePciInfoFunc == nil {
		panic("DeviceMock.GetNvLinkRemotePciInfoFunc: method is nil but Device.GetNvLinkRemotePciInfo was just called")
	}

	record := struct {
		N int
	}{N: n}

	// The lock only guards the bookkeeping; it is released before delegating.
	mock.lockGetNvLinkRemotePciInfo.Lock()
	mock.calls.GetNvLinkRemotePciInfo = append(mock.calls.GetNvLinkRemotePciInfo, record)
	mock.lockGetNvLinkRemotePciInfo.Unlock()

	return mock.GetNvLinkRemotePciInfoFunc(n)
}
// GetNvLinkRemotePciInfoCalls gets all the calls that were made to GetNvLinkRemotePciInfo.
// Check the length with:
//
//	len(mockedDevice.GetNvLinkRemotePciInfoCalls())
//
// The returned slice is the mock's own record, read under the read lock; it
// is not copied.
func (mock *DeviceMock) GetNvLinkRemotePciInfoCalls() []struct {
	N int
} {
	mock.lockGetNvLinkRemotePciInfo.RLock()
	defer mock.lockGetNvLinkRemotePciInfo.RUnlock()
	return mock.calls.GetNvLinkRemotePciInfo
}
// GetNvLinkState calls GetNvLinkStateFunc.
//
// It panics if GetNvLinkStateFunc has not been set. Otherwise the argument is
// appended to the recorded calls and the call is delegated to
// GetNvLinkStateFunc.
func (mock *DeviceMock) GetNvLinkState(n int) (EnableState, Return) {
	if mock.GetNvLinkStateFunc == nil {
		panic("DeviceMock.GetNvLinkStateFunc: method is nil but Device.GetNvLinkState was just called")
	}

	record := struct {
		N int
	}{N: n}

	// The lock only guards the bookkeeping; it is released before delegating.
	mock.lockGetNvLinkState.Lock()
	mock.calls.GetNvLinkState = append(mock.calls.GetNvLinkState, record)
	mock.lockGetNvLinkState.Unlock()

	return mock.GetNvLinkStateFunc(n)
}
// GetNvLinkStateCalls gets all the calls that were made to GetNvLinkState.
// Check the length with:
//
//	len(mockedDevice.GetNvLinkStateCalls())
//
// The returned slice is the mock's own record, read under the read lock; it
// is not copied.
func (mock *DeviceMock) GetNvLinkStateCalls() []struct {
	N int
} {
	mock.lockGetNvLinkState.RLock()
	defer mock.lockGetNvLinkState.RUnlock()
	return mock.calls.GetNvLinkState
}
// GetPciInfo calls GetPciInfoFunc.
func (mock *DeviceMock) GetPciInfo() (PciInfo, Return) {
if mock.GetPciInfoFunc == nil {
@ -900,6 +1000,38 @@ func (mock *DeviceMock) GetSupportedEventTypesCalls() []struct {
return calls
}
// GetTopologyCommonAncestor calls GetTopologyCommonAncestorFunc.
//
// It panics if GetTopologyCommonAncestorFunc has not been set. Otherwise the
// argument is appended to the recorded calls and the call is delegated to
// GetTopologyCommonAncestorFunc.
func (mock *DeviceMock) GetTopologyCommonAncestor(device Device) (GpuTopologyLevel, Return) {
	if mock.GetTopologyCommonAncestorFunc == nil {
		panic("DeviceMock.GetTopologyCommonAncestorFunc: method is nil but Device.GetTopologyCommonAncestor was just called")
	}

	record := struct {
		Device Device
	}{Device: device}

	// The lock only guards the bookkeeping; it is released before delegating.
	mock.lockGetTopologyCommonAncestor.Lock()
	mock.calls.GetTopologyCommonAncestor = append(mock.calls.GetTopologyCommonAncestor, record)
	mock.lockGetTopologyCommonAncestor.Unlock()

	return mock.GetTopologyCommonAncestorFunc(device)
}
// GetTopologyCommonAncestorCalls gets all the calls that were made to GetTopologyCommonAncestor.
// Check the length with:
//
//	len(mockedDevice.GetTopologyCommonAncestorCalls())
//
// The returned slice is the mock's own record, read under the read lock; it
// is not copied.
func (mock *DeviceMock) GetTopologyCommonAncestorCalls() []struct {
	Device Device
} {
	mock.lockGetTopologyCommonAncestor.RLock()
	defer mock.lockGetTopologyCommonAncestor.RUnlock()
	return mock.calls.GetTopologyCommonAncestor
}
// GetUUID calls GetUUIDFunc.
func (mock *DeviceMock) GetUUID() (string, Return) {
if mock.GetUUIDFunc == nil {

View File

@ -59,8 +59,11 @@ type Device interface {
GetMigMode() (int, int, Return)
GetMinorNumber() (int, Return)
GetName() (string, Return)
GetNvLinkRemotePciInfo(int) (PciInfo, Return)
GetNvLinkState(int) (EnableState, Return)
GetPciInfo() (PciInfo, Return)
GetSupportedEventTypes() (uint64, Return)
GetTopologyCommonAncestor(Device) (GpuTopologyLevel, Return)
GetUUID() (string, Return)
IsMigDeviceHandle() (bool, Return)
RegisterEvents(uint64, EventSet) Return
@ -145,3 +148,9 @@ type DeviceArchitecture nvml.DeviceArchitecture
// BrandType represents the brand of a GPU device.
//
// It is a distinct defined type based on nvml.BrandType, so values must be
// converted explicitly when crossing between this package and nvml.
type BrandType nvml.BrandType
// GpuTopologyLevel represents level relationships within a system between two GPUs.
//
// It is a distinct defined type based on nvml.GpuTopologyLevel, so values
// must be converted explicitly when crossing between this package and nvml.
type GpuTopologyLevel nvml.GpuTopologyLevel
// EnableState represents a generic enable/disable enum.
//
// It is a distinct defined type based on nvml.EnableState, so values must be
// converted explicitly when crossing between this package and nvml.
type EnableState nvml.EnableState

2
vendor/modules.txt vendored
View File

@ -1,4 +1,4 @@
# github.com/NVIDIA/go-nvlib v0.0.0-20231115170030-b21432a353e1
# github.com/NVIDIA/go-nvlib v0.0.0-20231116150931-9fd385bace0d
## explicit; go 1.20
github.com/NVIDIA/go-nvlib/pkg/nvlib/device
github.com/NVIDIA/go-nvlib/pkg/nvlib/info