mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Bump go-nvlib to v0.2.0 and go-nvml v0.12.0-3
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
9
vendor/github.com/NVIDIA/go-nvlib/pkg/nvml/consts.go
generated
vendored
9
vendor/github.com/NVIDIA/go-nvlib/pkg/nvml/consts.go
generated
vendored
@@ -152,3 +152,12 @@ const (
|
||||
FEATURE_DISABLED = EnableState(nvml.FEATURE_DISABLED)
|
||||
FEATURE_ENABLED = EnableState(nvml.FEATURE_ENABLED)
|
||||
)
|
||||
|
||||
// Compute mode constants
|
||||
const (
|
||||
COMPUTEMODE_DEFAULT = ComputeMode(nvml.COMPUTEMODE_DEFAULT)
|
||||
COMPUTEMODE_EXCLUSIVE_THREAD = ComputeMode(nvml.COMPUTEMODE_EXCLUSIVE_THREAD)
|
||||
COMPUTEMODE_PROHIBITED = ComputeMode(nvml.COMPUTEMODE_PROHIBITED)
|
||||
COMPUTEMODE_EXCLUSIVE_PROCESS = ComputeMode(nvml.COMPUTEMODE_EXCLUSIVE_PROCESS)
|
||||
COMPUTEMODE_COUNT = ComputeMode(nvml.COMPUTEMODE_COUNT)
|
||||
)
|
||||
|
||||
17
vendor/github.com/NVIDIA/go-nvlib/pkg/nvml/device.go
generated
vendored
17
vendor/github.com/NVIDIA/go-nvlib/pkg/nvml/device.go
generated
vendored
@@ -22,6 +22,11 @@ type nvmlDevice nvml.Device
|
||||
|
||||
var _ Device = (*nvmlDevice)(nil)
|
||||
|
||||
// nvmlDeviceHandle returns a pointer to the underlying device.
|
||||
func (d nvmlDevice) nvmlDeviceHandle() *nvml.Device {
|
||||
return (*nvml.Device)(&d)
|
||||
}
|
||||
|
||||
// GetIndex returns the index of a Device
|
||||
func (d nvmlDevice) GetIndex() (int, Return) {
|
||||
i, r := nvml.Device(d).GetIndex()
|
||||
@@ -181,12 +186,12 @@ func (d nvmlDevice) GetSupportedEventTypes() (uint64, Return) {
|
||||
|
||||
// GetTopologyCommonAncestor retrieves the common ancestor for two devices.
|
||||
func (d nvmlDevice) GetTopologyCommonAncestor(o Device) (GpuTopologyLevel, Return) {
|
||||
other, ok := o.(nvmlDevice)
|
||||
if !ok {
|
||||
other := o.nvmlDeviceHandle()
|
||||
if other == nil {
|
||||
return 0, ERROR_INVALID_ARGUMENT
|
||||
}
|
||||
|
||||
l, r := nvml.Device(d).GetTopologyCommonAncestor(nvml.Device(other))
|
||||
l, r := nvml.Device(d).GetTopologyCommonAncestor(*other)
|
||||
return GpuTopologyLevel(l), Return(r)
|
||||
}
|
||||
|
||||
@@ -202,3 +207,9 @@ func (d nvmlDevice) GetNvLinkRemotePciInfo(link int) (PciInfo, Return) {
|
||||
p, r := nvml.Device(d).GetNvLinkRemotePciInfo(link)
|
||||
return PciInfo(p), Return(r)
|
||||
}
|
||||
|
||||
// SetComputeMode sets the compute mode for the device.
|
||||
func (d nvmlDevice) SetComputeMode(mode ComputeMode) Return {
|
||||
r := nvml.Device(d).SetComputeMode(nvml.ComputeMode(mode))
|
||||
return Return(r)
|
||||
}
|
||||
|
||||
82
vendor/github.com/NVIDIA/go-nvlib/pkg/nvml/device_mock.go
generated
vendored
82
vendor/github.com/NVIDIA/go-nvlib/pkg/nvml/device_mock.go
generated
vendored
@@ -4,6 +4,7 @@
|
||||
package nvml
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
"sync"
|
||||
)
|
||||
|
||||
@@ -98,9 +99,15 @@ var _ Device = &DeviceMock{}
|
||||
// RegisterEventsFunc: func(v uint64, eventSet EventSet) Return {
|
||||
// panic("mock out the RegisterEvents method")
|
||||
// },
|
||||
// SetComputeModeFunc: func(computeMode ComputeMode) Return {
|
||||
// panic("mock out the SetComputeMode method")
|
||||
// },
|
||||
// SetMigModeFunc: func(Mode int) (Return, Return) {
|
||||
// panic("mock out the SetMigMode method")
|
||||
// },
|
||||
// nvmlDeviceHandleFunc: func() *nvml.Device {
|
||||
// panic("mock out the nvmlDeviceHandle method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedDevice in code that requires Device
|
||||
@@ -189,9 +196,15 @@ type DeviceMock struct {
|
||||
// RegisterEventsFunc mocks the RegisterEvents method.
|
||||
RegisterEventsFunc func(v uint64, eventSet EventSet) Return
|
||||
|
||||
// SetComputeModeFunc mocks the SetComputeMode method.
|
||||
SetComputeModeFunc func(computeMode ComputeMode) Return
|
||||
|
||||
// SetMigModeFunc mocks the SetMigMode method.
|
||||
SetMigModeFunc func(Mode int) (Return, Return)
|
||||
|
||||
// nvmlDeviceHandleFunc mocks the nvmlDeviceHandle method.
|
||||
nvmlDeviceHandleFunc func() *nvml.Device
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// CreateGpuInstanceWithPlacement holds details about calls to the CreateGpuInstanceWithPlacement method.
|
||||
@@ -299,11 +312,19 @@ type DeviceMock struct {
|
||||
// EventSet is the eventSet argument value.
|
||||
EventSet EventSet
|
||||
}
|
||||
// SetComputeMode holds details about calls to the SetComputeMode method.
|
||||
SetComputeMode []struct {
|
||||
// ComputeMode is the computeMode argument value.
|
||||
ComputeMode ComputeMode
|
||||
}
|
||||
// SetMigMode holds details about calls to the SetMigMode method.
|
||||
SetMigMode []struct {
|
||||
// Mode is the Mode argument value.
|
||||
Mode int
|
||||
}
|
||||
// nvmlDeviceHandle holds details about calls to the nvmlDeviceHandle method.
|
||||
nvmlDeviceHandle []struct {
|
||||
}
|
||||
}
|
||||
lockCreateGpuInstanceWithPlacement sync.RWMutex
|
||||
lockGetArchitecture sync.RWMutex
|
||||
@@ -332,7 +353,9 @@ type DeviceMock struct {
|
||||
lockGetUUID sync.RWMutex
|
||||
lockIsMigDeviceHandle sync.RWMutex
|
||||
lockRegisterEvents sync.RWMutex
|
||||
lockSetComputeMode sync.RWMutex
|
||||
lockSetMigMode sync.RWMutex
|
||||
locknvmlDeviceHandle sync.RWMutex
|
||||
}
|
||||
|
||||
// CreateGpuInstanceWithPlacement calls CreateGpuInstanceWithPlacementFunc.
|
||||
@@ -1122,6 +1145,38 @@ func (mock *DeviceMock) RegisterEventsCalls() []struct {
|
||||
return calls
|
||||
}
|
||||
|
||||
// SetComputeMode calls SetComputeModeFunc.
|
||||
func (mock *DeviceMock) SetComputeMode(computeMode ComputeMode) Return {
|
||||
if mock.SetComputeModeFunc == nil {
|
||||
panic("DeviceMock.SetComputeModeFunc: method is nil but Device.SetComputeMode was just called")
|
||||
}
|
||||
callInfo := struct {
|
||||
ComputeMode ComputeMode
|
||||
}{
|
||||
ComputeMode: computeMode,
|
||||
}
|
||||
mock.lockSetComputeMode.Lock()
|
||||
mock.calls.SetComputeMode = append(mock.calls.SetComputeMode, callInfo)
|
||||
mock.lockSetComputeMode.Unlock()
|
||||
return mock.SetComputeModeFunc(computeMode)
|
||||
}
|
||||
|
||||
// SetComputeModeCalls gets all the calls that were made to SetComputeMode.
|
||||
// Check the length with:
|
||||
//
|
||||
// len(mockedDevice.SetComputeModeCalls())
|
||||
func (mock *DeviceMock) SetComputeModeCalls() []struct {
|
||||
ComputeMode ComputeMode
|
||||
} {
|
||||
var calls []struct {
|
||||
ComputeMode ComputeMode
|
||||
}
|
||||
mock.lockSetComputeMode.RLock()
|
||||
calls = mock.calls.SetComputeMode
|
||||
mock.lockSetComputeMode.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// SetMigMode calls SetMigModeFunc.
|
||||
func (mock *DeviceMock) SetMigMode(Mode int) (Return, Return) {
|
||||
if mock.SetMigModeFunc == nil {
|
||||
@@ -1153,3 +1208,30 @@ func (mock *DeviceMock) SetMigModeCalls() []struct {
|
||||
mock.lockSetMigMode.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// nvmlDeviceHandle calls nvmlDeviceHandleFunc.
|
||||
func (mock *DeviceMock) nvmlDeviceHandle() *nvml.Device {
|
||||
if mock.nvmlDeviceHandleFunc == nil {
|
||||
panic("DeviceMock.nvmlDeviceHandleFunc: method is nil but Device.nvmlDeviceHandle was just called")
|
||||
}
|
||||
callInfo := struct {
|
||||
}{}
|
||||
mock.locknvmlDeviceHandle.Lock()
|
||||
mock.calls.nvmlDeviceHandle = append(mock.calls.nvmlDeviceHandle, callInfo)
|
||||
mock.locknvmlDeviceHandle.Unlock()
|
||||
return mock.nvmlDeviceHandleFunc()
|
||||
}
|
||||
|
||||
// nvmlDeviceHandleCalls gets all the calls that were made to nvmlDeviceHandle.
|
||||
// Check the length with:
|
||||
//
|
||||
// len(mockedDevice.nvmlDeviceHandleCalls())
|
||||
func (mock *DeviceMock) nvmlDeviceHandleCalls() []struct {
|
||||
} {
|
||||
var calls []struct {
|
||||
}
|
||||
mock.locknvmlDeviceHandle.RLock()
|
||||
calls = mock.calls.nvmlDeviceHandle
|
||||
mock.locknvmlDeviceHandle.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
6
vendor/github.com/NVIDIA/go-nvlib/pkg/nvml/types.go
generated
vendored
6
vendor/github.com/NVIDIA/go-nvlib/pkg/nvml/types.go
generated
vendored
@@ -67,7 +67,10 @@ type Device interface {
|
||||
GetUUID() (string, Return)
|
||||
IsMigDeviceHandle() (bool, Return)
|
||||
RegisterEvents(uint64, EventSet) Return
|
||||
SetComputeMode(ComputeMode) Return
|
||||
SetMigMode(Mode int) (Return, Return)
|
||||
// nvmlDeviceHandle returns a pointer to the underlying NVML device.
|
||||
nvmlDeviceHandle() *nvml.Device
|
||||
}
|
||||
|
||||
// GpuInstance defines the functions implemented by a GpuInstance
|
||||
@@ -154,3 +157,6 @@ type GpuTopologyLevel nvml.GpuTopologyLevel
|
||||
|
||||
// EnableState represents a generic enable/disable enum
|
||||
type EnableState nvml.EnableState
|
||||
|
||||
// ComputeMode represents the compute mode for a device
|
||||
type ComputeMode nvml.ComputeMode
|
||||
|
||||
2
vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl.go
generated
vendored
2
vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl.go
generated
vendored
@@ -104,7 +104,7 @@ func (dl *DynamicLibrary) Lookup(symbol string) error {
|
||||
var pointer unsafe.Pointer
|
||||
if err := withOSLock(func() error {
|
||||
// Call dlError() to clear out any previous errors.
|
||||
dlError()
|
||||
_ = dlError()
|
||||
pointer = C.dlsym(dl.handle, sym)
|
||||
if pointer == nil {
|
||||
return fmt.Errorf("symbol %q not found: %w", symbol, dlError())
|
||||
|
||||
2
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/api.go
generated
vendored
2
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/api.go
generated
vendored
@@ -31,7 +31,7 @@ type dynamicLibrary interface {
|
||||
Close() error
|
||||
}
|
||||
|
||||
// Interface represents the interace for the NVML library.
|
||||
// Interface represents the interface for the NVML library.
|
||||
type Interface interface {
|
||||
GetLibrary() Library
|
||||
}
|
||||
|
||||
31
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/lib.go
generated
vendored
31
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/lib.go
generated
vendored
@@ -38,9 +38,10 @@ var errLibraryAlreadyLoaded = errors.New("library already loaded")
|
||||
// This includes a reference to the underlying DynamicLibrary
|
||||
type library struct {
|
||||
sync.Mutex
|
||||
path string
|
||||
flags int
|
||||
dl dynamicLibrary
|
||||
path string
|
||||
flags int
|
||||
refcount refcount
|
||||
dl dynamicLibrary
|
||||
}
|
||||
|
||||
// libnvml is a global instance of the nvml library.
|
||||
@@ -77,16 +78,17 @@ var newDynamicLibrary = func(path string, flags int) dynamicLibrary {
|
||||
|
||||
// load initializes the library and updates the versioned symbols.
|
||||
// Multiple calls to an already loaded library will return without error.
|
||||
func (l *library) load() error {
|
||||
func (l *library) load() (rerr error) {
|
||||
l.Lock()
|
||||
defer l.Unlock()
|
||||
if l.dl != nil {
|
||||
|
||||
defer func() { l.refcount.IncOnNoError(rerr) }()
|
||||
if l.refcount > 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
dl := newDynamicLibrary(l.path, l.flags)
|
||||
err := dl.Open()
|
||||
if err != nil {
|
||||
if err := dl.Open(); err != nil {
|
||||
return fmt.Errorf("error opening %s: %w", l.path, err)
|
||||
}
|
||||
|
||||
@@ -99,16 +101,16 @@ func (l *library) load() error {
|
||||
// close the underlying library and ensure that the global pointer to the
|
||||
// library is set to nil to ensure that subsequent calls to open will reinitialize it.
|
||||
// Multiple calls to an already closed nvml library will return without error.
|
||||
func (l *library) close() error {
|
||||
func (l *library) close() (rerr error) {
|
||||
l.Lock()
|
||||
defer l.Unlock()
|
||||
|
||||
if l.dl == nil {
|
||||
defer func() { l.refcount.DecOnNoError(rerr) }()
|
||||
if l.refcount != 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := l.dl.Close()
|
||||
if err != nil {
|
||||
if err := l.dl.Close(); err != nil {
|
||||
return fmt.Errorf("error closing %s: %w", l.path, err)
|
||||
}
|
||||
|
||||
@@ -160,12 +162,7 @@ func (pis ProcessInfo_v1Slice) ToProcessInfoSlice() []ProcessInfo {
|
||||
func (pis ProcessInfo_v2Slice) ToProcessInfoSlice() []ProcessInfo {
|
||||
var newInfos []ProcessInfo
|
||||
for _, pi := range pis {
|
||||
info := ProcessInfo{
|
||||
Pid: pi.Pid,
|
||||
UsedGpuMemory: pi.UsedGpuMemory,
|
||||
GpuInstanceId: pi.GpuInstanceId,
|
||||
ComputeInstanceId: pi.ComputeInstanceId,
|
||||
}
|
||||
info := ProcessInfo(pi)
|
||||
newInfos = append(newInfos, info)
|
||||
}
|
||||
return newInfos
|
||||
|
||||
31
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/refcount.go
generated
vendored
Normal file
31
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/refcount.go
generated
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package nvml
|
||||
|
||||
// refcount is an integer reference counter whose updates are conditional on
// the outcome of an operation. The zero value is a counter at zero.
type refcount int

// IncOnNoError increments the counter when err is nil.
// A non-nil err leaves the counter unchanged.
func (r *refcount) IncOnNoError(err error) {
	if err != nil {
		return
	}
	(*r)++
}

// DecOnNoError decrements the counter when err is nil and the counter is
// positive. A non-nil err, or a counter that is already zero or below, leaves
// the counter unchanged, so this method never drives it negative.
func (r *refcount) DecOnNoError(err error) {
	if err != nil || *r <= 0 {
		return
	}
	(*r)--
}
|
||||
77
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/return.go
generated
vendored
77
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/return.go
generated
vendored
@@ -14,7 +14,80 @@
|
||||
|
||||
package nvml
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// nvml.ErrorString()
|
||||
func ErrorString(Result Return) string {
|
||||
return nvmlErrorString(Result)
|
||||
func ErrorString(r Return) string {
|
||||
if err := GetLibrary().Lookup("nvmlErrorString"); err != nil {
|
||||
return fallbackErrorStringFunc(r)
|
||||
}
|
||||
return nvmlErrorString(r)
|
||||
}
|
||||
|
||||
// fallbackErrorStringFunc provides a basic nvmlErrorString implementation.
|
||||
// This allows the nvml.ErrorString function to be used even if the NVML library
|
||||
// is not loaded.
|
||||
var fallbackErrorStringFunc = func(r Return) string {
|
||||
switch r {
|
||||
case SUCCESS:
|
||||
return "SUCCESS"
|
||||
case ERROR_UNINITIALIZED:
|
||||
return "ERROR_UNINITIALIZED"
|
||||
case ERROR_INVALID_ARGUMENT:
|
||||
return "ERROR_INVALID_ARGUMENT"
|
||||
case ERROR_NOT_SUPPORTED:
|
||||
return "ERROR_NOT_SUPPORTED"
|
||||
case ERROR_NO_PERMISSION:
|
||||
return "ERROR_NO_PERMISSION"
|
||||
case ERROR_ALREADY_INITIALIZED:
|
||||
return "ERROR_ALREADY_INITIALIZED"
|
||||
case ERROR_NOT_FOUND:
|
||||
return "ERROR_NOT_FOUND"
|
||||
case ERROR_INSUFFICIENT_SIZE:
|
||||
return "ERROR_INSUFFICIENT_SIZE"
|
||||
case ERROR_INSUFFICIENT_POWER:
|
||||
return "ERROR_INSUFFICIENT_POWER"
|
||||
case ERROR_DRIVER_NOT_LOADED:
|
||||
return "ERROR_DRIVER_NOT_LOADED"
|
||||
case ERROR_TIMEOUT:
|
||||
return "ERROR_TIMEOUT"
|
||||
case ERROR_IRQ_ISSUE:
|
||||
return "ERROR_IRQ_ISSUE"
|
||||
case ERROR_LIBRARY_NOT_FOUND:
|
||||
return "ERROR_LIBRARY_NOT_FOUND"
|
||||
case ERROR_FUNCTION_NOT_FOUND:
|
||||
return "ERROR_FUNCTION_NOT_FOUND"
|
||||
case ERROR_CORRUPTED_INFOROM:
|
||||
return "ERROR_CORRUPTED_INFOROM"
|
||||
case ERROR_GPU_IS_LOST:
|
||||
return "ERROR_GPU_IS_LOST"
|
||||
case ERROR_RESET_REQUIRED:
|
||||
return "ERROR_RESET_REQUIRED"
|
||||
case ERROR_OPERATING_SYSTEM:
|
||||
return "ERROR_OPERATING_SYSTEM"
|
||||
case ERROR_LIB_RM_VERSION_MISMATCH:
|
||||
return "ERROR_LIB_RM_VERSION_MISMATCH"
|
||||
case ERROR_IN_USE:
|
||||
return "ERROR_IN_USE"
|
||||
case ERROR_MEMORY:
|
||||
return "ERROR_MEMORY"
|
||||
case ERROR_NO_DATA:
|
||||
return "ERROR_NO_DATA"
|
||||
case ERROR_VGPU_ECC_NOT_SUPPORTED:
|
||||
return "ERROR_VGPU_ECC_NOT_SUPPORTED"
|
||||
case ERROR_INSUFFICIENT_RESOURCES:
|
||||
return "ERROR_INSUFFICIENT_RESOURCES"
|
||||
case ERROR_FREQ_NOT_SUPPORTED:
|
||||
return "ERROR_FREQ_NOT_SUPPORTED"
|
||||
case ERROR_ARGUMENT_VERSION_MISMATCH:
|
||||
return "ERROR_ARGUMENT_VERSION_MISMATCH"
|
||||
case ERROR_DEPRECATED:
|
||||
return "ERROR_DEPRECATED"
|
||||
case ERROR_UNKNOWN:
|
||||
return "ERROR_UNKNOWN"
|
||||
default:
|
||||
return fmt.Sprintf("unknown return value: %d", r)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user