Merge branch 'use-nvml-symbol-lookup' into 'main'

Use GetLibrary().Lookup() in nvml package

See merge request nvidia/cloud-native/go-nvlib!54
This commit is contained in:
Evan Lezar 2023-10-24 13:01:03 +00:00
commit e5083eec19
17 changed files with 752 additions and 225 deletions

2
go.mod
View File

@ -3,7 +3,7 @@ module gitlab.com/nvidia/cloud-native/go-nvlib
go 1.20 go 1.20
require ( require (
github.com/NVIDIA/go-nvml v0.12.0-1 github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f
github.com/stretchr/testify v1.8.4 github.com/stretchr/testify v1.8.4
) )

11
go.sum
View File

@ -1,12 +1,19 @@
github.com/NVIDIA/go-nvml v0.12.0-1 h1:6mdjtlFo+17dWL7VFPfuRMtf0061TF4DKls9pkSw6uM= github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f h1:FTblgO87K1vPB8tcwM5EOFpFf6UpsrlDpErPm25mFWE=
github.com/NVIDIA/go-nvml v0.12.0-1/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs= github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -19,7 +19,6 @@ package device
import ( import (
"fmt" "fmt"
"github.com/NVIDIA/go-nvml/pkg/dl"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
) )
@ -152,8 +151,7 @@ func (d *device) GetCudaComputeCapabilityAsString() (string, error) {
// IsMigCapable checks if a device is capable of having MIG partitions created on it // IsMigCapable checks if a device is capable of having MIG partitions created on it
func (d *device) IsMigCapable() (bool, error) { func (d *device) IsMigCapable() (bool, error) {
err := d.lib.nvmlLookupSymbol("nvmlDeviceGetMigMode") if !d.lib.hasSymbol("nvmlDeviceGetMigMode") {
if err != nil {
return false, nil return false, nil
} }
@ -170,8 +168,7 @@ func (d *device) IsMigCapable() (bool, error) {
// IsMigEnabled checks if a device has MIG mode currently enabled on it // IsMigEnabled checks if a device has MIG mode currently enabled on it
func (d *device) IsMigEnabled() (bool, error) { func (d *device) IsMigEnabled() (bool, error) {
err := d.lib.nvmlLookupSymbol("nvmlDeviceGetMigMode") if !d.lib.hasSymbol("nvmlDeviceGetMigMode") {
if err != nil {
return false, nil return false, nil
} }
@ -465,22 +462,12 @@ func (d *devicelib) GetMigProfiles() ([]MigProfile, error) {
return profiles, nil return profiles, nil
} }
// nvmlLookupSymbol checks to see if the given symbol is present in the NVML library // hasSymbol checks to see if the given symbol is present in the NVML library.
func (d *devicelib) nvmlLookupSymbol(symbol string) error { // If devicelib is configured to not verify symbols, then all symbols are assumed to exist.
// If devicelib is configured to not verify symbols, then we short-circuit here func (d *devicelib) hasSymbol(symbol string) bool {
if !*d.verifySymbols { if !*d.verifySymbols {
return nil return true
} }
// Otherwise we lookup the provided symbol and verify it is available return d.nvml.Lookup(symbol) == nil
lib := dl.New("libnvidia-ml.so.1", dl.RTLD_LAZY|dl.RTLD_GLOBAL)
if lib == nil {
return fmt.Errorf("error instantiating DynamicLibrary for NVML")
}
err := lib.Open()
if err != nil {
return fmt.Errorf("error opening DynamicLibrary for NVML: %v", err)
}
defer lib.Close()
return lib.Lookup(symbol)
} }

View File

@ -34,6 +34,12 @@ func New() Interface {
return &nvmlLib{} return &nvmlLib{}
} }
// Lookup reports whether the named symbol can be resolved in the NVML library.
// The error from the underlying library lookup is returned unchanged.
func (n *nvmlLib) Lookup(name string) error {
	// TODO: For now we rely on the default NVML library and perform the lookups against this.
	defaultLib := nvml.GetLibrary()
	return defaultLib.Lookup(name)
}
// Init initializes an NVML Interface // Init initializes an NVML Interface
func (n *nvmlLib) Init() Return { func (n *nvmlLib) Init() Return {
ret := nvml.Init() ret := nvml.Init()

View File

@ -35,6 +35,9 @@ var _ Interface = &InterfaceMock{}
// InitFunc: func() Return { // InitFunc: func() Return {
// panic("mock out the Init method") // panic("mock out the Init method")
// }, // },
// LookupFunc: func(s string) error {
// panic("mock out the Lookup method")
// },
// ShutdownFunc: func() Return { // ShutdownFunc: func() Return {
// panic("mock out the Shutdown method") // panic("mock out the Shutdown method")
// }, // },
@ -69,6 +72,9 @@ type InterfaceMock struct {
// InitFunc mocks the Init method. // InitFunc mocks the Init method.
InitFunc func() Return InitFunc func() Return
// LookupFunc mocks the Lookup method.
LookupFunc func(s string) error
// ShutdownFunc mocks the Shutdown method. // ShutdownFunc mocks the Shutdown method.
ShutdownFunc func() Return ShutdownFunc func() Return
@ -104,6 +110,11 @@ type InterfaceMock struct {
// Init holds details about calls to the Init method. // Init holds details about calls to the Init method.
Init []struct { Init []struct {
} }
// Lookup holds details about calls to the Lookup method.
Lookup []struct {
// S is the s argument value.
S string
}
// Shutdown holds details about calls to the Shutdown method. // Shutdown holds details about calls to the Shutdown method.
Shutdown []struct { Shutdown []struct {
} }
@ -120,6 +131,7 @@ type InterfaceMock struct {
lockErrorString sync.RWMutex lockErrorString sync.RWMutex
lockEventSetCreate sync.RWMutex lockEventSetCreate sync.RWMutex
lockInit sync.RWMutex lockInit sync.RWMutex
lockLookup sync.RWMutex
lockShutdown sync.RWMutex lockShutdown sync.RWMutex
lockSystemGetCudaDriverVersion sync.RWMutex lockSystemGetCudaDriverVersion sync.RWMutex
lockSystemGetDriverVersion sync.RWMutex lockSystemGetDriverVersion sync.RWMutex
@ -302,6 +314,38 @@ func (mock *InterfaceMock) InitCalls() []struct {
return calls return calls
} }
// Lookup calls LookupFunc.
// It panics if LookupFunc has not been set; otherwise the argument is
// recorded (under lockLookup) before LookupFunc is invoked.
func (mock *InterfaceMock) Lookup(s string) error {
	if mock.LookupFunc == nil {
		panic("InterfaceMock.LookupFunc: method is nil but Interface.Lookup was just called")
	}

	mock.lockLookup.Lock()
	mock.calls.Lookup = append(mock.calls.Lookup, struct {
		S string
	}{S: s})
	mock.lockLookup.Unlock()

	return mock.LookupFunc(s)
}
// LookupCalls gets all the calls that were made to Lookup.
// Check the length with:
//
//	len(mockedInterface.LookupCalls())
func (mock *InterfaceMock) LookupCalls() []struct {
	S string
} {
	mock.lockLookup.RLock()
	defer mock.lockLookup.RUnlock()
	return mock.calls.Lookup
}
// Shutdown calls ShutdownFunc. // Shutdown calls ShutdownFunc.
func (mock *InterfaceMock) Shutdown() Return { func (mock *InterfaceMock) Shutdown() Return {
if mock.ShutdownFunc == nil { if mock.ShutdownFunc == nil {

View File

@ -30,6 +30,7 @@ type Interface interface {
ErrorString(r Return) string ErrorString(r Return) string
EventSetCreate() (EventSet, Return) EventSetCreate() (EventSet, Return)
Init() Return Init() Return
Lookup(string) error
Shutdown() Return Shutdown() Return
SystemGetCudaDriverVersion() (int, Return) SystemGetCudaDriverVersion() (int, Return)
SystemGetDriverVersion() (string, Return) SystemGetDriverVersion() (string, Return)

View File

@ -15,7 +15,9 @@
package dl package dl
import ( import (
"errors"
"fmt" "fmt"
"runtime"
"unsafe" "unsafe"
) )
@ -25,45 +27,72 @@ import (
import "C" import "C"
const ( const (
RTLD_LAZY = C.RTLD_LAZY RTLD_LAZY = C.RTLD_LAZY
RTLD_NOW = C.RTLD_NOW RTLD_NOW = C.RTLD_NOW
RTLD_GLOBAL = C.RTLD_GLOBAL RTLD_GLOBAL = C.RTLD_GLOBAL
RTLD_LOCAL = C.RTLD_LOCAL RTLD_LOCAL = C.RTLD_LOCAL
RTLD_NODELETE = C.RTLD_NODELETE RTLD_NODELETE = C.RTLD_NODELETE
RTLD_NOLOAD = C.RTLD_NOLOAD RTLD_NOLOAD = C.RTLD_NOLOAD
RTLD_DEEPBIND = C.RTLD_DEEPBIND
) )
type DynamicLibrary struct{ type DynamicLibrary struct {
Name string Name string
Flags int Flags int
handle unsafe.Pointer handle unsafe.Pointer
} }
func New(name string, flags int) *DynamicLibrary { func New(name string, flags int) *DynamicLibrary {
return &DynamicLibrary{ return &DynamicLibrary{
Name: name, Name: name,
Flags: flags, Flags: flags,
handle: nil, handle: nil,
} }
}
// withOSLock runs action with the calling goroutine locked to its current OS
// thread and returns whatever action returns. The lock is released once
// action has finished.
//
// NOTE(review): the dl wrappers in this file call dlError() inside the action;
// presumably the lock keeps the dl call and the following dlerror() on one
// thread — confirm.
func withOSLock(action func() error) (err error) {
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	err = action()
	return err
}
func dlError() error {
lastErr := C.dlerror()
if lastErr == nil {
return nil
}
return errors.New(C.GoString(lastErr))
} }
func (dl *DynamicLibrary) Open() error { func (dl *DynamicLibrary) Open() error {
name := C.CString(dl.Name) name := C.CString(dl.Name)
defer C.free(unsafe.Pointer(name)) defer C.free(unsafe.Pointer(name))
handle := C.dlopen(name, C.int(dl.Flags)) if err := withOSLock(func() error {
if handle == C.NULL { handle := C.dlopen(name, C.int(dl.Flags))
return fmt.Errorf("%s", C.GoString(C.dlerror())) if handle == nil {
return dlError()
}
dl.handle = handle
return nil
}); err != nil {
return err
} }
dl.handle = handle
return nil return nil
} }
func (dl *DynamicLibrary) Close() error { func (dl *DynamicLibrary) Close() error {
err := C.dlclose(dl.handle) if dl.handle == nil {
if err != 0 { return nil
return fmt.Errorf("%s", C.GoString(C.dlerror())) }
if err := withOSLock(func() error {
if C.dlclose(dl.handle) != 0 {
return dlError()
}
dl.handle = nil
return nil
}); err != nil {
return err
} }
return nil return nil
} }
@ -72,11 +101,17 @@ func (dl *DynamicLibrary) Lookup(symbol string) error {
sym := C.CString(symbol) sym := C.CString(symbol)
defer C.free(unsafe.Pointer(sym)) defer C.free(unsafe.Pointer(sym))
C.dlerror() // Clear out any previous errors var pointer unsafe.Pointer
C.dlsym(dl.handle, sym) if err := withOSLock(func() error {
err := C.dlerror() // Call dlError() to clear out any previous errors.
if unsafe.Pointer(err) == C.NULL { dlError()
pointer = C.dlsym(dl.handle, sym)
if pointer == nil {
return fmt.Errorf("symbol %q not found: %w", symbol, dlError())
}
return nil return nil
}); err != nil {
return err
} }
return fmt.Errorf("%s", C.GoString(err)) return nil
} }

26
vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl_linux.go generated vendored Normal file
View File

@ -0,0 +1,26 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package dl
// #cgo LDFLAGS: -ldl
// #include <dlfcn.h>
// #include <stdlib.h>
import "C"
// RTLD_DEEPBIND re-exports the C RTLD_DEEPBIND flag from <dlfcn.h>.
// NOTE(review): it is declared in this _linux file rather than alongside the
// other RTLD_* constants, presumably because the flag is not available on
// every platform — confirm.
const (
RTLD_DEEPBIND = C.RTLD_DEEPBIND
)

37
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/api.go generated vendored Normal file
View File

@ -0,0 +1,37 @@
/**
# Copyright 2023 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvml
// Library defines a set of functions defined on the underlying dynamic library.
type Library interface {
// Lookup looks up the named symbol in the library.
// A non-nil error means the lookup did not succeed.
Lookup(string) error
}
// dynamicLibrary is an interface for abstracting the underlying library.
// This also allows for mocking and testing.
//go:generate moq -stub -out dynamicLibrary_mock.go . dynamicLibrary
type dynamicLibrary interface {
// Lookup looks up the named symbol in the library.
Lookup(string) error
// Open opens the library.
Open() error
// Close closes the library.
Close() error
}
// Interface represents the interface for the NVML library.
type Interface interface {
// GetLibrary returns the Library view used for symbol lookups.
GetLibrary() Library
}

View File

@ -18,7 +18,8 @@
package nvml package nvml
/* /*
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files #cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup
#cgo CFLAGS: -DNVML_NO_UNVERSIONED_FUNC_DEFS=1 #cgo CFLAGS: -DNVML_NO_UNVERSIONED_FUNC_DEFS=1
#include "nvml.h" #include "nvml.h"
#include <stdlib.h> #include <stdlib.h>

View File

@ -0,0 +1,157 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package nvml
import (
"sync"
)
// Ensure, that dynamicLibraryMock does implement dynamicLibrary.
// If this is not the case, regenerate this file with moq.
var _ dynamicLibrary = &dynamicLibraryMock{}
// dynamicLibraryMock is a mock implementation of dynamicLibrary.
//
//	func TestSomethingThatUsesdynamicLibrary(t *testing.T) {
//
//		// make and configure a mocked dynamicLibrary
//		mockeddynamicLibrary := &dynamicLibraryMock{
//			CloseFunc: func() error {
//				panic("mock out the Close method")
//			},
//			LookupFunc: func(s string) error {
//				panic("mock out the Lookup method")
//			},
//			OpenFunc: func() error {
//				panic("mock out the Open method")
//			},
//		}
//
//		// use mockeddynamicLibrary in code that requires dynamicLibrary
//		// and then make assertions.
//
//	}
type dynamicLibraryMock struct {
	// CloseFunc mocks the Close method.
	CloseFunc func() error

	// LookupFunc mocks the Lookup method.
	LookupFunc func(s string) error

	// OpenFunc mocks the Open method.
	OpenFunc func() error

	// calls tracks calls to the methods.
	calls struct {
		// Close holds details about calls to the Close method.
		Close []struct {
		}
		// Lookup holds details about calls to the Lookup method.
		Lookup []struct {
			// S is the s argument value.
			S string
		}
		// Open holds details about calls to the Open method.
		Open []struct {
		}
	}

	// One lock per recorded method guards the matching slice in calls.
	lockClose  sync.RWMutex
	lockLookup sync.RWMutex
	lockOpen   sync.RWMutex
}

// Close records the call and then calls CloseFunc.
// When CloseFunc is unset the call is still recorded and a nil error is returned.
func (mock *dynamicLibraryMock) Close() error {
	mock.lockClose.Lock()
	mock.calls.Close = append(mock.calls.Close, struct{}{})
	mock.lockClose.Unlock()

	if mock.CloseFunc != nil {
		return mock.CloseFunc()
	}
	return nil
}

// CloseCalls gets all the calls that were made to Close.
// Check the length with:
//
//	len(mockeddynamicLibrary.CloseCalls())
func (mock *dynamicLibraryMock) CloseCalls() []struct {
} {
	mock.lockClose.RLock()
	defer mock.lockClose.RUnlock()
	return mock.calls.Close
}

// Lookup records the call (with its argument) and then calls LookupFunc.
// When LookupFunc is unset the call is still recorded and a nil error is returned.
func (mock *dynamicLibraryMock) Lookup(s string) error {
	mock.lockLookup.Lock()
	mock.calls.Lookup = append(mock.calls.Lookup, struct {
		S string
	}{S: s})
	mock.lockLookup.Unlock()

	if mock.LookupFunc != nil {
		return mock.LookupFunc(s)
	}
	return nil
}

// LookupCalls gets all the calls that were made to Lookup.
// Check the length with:
//
//	len(mockeddynamicLibrary.LookupCalls())
func (mock *dynamicLibraryMock) LookupCalls() []struct {
	S string
} {
	mock.lockLookup.RLock()
	defer mock.lockLookup.RUnlock()
	return mock.calls.Lookup
}

// Open records the call and then calls OpenFunc.
// When OpenFunc is unset the call is still recorded and a nil error is returned.
func (mock *dynamicLibraryMock) Open() error {
	mock.lockOpen.Lock()
	mock.calls.Open = append(mock.calls.Open, struct{}{})
	mock.lockOpen.Unlock()

	if mock.OpenFunc != nil {
		return mock.OpenFunc()
	}
	return nil
}

// OpenCalls gets all the calls that were made to Open.
// Check the length with:
//
//	len(mockeddynamicLibrary.OpenCalls())
func (mock *dynamicLibraryMock) OpenCalls() []struct {
} {
	mock.lockOpen.RLock()
	defer mock.lockOpen.RUnlock()
	return mock.calls.Open
}

93
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/gpm.go generated vendored Normal file
View File

@ -0,0 +1,93 @@
// Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nvml
// nvml.GpmMetricsGet()

// GpmMetricsGetVType is a version selector for the GPM metrics query. It holds
// the request until a versioned method such as V1 is invoked.
type GpmMetricsGetVType struct {
	metricsGet *GpmMetricsGetType
}

// GpmMetricsGetV wraps MetricsGet in a version selector.
func GpmMetricsGetV(MetricsGet *GpmMetricsGetType) GpmMetricsGetVType {
	return GpmMetricsGetVType{metricsGet: MetricsGet}
}

// V1 sets the wrapped request's Version field to 1 and submits it through
// nvmlGpmMetricsGet.
func (v GpmMetricsGetVType) V1() Return {
	request := v.metricsGet
	request.Version = 1
	return nvmlGpmMetricsGet(request)
}

// GpmMetricsGet sets MetricsGet.Version to GPM_METRICS_GET_VERSION and submits
// the request through nvmlGpmMetricsGet. MetricsGet is modified in place.
func GpmMetricsGet(MetricsGet *GpmMetricsGetType) Return {
	MetricsGet.Version = GPM_METRICS_GET_VERSION
	return nvmlGpmMetricsGet(MetricsGet)
}
// nvml.GpmSampleFree()
//
// GpmSampleFree passes GpmSample to nvmlGpmSampleFree and returns its Return code.
func GpmSampleFree(GpmSample GpmSample) Return {
return nvmlGpmSampleFree(GpmSample)
}
// nvml.GpmSampleAlloc()
//
// GpmSampleAlloc passes the GpmSample pointer to nvmlGpmSampleAlloc and returns
// its Return code.
func GpmSampleAlloc(GpmSample *GpmSample) Return {
return nvmlGpmSampleAlloc(GpmSample)
}
// nvml.GpmSampleGet()
//
// GpmSampleGet passes Device and GpmSample to nvmlGpmSampleGet and returns its
// Return code.
func GpmSampleGet(Device Device, GpmSample GpmSample) Return {
return nvmlGpmSampleGet(Device, GpmSample)
}
// GpmSampleGet is the method form of the package-level GpmSampleGet, using the
// receiver as the device.
func (Device Device) GpmSampleGet(GpmSample GpmSample) Return {
return GpmSampleGet(Device, GpmSample)
}
// nvml.GpmQueryDeviceSupport()

// GpmSupportV is a version selector for the GPM device-support query. It holds
// the target device until a versioned method such as V1 is invoked.
type GpmSupportV struct {
	device Device
}

// GpmQueryDeviceSupportV returns a version selector bound to Device.
func GpmQueryDeviceSupportV(Device Device) GpmSupportV {
	return GpmSupportV{device: Device}
}

// GpmQueryDeviceSupportV returns a version selector bound to the receiver.
func (Device Device) GpmQueryDeviceSupportV() GpmSupportV {
	return GpmSupportV{device: Device}
}

// V1 queries GPM support for the bound device with the request's Version
// field fixed at 1.
func (v GpmSupportV) V1() (GpmSupport, Return) {
	support := GpmSupport{Version: 1}
	ret := nvmlGpmQueryDeviceSupport(v.device, &support)
	return support, ret
}

// GpmQueryDeviceSupport queries GPM support for Device with the request's
// Version field set to GPM_SUPPORT_VERSION.
func GpmQueryDeviceSupport(Device Device) (GpmSupport, Return) {
	support := GpmSupport{Version: GPM_SUPPORT_VERSION}
	ret := nvmlGpmQueryDeviceSupport(Device, &support)
	return support, ret
}

// GpmQueryDeviceSupport is the method form of the package-level
// GpmQueryDeviceSupport, using the receiver as the device.
func (Device Device) GpmQueryDeviceSupport() (GpmSupport, Return) {
	return GpmQueryDeviceSupport(Device)
}
// nvml.GpmMigSampleGet()
//
// GpmMigSampleGet passes Device, GpuInstanceId and GpmSample to
// nvmlGpmMigSampleGet and returns its Return code. GpuInstanceId is converted
// to uint32 for the call.
func GpmMigSampleGet(Device Device, GpuInstanceId int, GpmSample GpmSample) Return {
return nvmlGpmMigSampleGet(Device, uint32(GpuInstanceId), GpmSample)
}
// GpmMigSampleGet is the method form of the package-level GpmMigSampleGet,
// using the receiver as the device.
func (Device Device) GpmMigSampleGet(GpuInstanceId int, GpmSample GpmSample) Return {
return GpmMigSampleGet(Device, GpuInstanceId, GpmSample)
}

View File

@ -14,45 +14,21 @@
package nvml package nvml
import (
"fmt"
"github.com/NVIDIA/go-nvml/pkg/dl"
)
import "C" import "C"
const (
nvmlLibraryName = "libnvidia-ml.so.1"
nvmlLibraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
)
var nvml *dl.DynamicLibrary
// nvml.Init() // nvml.Init()
func Init() Return { func Init() Return {
lib := dl.New(nvmlLibraryName, nvmlLibraryLoadFlags) if err := libnvml.load(); err != nil {
err := lib.Open()
if err != nil {
return ERROR_LIBRARY_NOT_FOUND return ERROR_LIBRARY_NOT_FOUND
} }
nvml = lib
updateVersionedSymbols()
return nvmlInit() return nvmlInit()
} }
// nvml.InitWithFlags() // nvml.InitWithFlags()
func InitWithFlags(Flags uint32) Return { func InitWithFlags(Flags uint32) Return {
lib := dl.New(nvmlLibraryName, nvmlLibraryLoadFlags) if err := libnvml.load(); err != nil {
err := lib.Open()
if err != nil {
return ERROR_LIBRARY_NOT_FOUND return ERROR_LIBRARY_NOT_FOUND
} }
nvml = lib
return nvmlInitWithFlags(Flags) return nvmlInitWithFlags(Flags)
} }
@ -63,156 +39,10 @@ func Shutdown() Return {
return ret return ret
} }
err := nvml.Close() err := libnvml.close()
if err != nil { if err != nil {
panic(fmt.Sprintf("error closing %s: %v", nvmlLibraryName, err)) panic(err)
} }
return ret return ret
} }
// Default all versioned APIs to v1 (to infer the types)
var nvmlInit = nvmlInit_v1
var nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v1
var nvmlDeviceGetCount = nvmlDeviceGetCount_v1
var nvmlDeviceGetHandleByIndex = nvmlDeviceGetHandleByIndex_v1
var nvmlDeviceGetHandleByPciBusId = nvmlDeviceGetHandleByPciBusId_v1
var nvmlDeviceGetNvLinkRemotePciInfo = nvmlDeviceGetNvLinkRemotePciInfo_v1
var nvmlDeviceRemoveGpu = nvmlDeviceRemoveGpu_v1
var nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v1
var nvmlEventSetWait = nvmlEventSetWait_v1
var nvmlDeviceGetAttributes = nvmlDeviceGetAttributes_v1
var nvmlComputeInstanceGetInfo = nvmlComputeInstanceGetInfo_v1
var DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v1
var DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v1
var DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v1
var GetBlacklistDeviceCount = GetExcludedDeviceCount
var GetBlacklistDeviceInfoByIndex = GetExcludedDeviceInfoByIndex
var nvmlDeviceGetGpuInstancePossiblePlacements = nvmlDeviceGetGpuInstancePossiblePlacements_v1
var nvmlVgpuInstanceGetLicenseInfo = nvmlVgpuInstanceGetLicenseInfo_v1
type BlacklistDeviceInfo = ExcludedDeviceInfo
type ProcessInfo_v1Slice []ProcessInfo_v1
type ProcessInfo_v2Slice []ProcessInfo_v2
func (pis ProcessInfo_v1Slice) ToProcessInfoSlice() []ProcessInfo {
var newInfos []ProcessInfo
for _, pi := range pis {
info := ProcessInfo{
Pid: pi.Pid,
UsedGpuMemory: pi.UsedGpuMemory,
GpuInstanceId: 0xFFFFFFFF, // GPU instance ID is invalid in v1
ComputeInstanceId: 0xFFFFFFFF, // Compute instance ID is invalid in v1
}
newInfos = append(newInfos, info)
}
return newInfos
}
func (pis ProcessInfo_v2Slice) ToProcessInfoSlice() []ProcessInfo {
var newInfos []ProcessInfo
for _, pi := range pis {
info := ProcessInfo{
Pid: pi.Pid,
UsedGpuMemory: pi.UsedGpuMemory,
GpuInstanceId: pi.GpuInstanceId,
ComputeInstanceId: pi.ComputeInstanceId,
}
newInfos = append(newInfos, info)
}
return newInfos
}
// updateVersionedSymbols()
func updateVersionedSymbols() {
err := nvml.Lookup("nvmlInit_v2")
if err == nil {
nvmlInit = nvmlInit_v2
}
err = nvml.Lookup("nvmlDeviceGetPciInfo_v2")
if err == nil {
nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v2
}
err = nvml.Lookup("nvmlDeviceGetPciInfo_v3")
if err == nil {
nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v3
}
err = nvml.Lookup("nvmlDeviceGetCount_v2")
if err == nil {
nvmlDeviceGetCount = nvmlDeviceGetCount_v2
}
err = nvml.Lookup("nvmlDeviceGetHandleByIndex_v2")
if err == nil {
nvmlDeviceGetHandleByIndex = nvmlDeviceGetHandleByIndex_v2
}
err = nvml.Lookup("nvmlDeviceGetHandleByPciBusId_v2")
if err == nil {
nvmlDeviceGetHandleByPciBusId = nvmlDeviceGetHandleByPciBusId_v2
}
err = nvml.Lookup("nvmlDeviceGetNvLinkRemotePciInfo_v2")
if err == nil {
nvmlDeviceGetNvLinkRemotePciInfo = nvmlDeviceGetNvLinkRemotePciInfo_v2
}
// Unable to overwrite nvmlDeviceRemoveGpu() because the v2 function takes
// a different set of parameters than the v1 function.
//err = nvml.Lookup("nvmlDeviceRemoveGpu_v2")
//if err == nil {
// nvmlDeviceRemoveGpu = nvmlDeviceRemoveGpu_v2
//}
err = nvml.Lookup("nvmlDeviceGetGridLicensableFeatures_v2")
if err == nil {
nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v2
}
err = nvml.Lookup("nvmlDeviceGetGridLicensableFeatures_v3")
if err == nil {
nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v3
}
err = nvml.Lookup("nvmlDeviceGetGridLicensableFeatures_v4")
if err == nil {
nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v4
}
err = nvml.Lookup("nvmlEventSetWait_v2")
if err == nil {
nvmlEventSetWait = nvmlEventSetWait_v2
}
err = nvml.Lookup("nvmlDeviceGetAttributes_v2")
if err == nil {
nvmlDeviceGetAttributes = nvmlDeviceGetAttributes_v2
}
err = nvml.Lookup("nvmlComputeInstanceGetInfo_v2")
if err == nil {
nvmlComputeInstanceGetInfo = nvmlComputeInstanceGetInfo_v2
}
err = nvml.Lookup("nvmlDeviceGetComputeRunningProcesses_v2")
if err == nil {
DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v2
}
err = nvml.Lookup("nvmlDeviceGetComputeRunningProcesses_v3")
if err == nil {
DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v3
}
err = nvml.Lookup("nvmlDeviceGetGraphicsRunningProcesses_v2")
if err == nil {
DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v2
}
err = nvml.Lookup("nvmlDeviceGetGraphicsRunningProcesses_v3")
if err == nil {
DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v3
}
err = nvml.Lookup("nvmlDeviceGetMPSComputeRunningProcesses_v2")
if err == nil {
DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v2
}
err = nvml.Lookup("nvmlDeviceGetMPSComputeRunningProcesses_v3")
if err == nil {
DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v3
}
err = nvml.Lookup("nvmlDeviceGetGpuInstancePossiblePlacements_v2")
if err == nil {
nvmlDeviceGetGpuInstancePossiblePlacements = nvmlDeviceGetGpuInstancePossiblePlacements_v2
}
err = nvml.Lookup("nvmlVgpuInstanceGetLicenseInfo_v2")
if err == nil {
nvmlVgpuInstanceGetLicenseInfo = nvmlVgpuInstanceGetLicenseInfo_v2
}
}

302
vendor/github.com/NVIDIA/go-nvml/pkg/nvml/lib.go generated vendored Normal file
View File

@ -0,0 +1,302 @@
/**
# Copyright 2023 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvml
import (
"errors"
"fmt"
"sync"
"github.com/NVIDIA/go-nvml/pkg/dl"
)
import "C"
const (
// defaultNvmlLibraryName is the library name the global libnvml instance is configured with.
defaultNvmlLibraryName = "libnvidia-ml.so.1"
// defaultNvmlLibraryLoadFlags are the dl flags the global libnvml instance is configured with.
defaultNvmlLibraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
)
// errLibraryNotLoaded is wrapped into the error returned by (*library).Lookup
// when no dynamic library is loaded.
var errLibraryNotLoaded = errors.New("library not loaded")
// errLibraryAlreadyLoaded is a sentinel for the "already loaded" condition.
// NOTE(review): nothing in the visible part of this file returns it — confirm
// whether it is used elsewhere.
var errLibraryAlreadyLoaded = errors.New("library already loaded")
// library represents an nvml library.
// This includes a reference to the underlying DynamicLibrary
type library struct {
// The embedded Mutex is held by load and close while they read and replace dl.
// NOTE(review): Lookup reads dl without taking the lock (load calls it, via
// updateVersionedSymbols, while already holding the lock).
sync.Mutex
// path is passed to newDynamicLibrary and used in load/close error messages.
path string
// flags is passed to newDynamicLibrary together with path.
flags int
// dl is the opened dynamic library; it is nil while no library is loaded.
dl dynamicLibrary
}
// libnvml is a global instance of the nvml library.
// It is configured with the default library name and load flags; its dl field
// stays nil until load succeeds.
var libnvml = library{
path: defaultNvmlLibraryName,
flags: defaultNvmlLibraryLoadFlags,
}
// Compile-time assertion that *library implements Interface.
var _ Interface = (*library)(nil)
// GetLibrary returns the library as a Library interface.
func (l *library) GetLibrary() Library {
return l
}
// GetLibrary returns a representation of the underlying library that implements the Library interface.
// The value returned is backed by the package-level libnvml instance.
func GetLibrary() Library {
return libnvml.GetLibrary()
}
// Lookup reports whether name can be resolved in the loaded dynamic library by
// delegating to the underlying library's Lookup.
// If the receiver is nil or no library is currently loaded, the returned
// error wraps errLibraryNotLoaded.
func (l *library) Lookup(name string) error {
	loaded := l != nil && l.dl != nil
	if !loaded {
		return fmt.Errorf("error looking up %s: %w", name, errLibraryNotLoaded)
	}
	return l.dl.Lookup(name)
}
// newDynamicLibrary is a function variable that can be overridden for testing.
// By default it builds the library handle with dl.New(path, flags); load calls
// it with the library's path and flags.
var newDynamicLibrary = func(path string, flags int) dynamicLibrary {
return dl.New(path, flags)
}
// load opens the dynamic library and refreshes the versioned symbols.
// Calling load on an already loaded library is a no-op that returns nil.
// A failure to open the library is returned wrapped, with the library path in
// the message.
func (l *library) load() error {
	l.Lock()
	defer l.Unlock()

	// Already loaded: nothing to do.
	if l.dl != nil {
		return nil
	}

	lib := newDynamicLibrary(l.path, l.flags)
	if err := lib.Open(); err != nil {
		return fmt.Errorf("error opening %s: %w", l.path, err)
	}

	// Store the handle before resolving versioned symbols:
	// updateVersionedSymbols goes through l.Lookup, which reads l.dl.
	l.dl = lib
	l.updateVersionedSymbols()

	return nil
}
// close closes the underlying dynamic library and resets the stored handle to
// nil so that a later load opens it afresh.
// Calling close when nothing is loaded is a no-op that returns nil. If the
// underlying Close fails, the handle is kept and the error is returned wrapped.
func (l *library) close() error {
	l.Lock()
	defer l.Unlock()

	if l.dl == nil {
		return nil
	}

	if err := l.dl.Close(); err != nil {
		return fmt.Errorf("error closing %s: %w", l.path, err)
	}

	l.dl = nil
	return nil
}
// Default all versioned APIs to v1 (to infer the types)
// updateVersionedSymbols may repoint these variables at newer versions once
// the library has been loaded.
var nvmlInit = nvmlInit_v1
var nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v1
var nvmlDeviceGetCount = nvmlDeviceGetCount_v1
var nvmlDeviceGetHandleByIndex = nvmlDeviceGetHandleByIndex_v1
var nvmlDeviceGetHandleByPciBusId = nvmlDeviceGetHandleByPciBusId_v1
var nvmlDeviceGetNvLinkRemotePciInfo = nvmlDeviceGetNvLinkRemotePciInfo_v1
var nvmlDeviceRemoveGpu = nvmlDeviceRemoveGpu_v1
var nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v1
var nvmlEventSetWait = nvmlEventSetWait_v1
var nvmlDeviceGetAttributes = nvmlDeviceGetAttributes_v1
var nvmlComputeInstanceGetInfo = nvmlComputeInstanceGetInfo_v1
var DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v1
var DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v1
var DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v1
// Blacklist-named aliases for the corresponding Excluded* functions.
var GetBlacklistDeviceCount = GetExcludedDeviceCount
var GetBlacklistDeviceInfoByIndex = GetExcludedDeviceInfoByIndex
var nvmlDeviceGetGpuInstancePossiblePlacements = nvmlDeviceGetGpuInstancePossiblePlacements_v1
var nvmlVgpuInstanceGetLicenseInfo = nvmlVgpuInstanceGetLicenseInfo_v1
// BlacklistDeviceInfo was replaced by ExcludedDeviceInfo
type BlacklistDeviceInfo = ExcludedDeviceInfo
// ProcessInfo_v1Slice is a slice of ProcessInfo_v1 that can be converted with ToProcessInfoSlice.
type ProcessInfo_v1Slice []ProcessInfo_v1
// ProcessInfo_v2Slice is a slice of ProcessInfo_v2 that can be converted with ToProcessInfoSlice.
type ProcessInfo_v2Slice []ProcessInfo_v2
// ToProcessInfoSlice converts each v1 entry into a ProcessInfo, copying Pid
// and UsedGpuMemory and setting both instance IDs to 0xFFFFFFFF.
// An empty receiver yields a nil slice.
func (pis ProcessInfo_v1Slice) ToProcessInfoSlice() []ProcessInfo {
	var converted []ProcessInfo
	for i := range pis {
		converted = append(converted, ProcessInfo{
			Pid:               pis[i].Pid,
			UsedGpuMemory:     pis[i].UsedGpuMemory,
			GpuInstanceId:     0xFFFFFFFF, // GPU instance ID is invalid in v1
			ComputeInstanceId: 0xFFFFFFFF, // Compute instance ID is invalid in v1
		})
	}
	return converted
}
// ToProcessInfoSlice converts each v2 entry into a ProcessInfo, copying Pid,
// UsedGpuMemory, GpuInstanceId and ComputeInstanceId as they are.
// An empty receiver yields a nil slice.
func (pis ProcessInfo_v2Slice) ToProcessInfoSlice() []ProcessInfo {
	var converted []ProcessInfo
	for i := range pis {
		converted = append(converted, ProcessInfo{
			Pid:               pis[i].Pid,
			UsedGpuMemory:     pis[i].UsedGpuMemory,
			GpuInstanceId:     pis[i].GpuInstanceId,
			ComputeInstanceId: pis[i].ComputeInstanceId,
		})
	}
	return converted
}
// updateVersionedSymbols rebinds the package's versioned function variables to
// newer variants exported by the loaded dynamic library.
//
// Each variable starts out bound to its `v1` implementation. For every newer
// version the symbol name is looked up in the library and, when the lookup
// succeeds, the matching Go wrapper is assigned. Versions are probed in
// ascending order, so a later successful probe overrides an earlier one.
//
// When new versioned symbols are added, they need a `v1` default at package
// level and a corresponding probe and assignment below.
func (l *library) updateVersionedSymbols() {
	// exported reports whether looking up the named symbol succeeds.
	exported := func(symbol string) bool {
		return l.Lookup(symbol) == nil
	}

	if exported("nvmlInit_v2") {
		nvmlInit = nvmlInit_v2
	}

	if exported("nvmlDeviceGetPciInfo_v2") {
		nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v2
	}
	if exported("nvmlDeviceGetPciInfo_v3") {
		nvmlDeviceGetPciInfo = nvmlDeviceGetPciInfo_v3
	}

	if exported("nvmlDeviceGetCount_v2") {
		nvmlDeviceGetCount = nvmlDeviceGetCount_v2
	}

	if exported("nvmlDeviceGetHandleByIndex_v2") {
		nvmlDeviceGetHandleByIndex = nvmlDeviceGetHandleByIndex_v2
	}

	if exported("nvmlDeviceGetHandleByPciBusId_v2") {
		nvmlDeviceGetHandleByPciBusId = nvmlDeviceGetHandleByPciBusId_v2
	}

	if exported("nvmlDeviceGetNvLinkRemotePciInfo_v2") {
		nvmlDeviceGetNvLinkRemotePciInfo = nvmlDeviceGetNvLinkRemotePciInfo_v2
	}

	// Unable to overwrite nvmlDeviceRemoveGpu() because the v2 function takes
	// a different set of parameters than the v1 function.
	//if exported("nvmlDeviceRemoveGpu_v2") {
	//    nvmlDeviceRemoveGpu = nvmlDeviceRemoveGpu_v2
	//}

	if exported("nvmlDeviceGetGridLicensableFeatures_v2") {
		nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v2
	}
	if exported("nvmlDeviceGetGridLicensableFeatures_v3") {
		nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v3
	}
	if exported("nvmlDeviceGetGridLicensableFeatures_v4") {
		nvmlDeviceGetGridLicensableFeatures = nvmlDeviceGetGridLicensableFeatures_v4
	}

	if exported("nvmlEventSetWait_v2") {
		nvmlEventSetWait = nvmlEventSetWait_v2
	}

	if exported("nvmlDeviceGetAttributes_v2") {
		nvmlDeviceGetAttributes = nvmlDeviceGetAttributes_v2
	}

	if exported("nvmlComputeInstanceGetInfo_v2") {
		nvmlComputeInstanceGetInfo = nvmlComputeInstanceGetInfo_v2
	}

	if exported("nvmlDeviceGetComputeRunningProcesses_v2") {
		DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v2
	}
	if exported("nvmlDeviceGetComputeRunningProcesses_v3") {
		DeviceGetComputeRunningProcesses = deviceGetComputeRunningProcesses_v3
	}

	if exported("nvmlDeviceGetGraphicsRunningProcesses_v2") {
		DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v2
	}
	if exported("nvmlDeviceGetGraphicsRunningProcesses_v3") {
		DeviceGetGraphicsRunningProcesses = deviceGetGraphicsRunningProcesses_v3
	}

	if exported("nvmlDeviceGetMPSComputeRunningProcesses_v2") {
		DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v2
	}
	if exported("nvmlDeviceGetMPSComputeRunningProcesses_v3") {
		DeviceGetMPSComputeRunningProcesses = deviceGetMPSComputeRunningProcesses_v3
	}

	if exported("nvmlDeviceGetGpuInstancePossiblePlacements_v2") {
		nvmlDeviceGetGpuInstancePossiblePlacements = nvmlDeviceGetGpuInstancePossiblePlacements_v2
	}

	if exported("nvmlVgpuInstanceGetLicenseInfo_v2") {
		nvmlVgpuInstanceGetLicenseInfo = nvmlVgpuInstanceGetLicenseInfo_v2
	}
}
// LibraryOption represents a functional option to configure the underlying NVML library.
// An option is a function that receives the library and modifies its settings;
// options are applied through SetLibraryOptions.
type LibraryOption func(*library)
// WithLibraryPath returns a LibraryOption that sets the library path (name)
// to be used by the NVML library.
func WithLibraryPath(path string) LibraryOption {
	setPath := func(lib *library) {
		lib.path = path
	}
	return setPath
}
// SetLibraryOptions applies the specified options to the NVML library.
// If this is called when a library is already loaded, an error is raised.
//
// Once the options have run, an empty path and zero load flags are replaced
// by the package defaults.
func SetLibraryOptions(opts ...LibraryOption) error {
	lib := &libnvml

	lib.Lock()
	defer lib.Unlock()

	// A non-nil dl means a library is already loaded; refuse to reconfigure.
	if lib.dl != nil {
		return errLibraryAlreadyLoaded
	}

	for i := range opts {
		opts[i](lib)
	}

	// Fall back to the defaults for anything the options left unset.
	if lib.path == "" {
		lib.path = defaultNvmlLibraryName
	}
	if lib.flags == 0 {
		lib.flags = defaultNvmlLibraryLoadFlags
	}
	return nil
}

View File

@ -18,7 +18,8 @@
package nvml package nvml
/* /*
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files #cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup
#cgo CFLAGS: -DNVML_NO_UNVERSIONED_FUNC_DEFS=1 #cgo CFLAGS: -DNVML_NO_UNVERSIONED_FUNC_DEFS=1
#include "nvml.h" #include "nvml.h"
#include <stdlib.h> #include <stdlib.h>

View File

@ -438,7 +438,7 @@ func GetVgpuVersion() (VgpuVersion, VgpuVersion, Return) {
// nvml.SetVgpuVersion() // nvml.SetVgpuVersion()
func SetVgpuVersion(VgpuVersion *VgpuVersion) Return { func SetVgpuVersion(VgpuVersion *VgpuVersion) Return {
return SetVgpuVersion(VgpuVersion) return nvmlSetVgpuVersion(VgpuVersion)
} }
// nvml.VgpuInstanceClearAccountingPids() // nvml.VgpuInstanceClearAccountingPids()

2
vendor/modules.txt vendored
View File

@ -1,4 +1,4 @@
# github.com/NVIDIA/go-nvml v0.12.0-1 # github.com/NVIDIA/go-nvml v0.12.0-1.0.20231020145430-e06766c5e74f
## explicit; go 1.15 ## explicit; go 1.15
github.com/NVIDIA/go-nvml/pkg/dl github.com/NVIDIA/go-nvml/pkg/dl
github.com/NVIDIA/go-nvml/pkg/nvml github.com/NVIDIA/go-nvml/pkg/nvml