Add UsesOnlyNVGPUModule check to PropertyExtractor interface

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2024-03-26 10:46:42 +02:00
parent 791d093c62
commit d1e08f17ea
5 changed files with 142 additions and 7 deletions

View File

@ -31,4 +31,5 @@ type PropertyExtractor interface {
HasTegraFiles() (bool, string)
// Deprecated: Use HasTegraFiles instead.
IsTegraSystem() (bool, string)
UsesOnlyNVGPUModule() (bool, string)
}

View File

@ -16,8 +16,16 @@
package info
import (
"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
)
// options collects the settings that Option functions populate and that New
// reads when constructing the property extractor.
type options struct {
root root
root root
// nvmllib is the NVML implementation to use; New creates one when it is nil.
nvmllib nvml.Interface
// devicelib is the device library to use; New creates one on top of nvmllib
// when it is nil.
devicelib device.Interface
}
// New creates a new instance of the 'info' Interface.
@ -29,7 +37,17 @@ func New(opts ...Option) Interface {
if o.root == "" {
o.root = "/"
}
if o.nvmllib == nil {
o.nvmllib = nvml.New(
nvml.WithLibraryPath(o.root.tryResolveLibrary("libnvidia-ml.so.1")),
)
}
if o.devicelib == nil {
o.devicelib = device.New(device.WithNvml(o.nvmllib))
}
return &propertyExtractor{
root: o.root,
root: o.root,
nvmllib: o.nvmllib,
devicelib: o.devicelib,
}
}

View File

@ -16,9 +16,29 @@
package info
import (
"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
)
// Option is a functional option for the New() call: a function that is handed
// the options value and modifies it in place.
type Option func(*options)
// WithDeviceLib returns an Option that stores the supplied device library in
// the options.
func WithDeviceLib(devicelib device.Interface) Option {
	apply := func(o *options) {
		o.devicelib = devicelib
	}
	return apply
}
// WithNvmlLib returns an Option that stores the supplied NVML library in the
// options.
func WithNvmlLib(nvmllib nvml.Interface) Option {
	apply := func(o *options) {
		o.nvmllib = nvmllib
	}
	return apply
}
// WithRoot provides a Option to set the root of the 'info' interface.
func WithRoot(r string) Option {
return func(i *options) {

View File

@ -20,10 +20,16 @@ import (
"fmt"
"os"
"strings"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
)
// propertyExtractor is the package's implementation of Interface.
type propertyExtractor struct {
root root
root root
// nvmllib is initialized and shut down around device queries.
nvmllib nvml.Interface
// devicelib is used to visit the devices whose names are inspected.
devicelib device.Interface
}
var _ Interface = &propertyExtractor{}
@ -82,3 +88,56 @@ func (i *propertyExtractor) HasTegraFiles() (bool, string) {
return false, fmt.Sprintf("%v has no 'tegra' prefix", tegraFamilyFile)
}
// UsesOnlyNVGPUModule reports whether every device on the system uses the
// nvgpu kernel module, along with a human-readable reason for the result.
//
// The nvgpu module is used on Tegra-based systems when using the iGPU. Since
// some of these systems also support NVML, the device name reported by NVML
// is used to decide whether the system is an iGPU system. Devices that use
// the nvgpu module report names of the form:
//
//	GPU 0: Orin (nvgpu) (UUID: 54d0709b-558d-5a59-9c65-0c5fc14a21a4)
//
// The result is true only if at least one device is found and ALL device
// names contain "(nvgpu)". A failure to initialize NVML, a failure to read
// any device name, or a panic raised along the way yields false with the
// cause described in reason.
func (i *propertyExtractor) UsesOnlyNVGPUModule() (uses bool, reason string) {
	// Turn any panic raised below into a negative result so that this
	// function itself never panics.
	defer func() {
		if recovered := recover(); recovered != nil {
			uses, reason = false, fmt.Sprintf("panic: %v", recovered)
		}
	}()

	if ret := i.nvmllib.Init(); ret != nvml.SUCCESS {
		return false, fmt.Sprintf("failed to initialize nvml: %v", ret)
	}
	defer func() {
		// Best-effort cleanup: the shutdown status does not affect the result.
		_ = i.nvmllib.Shutdown()
	}()

	// First gather all device names; a failure on any device aborts the
	// check before any name is inspected.
	var deviceNames []string
	collectName := func(index int, d device.Device) error {
		deviceName, ret := d.GetName()
		if ret != nvml.SUCCESS {
			return fmt.Errorf("device %v: %v", index, ret)
		}
		deviceNames = append(deviceNames, deviceName)
		return nil
	}
	if err := i.devicelib.VisitDevices(collectName); err != nil {
		return false, fmt.Sprintf("failed to get device names: %v", err)
	}

	if len(deviceNames) == 0 {
		return false, "no devices found"
	}

	for _, deviceName := range deviceNames {
		if strings.Contains(deviceName, "(nvgpu)") {
			continue
		}
		return false, fmt.Sprintf("device %q does not use nvgpu module", deviceName)
	}
	return true, "all devices use nvgpu module"
}

View File

@ -29,6 +29,9 @@ var _ PropertyExtractor = &PropertyExtractorMock{}
// IsTegraSystemFunc: func() (bool, string) {
// panic("mock out the IsTegraSystem method")
// },
// UsesOnlyNVGPUModuleFunc: func() (bool, string) {
// panic("mock out the UsesOnlyNVGPUModule method")
// },
// }
//
// // use mockedPropertyExtractor in code that requires PropertyExtractor
@ -48,6 +51,9 @@ type PropertyExtractorMock struct {
// IsTegraSystemFunc mocks the IsTegraSystem method.
IsTegraSystemFunc func() (bool, string)
// UsesOnlyNVGPUModuleFunc mocks the UsesOnlyNVGPUModule method.
UsesOnlyNVGPUModuleFunc func() (bool, string)
// calls tracks calls to the methods.
calls struct {
// HasDXCore holds details about calls to the HasDXCore method.
@ -62,11 +68,15 @@ type PropertyExtractorMock struct {
// IsTegraSystem holds details about calls to the IsTegraSystem method.
IsTegraSystem []struct {
}
// UsesOnlyNVGPUModule holds details about calls to the UsesOnlyNVGPUModule method.
UsesOnlyNVGPUModule []struct {
}
}
lockHasDXCore sync.RWMutex
lockHasNvml sync.RWMutex
lockHasTegraFiles sync.RWMutex
lockIsTegraSystem sync.RWMutex
lockHasDXCore sync.RWMutex
lockHasNvml sync.RWMutex
lockHasTegraFiles sync.RWMutex
lockIsTegraSystem sync.RWMutex
lockUsesOnlyNVGPUModule sync.RWMutex
}
// HasDXCore calls HasDXCoreFunc.
@ -176,3 +186,30 @@ func (mock *PropertyExtractorMock) IsTegraSystemCalls() []struct {
mock.lockIsTegraSystem.RUnlock()
return calls
}
// UsesOnlyNVGPUModule records the call and then delegates to
// UsesOnlyNVGPUModuleFunc. It panics if UsesOnlyNVGPUModuleFunc is nil.
func (mock *PropertyExtractorMock) UsesOnlyNVGPUModule() (bool, string) {
	if mock.UsesOnlyNVGPUModuleFunc == nil {
		panic("PropertyExtractorMock.UsesOnlyNVGPUModuleFunc: method is nil but PropertyExtractor.UsesOnlyNVGPUModule was just called")
	}

	// The method takes no arguments, so the recorded entry is an empty struct.
	mock.lockUsesOnlyNVGPUModule.Lock()
	mock.calls.UsesOnlyNVGPUModule = append(mock.calls.UsesOnlyNVGPUModule, struct{}{})
	mock.lockUsesOnlyNVGPUModule.Unlock()

	return mock.UsesOnlyNVGPUModuleFunc()
}
// UsesOnlyNVGPUModuleCalls returns the recorded calls to UsesOnlyNVGPUModule.
// Check the number of calls with:
//
//	len(mockedPropertyExtractor.UsesOnlyNVGPUModuleCalls())
func (mock *PropertyExtractorMock) UsesOnlyNVGPUModuleCalls() []struct {
} {
	mock.lockUsesOnlyNVGPUModule.RLock()
	defer mock.lockUsesOnlyNVGPUModule.RUnlock()
	return mock.calls.UsesOnlyNVGPUModule
}