From d1e08f17ea893eead136008184e0e94392918cc0 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 26 Mar 2024 10:46:42 +0200 Subject: [PATCH] Add UsesOnlyNVGPUModule check to PropertyExtractor interface Signed-off-by: Evan Lezar --- pkg/nvlib/info/api.go | 1 + pkg/nvlib/info/builder.go | 22 +++++++- pkg/nvlib/info/options.go | 20 ++++++++ pkg/nvlib/info/property-extractor.go | 61 ++++++++++++++++++++++- pkg/nvlib/info/property-extractor_mock.go | 45 +++++++++++++++-- 5 files changed, 142 insertions(+), 7 deletions(-) diff --git a/pkg/nvlib/info/api.go b/pkg/nvlib/info/api.go index 41b8e22..b466bcb 100644 --- a/pkg/nvlib/info/api.go +++ b/pkg/nvlib/info/api.go @@ -31,4 +31,5 @@ type PropertyExtractor interface { HasTegraFiles() (bool, string) // Deprecated: Use HasTegraFiles instead. IsTegraSystem() (bool, string) + UsesOnlyNVGPUModule() (bool, string) } diff --git a/pkg/nvlib/info/builder.go b/pkg/nvlib/info/builder.go index d9275ca..bf2dd89 100644 --- a/pkg/nvlib/info/builder.go +++ b/pkg/nvlib/info/builder.go @@ -16,8 +16,16 @@ package info +import ( + "github.com/NVIDIA/go-nvml/pkg/nvml" + + "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" +) + type options struct { - root root + root root + nvmllib nvml.Interface + devicelib device.Interface } // New creates a new instance of the 'info' Interface. 
@@ -29,7 +37,17 @@ func New(opts ...Option) Interface { if o.root == "" { o.root = "/" } + if o.nvmllib == nil { + o.nvmllib = nvml.New( + nvml.WithLibraryPath(o.root.tryResolveLibrary("libnvidia-ml.so.1")), + ) + } + if o.devicelib == nil { + o.devicelib = device.New(device.WithNvml(o.nvmllib)) + } return &propertyExtractor{ - root: o.root, + root: o.root, + nvmllib: o.nvmllib, + devicelib: o.devicelib, } } diff --git a/pkg/nvlib/info/options.go b/pkg/nvlib/info/options.go index c4265d1..f8b47aa 100644 --- a/pkg/nvlib/info/options.go +++ b/pkg/nvlib/info/options.go @@ -16,9 +16,29 @@ package info +import ( + "github.com/NVIDIA/go-nvml/pkg/nvml" + + "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" +) + // Option defines a function for passing options to the New() call. type Option func(*options) +// WithDeviceLib sets the device library for the library. +func WithDeviceLib(devicelib device.Interface) Option { + return func(l *options) { + l.devicelib = devicelib + } +} + +// WithNvmlLib sets the nvml library for the library. +func WithNvmlLib(nvmllib nvml.Interface) Option { + return func(l *options) { + l.nvmllib = nvmllib + } +} + // WithRoot provides a Option to set the root of the 'info' interface. 
func WithRoot(r string) Option { return func(i *options) { diff --git a/pkg/nvlib/info/property-extractor.go b/pkg/nvlib/info/property-extractor.go index 9e41a54..43ec3b8 100644 --- a/pkg/nvlib/info/property-extractor.go +++ b/pkg/nvlib/info/property-extractor.go @@ -20,10 +20,16 @@ import ( "fmt" "os" "strings" + + "github.com/NVIDIA/go-nvml/pkg/nvml" + + "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" ) type propertyExtractor struct { - root root + root root + nvmllib nvml.Interface + devicelib device.Interface } var _ Interface = &propertyExtractor{} @@ -82,3 +88,56 @@ func (i *propertyExtractor) HasTegraFiles() (bool, string) { return false, fmt.Sprintf("%v has no 'tegra' prefix", tegraFamilyFile) } + +// UsesOnlyNVGPUModule checks whether only the nvgpu module is used. +// This kernel module is used on Tegra-based systems when using the iGPU. +// Since some of these systems also support NVML, we use the device name +// reported by NVML to determine whether the system is an iGPU system. +// +// Devices that use the nvgpu module have their device names as: +// +// GPU 0: Orin (nvgpu) (UUID: 54d0709b-558d-5a59-9c65-0c5fc14a21a4) +// +// This function returns true if ALL devices use the nvgpu module. 
+func (i *propertyExtractor) UsesOnlyNVGPUModule() (uses bool, reason string) { + // We ensure that this function never panics: any panic is recovered and reported through the named results as uses=false with the panic value in reason. + defer func() { + if err := recover(); err != nil { + uses = false + reason = fmt.Sprintf("panic: %v", err) + } + }() + + ret := i.nvmllib.Init() + if ret != nvml.SUCCESS { + return false, fmt.Sprintf("failed to initialize nvml: %v", ret) + } + defer func() { + _ = i.nvmllib.Shutdown() + }() + + var names []string + + err := i.devicelib.VisitDevices(func(i int, d device.Device) error { + name, ret := d.GetName() + if ret != nvml.SUCCESS { + return fmt.Errorf("device %v: %v", i, ret) + } + names = append(names, name) + return nil + }) + if err != nil { + return false, fmt.Sprintf("failed to get device names: %v", err) + } + + if len(names) == 0 { + return false, "no devices found" + } + + for _, name := range names { + if !strings.Contains(name, "(nvgpu)") { + return false, fmt.Sprintf("device %q does not use nvgpu module", name) + } + } + return true, "all devices use nvgpu module" +} diff --git a/pkg/nvlib/info/property-extractor_mock.go b/pkg/nvlib/info/property-extractor_mock.go index 570dfe3..f2b057e 100644 --- a/pkg/nvlib/info/property-extractor_mock.go +++ b/pkg/nvlib/info/property-extractor_mock.go @@ -29,6 +29,9 @@ var _ PropertyExtractor = &PropertyExtractorMock{} // IsTegraSystemFunc: func() (bool, string) { // panic("mock out the IsTegraSystem method") // }, +// UsesOnlyNVGPUModuleFunc: func() (bool, string) { +// panic("mock out the UsesOnlyNVGPUModule method") +// }, // } // // // use mockedPropertyExtractor in code that requires PropertyExtractor @@ -48,6 +51,9 @@ type PropertyExtractorMock struct { // IsTegraSystemFunc mocks the IsTegraSystem method. IsTegraSystemFunc func() (bool, string) + // UsesOnlyNVGPUModuleFunc mocks the UsesOnlyNVGPUModule method. + UsesOnlyNVGPUModuleFunc func() (bool, string) + // calls tracks calls to the methods. calls struct { // HasDXCore holds details about calls to the HasDXCore method. 
@@ -62,11 +68,15 @@ type PropertyExtractorMock struct { // IsTegraSystem holds details about calls to the IsTegraSystem method. IsTegraSystem []struct { } + // UsesOnlyNVGPUModule holds details about calls to the UsesOnlyNVGPUModule method. + UsesOnlyNVGPUModule []struct { + } } - lockHasDXCore sync.RWMutex - lockHasNvml sync.RWMutex - lockHasTegraFiles sync.RWMutex - lockIsTegraSystem sync.RWMutex + lockHasDXCore sync.RWMutex + lockHasNvml sync.RWMutex + lockHasTegraFiles sync.RWMutex + lockIsTegraSystem sync.RWMutex + lockUsesOnlyNVGPUModule sync.RWMutex } // HasDXCore calls HasDXCoreFunc. @@ -176,3 +186,30 @@ func (mock *PropertyExtractorMock) IsTegraSystemCalls() []struct { mock.lockIsTegraSystem.RUnlock() return calls } + +// UsesOnlyNVGPUModule calls UsesOnlyNVGPUModuleFunc. +func (mock *PropertyExtractorMock) UsesOnlyNVGPUModule() (bool, string) { + if mock.UsesOnlyNVGPUModuleFunc == nil { + panic("PropertyExtractorMock.UsesOnlyNVGPUModuleFunc: method is nil but PropertyExtractor.UsesOnlyNVGPUModule was just called") + } + callInfo := struct { + }{} + mock.lockUsesOnlyNVGPUModule.Lock() + mock.calls.UsesOnlyNVGPUModule = append(mock.calls.UsesOnlyNVGPUModule, callInfo) + mock.lockUsesOnlyNVGPUModule.Unlock() + return mock.UsesOnlyNVGPUModuleFunc() +} + +// UsesOnlyNVGPUModuleCalls gets all the calls that were made to UsesOnlyNVGPUModule. +// Check the length with: +// +// len(mockedPropertyExtractor.UsesOnlyNVGPUModuleCalls()) +func (mock *PropertyExtractorMock) UsesOnlyNVGPUModuleCalls() []struct { +} { + var calls []struct { + } + mock.lockUsesOnlyNVGPUModule.RLock() + calls = mock.calls.UsesOnlyNVGPUModule + mock.lockUsesOnlyNVGPUModule.RUnlock() + return calls +}