Merge pull request #428 from elezar/fix-cdi-mode-resolution

Fix cdi mode resolution
This commit is contained in:
Evan Lezar 2024-05-21 13:22:10 +02:00 committed by GitHub
commit edda11d647
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 740 additions and 501 deletions

2
go.mod
View File

@ -3,7 +3,7 @@ module github.com/NVIDIA/nvidia-container-toolkit
go 1.20
require (
github.com/NVIDIA/go-nvlib v0.3.0
github.com/NVIDIA/go-nvlib v0.4.0
github.com/NVIDIA/go-nvml v0.12.0-6
github.com/fsnotify/fsnotify v1.7.0
github.com/opencontainers/runtime-spec v1.2.0

4
go.sum
View File

@ -1,5 +1,5 @@
github.com/NVIDIA/go-nvlib v0.3.0 h1:vd7jSOthJTqzqIWZrv317xDr1+Mnjoy5X4N69W9YwQM=
github.com/NVIDIA/go-nvlib v0.3.0/go.mod h1:NasUuId9hYFvwzuOHCu9F2X6oTU2tG0JHTfbJYuDAbA=
github.com/NVIDIA/go-nvlib v0.4.0 h1:dvuqjjSamBODFuxttPg4H/xtNVQRZOSlwFtuNKybcGI=
github.com/NVIDIA/go-nvlib v0.4.0/go.mod h1:87z49ULPr4GWPSGfSIp3taU4XENRYN/enIg88MzcL4k=
github.com/NVIDIA/go-nvml v0.12.0-6 h1:FJYc2KrpvX+VOC/8QQvMiQMmZ/nPMRpdJO/Ik4xfcr0=
github.com/NVIDIA/go-nvml v0.12.0-6/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=

View File

@ -17,75 +17,40 @@
package info
import (
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
// infoInterface provides an alias for mocking.
//
//go:generate moq -stub -out info-interface_mock.go . infoInterface
type infoInterface interface {
info.Interface
// UsesNVGPUModule indicates whether the system is using the nvgpu kernel module
UsesNVGPUModule() (bool, string)
}
type resolver struct {
logger logger.Interface
info infoInterface
}
// ResolveAutoMode determines the correct mode for the platform if set to "auto"
func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rmode string) {
nvinfo := info.New()
nvmllib := nvml.New()
devicelib := device.New(
device.WithNvml(nvmllib),
)
info := additionalInfo{
Interface: nvinfo,
nvmllib: nvmllib,
devicelib: devicelib,
}
r := resolver{
logger: logger,
info: info,
}
return r.resolveMode(mode, image)
return resolveMode(logger, mode, image, nil)
}
// resolveMode determines the correct mode for the platform if set to "auto"
func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
func resolveMode(logger logger.Interface, mode string, image image.CUDA, propertyExtractor info.PropertyExtractor) (rmode string) {
if mode != "auto" {
r.logger.Infof("Using requested mode '%s'", mode)
logger.Infof("Using requested mode '%s'", mode)
return mode
}
defer func() {
r.logger.Infof("Auto-detected mode as '%v'", rmode)
logger.Infof("Auto-detected mode as '%v'", rmode)
}()
if image.OnlyFullyQualifiedCDIDevices() {
return "cdi"
}
isTegra, reason := r.info.IsTegraSystem()
r.logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
nvinfo := info.New(
info.WithLogger(logger),
info.WithPropertyExtractor(propertyExtractor),
)
hasNVML, reason := r.info.HasNvml()
r.logger.Debugf("Has NVML? %v: %v", hasNVML, reason)
usesNVGPUModule, reason := r.info.UsesNVGPUModule()
r.logger.Debugf("Uses nvgpu kernel module? %v: %v", usesNVGPUModule, reason)
if (isTegra && !hasNVML) || usesNVGPUModule {
switch nvinfo.ResolvePlatform() {
case info.PlatformNVML, info.PlatformWSL:
return "legacy"
case info.PlatformTegra:
return "csv"
}
return "legacy"
}

View File

@ -19,6 +19,7 @@ package info
import (
"testing"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
@ -202,23 +203,24 @@ func TestResolveAutoMode(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
info := &infoInterfaceMock{
properties := &info.PropertyExtractorMock{
HasNvmlFunc: func() (bool, string) {
return tc.info["nvml"], "nvml"
},
HasDXCoreFunc: func() (bool, string) {
return tc.info["dxcore"], "dxcore"
},
IsTegraSystemFunc: func() (bool, string) {
return tc.info["tegra"], "tegra"
},
UsesNVGPUModuleFunc: func() (bool, string) {
HasTegraFilesFunc: func() (bool, string) {
return tc.info["tegra"], "tegra"
},
UsesOnlyNVGPUModuleFunc: func() (bool, string) {
return tc.info["nvgpu"], "nvgpu"
},
}
r := resolver{
logger: logger,
info: info,
}
var mounts []specs.Mount
for _, d := range tc.mounts {
mount := specs.Mount{
@ -231,7 +233,7 @@ func TestResolveAutoMode(t *testing.T) {
image.WithEnvMap(tc.envmap),
image.WithMounts(mounts),
)
mode := r.resolveMode(tc.mode, image)
mode := resolveMode(logger, tc.mode, image, properties)
require.EqualValues(t, tc.expectedMode, mode)
})
}

View File

@ -1,194 +0,0 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package info
import (
"sync"
)
// Ensure, that infoInterfaceMock does implement infoInterface.
// If this is not the case, regenerate this file with moq.
var _ infoInterface = &infoInterfaceMock{}
// infoInterfaceMock is a mock implementation of infoInterface.
//
// func TestSomethingThatUsesinfoInterface(t *testing.T) {
//
// // make and configure a mocked infoInterface
// mockedinfoInterface := &infoInterfaceMock{
// HasDXCoreFunc: func() (bool, string) {
// panic("mock out the HasDXCore method")
// },
// HasNvmlFunc: func() (bool, string) {
// panic("mock out the HasNvml method")
// },
// IsTegraSystemFunc: func() (bool, string) {
// panic("mock out the IsTegraSystem method")
// },
// UsesNVGPUModuleFunc: func() (bool, string) {
// panic("mock out the UsesNVGPUModule method")
// },
// }
//
// // use mockedinfoInterface in code that requires infoInterface
// // and then make assertions.
//
// }
type infoInterfaceMock struct {
// HasDXCoreFunc mocks the HasDXCore method.
HasDXCoreFunc func() (bool, string)
// HasNvmlFunc mocks the HasNvml method.
HasNvmlFunc func() (bool, string)
// IsTegraSystemFunc mocks the IsTegraSystem method.
IsTegraSystemFunc func() (bool, string)
// UsesNVGPUModuleFunc mocks the UsesNVGPUModule method.
UsesNVGPUModuleFunc func() (bool, string)
// calls tracks calls to the methods.
calls struct {
// HasDXCore holds details about calls to the HasDXCore method.
HasDXCore []struct {
}
// HasNvml holds details about calls to the HasNvml method.
HasNvml []struct {
}
// IsTegraSystem holds details about calls to the IsTegraSystem method.
IsTegraSystem []struct {
}
// UsesNVGPUModule holds details about calls to the UsesNVGPUModule method.
UsesNVGPUModule []struct {
}
}
lockHasDXCore sync.RWMutex
lockHasNvml sync.RWMutex
lockIsTegraSystem sync.RWMutex
lockUsesNVGPUModule sync.RWMutex
}
// HasDXCore calls HasDXCoreFunc.
func (mock *infoInterfaceMock) HasDXCore() (bool, string) {
callInfo := struct {
}{}
mock.lockHasDXCore.Lock()
mock.calls.HasDXCore = append(mock.calls.HasDXCore, callInfo)
mock.lockHasDXCore.Unlock()
if mock.HasDXCoreFunc == nil {
var (
bOut bool
sOut string
)
return bOut, sOut
}
return mock.HasDXCoreFunc()
}
// HasDXCoreCalls gets all the calls that were made to HasDXCore.
// Check the length with:
//
// len(mockedinfoInterface.HasDXCoreCalls())
func (mock *infoInterfaceMock) HasDXCoreCalls() []struct {
} {
var calls []struct {
}
mock.lockHasDXCore.RLock()
calls = mock.calls.HasDXCore
mock.lockHasDXCore.RUnlock()
return calls
}
// HasNvml calls HasNvmlFunc.
func (mock *infoInterfaceMock) HasNvml() (bool, string) {
callInfo := struct {
}{}
mock.lockHasNvml.Lock()
mock.calls.HasNvml = append(mock.calls.HasNvml, callInfo)
mock.lockHasNvml.Unlock()
if mock.HasNvmlFunc == nil {
var (
bOut bool
sOut string
)
return bOut, sOut
}
return mock.HasNvmlFunc()
}
// HasNvmlCalls gets all the calls that were made to HasNvml.
// Check the length with:
//
// len(mockedinfoInterface.HasNvmlCalls())
func (mock *infoInterfaceMock) HasNvmlCalls() []struct {
} {
var calls []struct {
}
mock.lockHasNvml.RLock()
calls = mock.calls.HasNvml
mock.lockHasNvml.RUnlock()
return calls
}
// IsTegraSystem calls IsTegraSystemFunc.
func (mock *infoInterfaceMock) IsTegraSystem() (bool, string) {
callInfo := struct {
}{}
mock.lockIsTegraSystem.Lock()
mock.calls.IsTegraSystem = append(mock.calls.IsTegraSystem, callInfo)
mock.lockIsTegraSystem.Unlock()
if mock.IsTegraSystemFunc == nil {
var (
bOut bool
sOut string
)
return bOut, sOut
}
return mock.IsTegraSystemFunc()
}
// IsTegraSystemCalls gets all the calls that were made to IsTegraSystem.
// Check the length with:
//
// len(mockedinfoInterface.IsTegraSystemCalls())
func (mock *infoInterfaceMock) IsTegraSystemCalls() []struct {
} {
var calls []struct {
}
mock.lockIsTegraSystem.RLock()
calls = mock.calls.IsTegraSystem
mock.lockIsTegraSystem.RUnlock()
return calls
}
// UsesNVGPUModule calls UsesNVGPUModuleFunc.
func (mock *infoInterfaceMock) UsesNVGPUModule() (bool, string) {
callInfo := struct {
}{}
mock.lockUsesNVGPUModule.Lock()
mock.calls.UsesNVGPUModule = append(mock.calls.UsesNVGPUModule, callInfo)
mock.lockUsesNVGPUModule.Unlock()
if mock.UsesNVGPUModuleFunc == nil {
var (
bOut bool
sOut string
)
return bOut, sOut
}
return mock.UsesNVGPUModuleFunc()
}
// UsesNVGPUModuleCalls gets all the calls that were made to UsesNVGPUModule.
// Check the length with:
//
// len(mockedinfoInterface.UsesNVGPUModuleCalls())
func (mock *infoInterfaceMock) UsesNVGPUModuleCalls() []struct {
} {
var calls []struct {
}
mock.lockUsesNVGPUModule.RLock()
calls = mock.calls.UsesNVGPUModule
mock.lockUsesNVGPUModule.RUnlock()
return calls
}

View File

@ -91,7 +91,12 @@ func New(opts ...Option) (Interface, error) {
l.nvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook"
}
if l.infolib == nil {
l.infolib = info.New()
l.infolib = info.New(
info.WithRoot(l.driverRoot),
info.WithLogger(l.logger),
info.WithNvmlLib(l.nvmllib),
info.WithDeviceLib(l.devicelib),
)
}
l.driver = root.New(
@ -184,26 +189,19 @@ func (l *nvcdilib) resolveMode() (rmode string) {
return l.mode
}
defer func() {
l.logger.Infof("Auto-detected mode as %q", rmode)
l.logger.Infof("Auto-detected mode as '%v'", rmode)
}()
isWSL, reason := l.infolib.HasDXCore()
l.logger.Debugf("Is WSL-based system? %v: %v", isWSL, reason)
if isWSL {
platform := l.infolib.ResolvePlatform()
switch platform {
case info.PlatformNVML:
return ModeNvml
case info.PlatformTegra:
return ModeCSV
case info.PlatformWSL:
return ModeWsl
}
isNvml, reason := l.infolib.HasNvml()
l.logger.Debugf("Is NVML-based system? %v: %v", isNvml, reason)
isTegra, reason := l.infolib.IsTegraSystem()
l.logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
if isTegra && !isNvml {
return ModeCSV
}
l.logger.Warningf("Unsupported platform detected: %v; assuming %v", platform, ModeNvml)
return ModeNvml
}

View File

@ -1,116 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package nvcdi
import (
"fmt"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
// TestResolveMode checks that resolveMode returns an explicitly requested mode
// unchanged and that the "auto" mode is resolved from the mocked system
// properties.
func TestResolveMode(t *testing.T) {
	logger, _ := testlog.NewNullLogger()

	type testCase struct {
		mode      string
		isTegra   bool
		hasDXCore bool
		hasNVML   bool
		expected  string
	}

	cases := []testCase{
		{mode: "auto", hasDXCore: true, expected: "wsl"},
		{mode: "auto", hasDXCore: false, isTegra: true, hasNVML: false, expected: "csv"},
		{mode: "auto", hasDXCore: false, isTegra: false, hasNVML: false, expected: "nvml"},
		{mode: "auto", hasDXCore: false, isTegra: true, hasNVML: true, expected: "nvml"},
		{mode: "auto", hasDXCore: false, isTegra: false, expected: "nvml"},
		{mode: "nvml", hasDXCore: true, isTegra: true, expected: "nvml"},
		{mode: "wsl", hasDXCore: false, expected: "wsl"},
		{mode: "not-auto", hasDXCore: true, expected: "not-auto"},
	}

	for idx, tt := range cases {
		t.Run(fmt.Sprintf("test case %d", idx), func(t *testing.T) {
			// The mock reports exactly the properties configured for this case.
			properties := infoMock{
				hasDXCore: tt.hasDXCore,
				isTegra:   tt.isTegra,
				hasNVML:   tt.hasNVML,
			}
			lib := nvcdilib{
				logger:  logger,
				mode:    tt.mode,
				infolib: properties,
			}

			require.Equal(t, tt.expected, lib.resolveMode())
		})
	}
}
// infoMock is a test double whose methods report the configured property
// values together with an empty reason.
type infoMock struct {
	hasDXCore, isTegra, hasNVML bool
}

// HasDXCore returns the configured hasDXCore value.
func (m infoMock) HasDXCore() (bool, string) { return m.hasDXCore, "" }

// HasNvml returns the configured hasNVML value.
func (m infoMock) HasNvml() (bool, string) { return m.hasNVML, "" }

// IsTegraSystem returns the configured isTegra value.
func (m infoMock) IsTegraSystem() (bool, string) { return m.isTegra, "" }

View File

@ -0,0 +1,41 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package info
// Interface provides the API to the info package.
// It combines platform resolution (PlatformResolver) with the individual
// system property queries (PropertyExtractor).
type Interface interface {
PlatformResolver
PropertyExtractor
}
// PlatformResolver defines a function to resolve the current platform.
type PlatformResolver interface {
// ResolvePlatform returns the Platform resolved for the current system.
ResolvePlatform() Platform
}
// PropertyExtractor provides a set of functions to query capabilities of the
// system.
// Each function returns a boolean result together with a string describing
// the reason for that result.
//
//go:generate moq -rm -out property-extractor_mock.go . PropertyExtractor
type PropertyExtractor interface {
// HasDXCore reports whether DXCore is detected on the system.
HasDXCore() (bool, string)
// HasNvml reports whether NVML is detected on the system.
HasNvml() (bool, string)
// HasTegraFiles reports whether Tegra-based files are detected on the system.
HasTegraFiles() (bool, string)
// IsTegraSystem reports whether the system is detected as a Tegra-based
// system.
//
// Deprecated: Use HasTegraFiles instead.
IsTegraSystem() (bool, string)
// UsesOnlyNVGPUModule reports whether all devices use the nvgpu module.
UsesOnlyNVGPUModule() (bool, string)
}

View File

@ -0,0 +1,78 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package info
import (
"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
)
// infolib is the Interface implementation returned by New(). It delegates to
// the embedded PropertyExtractor and PlatformResolver.
type infolib struct {
PropertyExtractor
PlatformResolver
}
// options collects the settings that Option functions apply before New()
// constructs the library.
type options struct {
// logger receives the messages emitted by the library.
logger basicLogger
// root is the directory relative to which libraries and files are located.
root root
// nvmllib is the NVML library implementation to use.
nvmllib nvml.Interface
// devicelib is the device library implementation to use.
devicelib device.Interface
// platform is the platform to use; PlatformAuto requests detection.
platform Platform
// propertyExtractor replaces the default property extractor when set.
propertyExtractor PropertyExtractor
}
// New creates a new instance of the 'info' interface.
//
// The supplied options are applied in order. Any setting that is still unset
// afterwards receives a default, where later defaults are derived from the
// settings that precede them (root -> nvmllib -> devicelib -> property
// extractor).
func New(opts ...Option) Interface {
	cfg := &options{}
	for _, apply := range opts {
		apply(cfg)
	}

	if cfg.logger == nil {
		cfg.logger = &nullLogger{}
	}
	if cfg.root == "" {
		cfg.root = "/"
	}
	if cfg.nvmllib == nil {
		// Use the NVML library located relative to the root when it can be
		// resolved there; otherwise the plain library name is used.
		nvmlPath := cfg.root.tryResolveLibrary("libnvidia-ml.so.1")
		cfg.nvmllib = nvml.New(nvml.WithLibraryPath(nvmlPath))
	}
	if cfg.devicelib == nil {
		cfg.devicelib = device.New(device.WithNvml(cfg.nvmllib))
	}
	if cfg.platform == "" {
		cfg.platform = PlatformAuto
	}
	if cfg.propertyExtractor == nil {
		cfg.propertyExtractor = &propertyExtractor{
			root:      cfg.root,
			nvmllib:   cfg.nvmllib,
			devicelib: cfg.devicelib,
		}
	}

	resolver := &platformResolver{
		logger:            cfg.logger,
		platform:          cfg.platform,
		propertyExtractor: cfg.propertyExtractor,
	}

	return &infolib{
		PlatformResolver:  resolver,
		PropertyExtractor: cfg.propertyExtractor,
	}
}

View File

@ -1,102 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package info
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/go-nvml/pkg/dl"
)
// Interface provides the API to the info package.
// Each function returns a boolean result together with a string describing
// the reason for that result.
type Interface interface {
// HasDXCore reports whether DXCore is detected on the system.
HasDXCore() (bool, string)
// HasNvml reports whether NVML is detected on the system.
HasNvml() (bool, string)
// IsTegraSystem reports whether the system is detected as a Tegra-based system.
IsTegraSystem() (bool, string)
}
// infolib is the Interface implementation of this package.
type infolib struct {
// root is the path that is joined in front of the Tegra files being checked.
root string
}
// Ensure that infolib implements Interface.
var _ Interface = &infolib{}
// HasDXCore reports whether the DXCore library can be loaded on the system.
// The returned string describes the reason for the result.
func (i *infolib) HasDXCore() (bool, string) {
	err := assertHasLibrary("libdxcore.so")
	if err == nil {
		return true, "found DXCore library"
	}
	return false, fmt.Sprintf("could not load DXCore library: %v", err)
}
// HasNvml reports whether the NVML library can be loaded on the system.
// The returned string describes the reason for the result.
func (i *infolib) HasNvml() (bool, string) {
	err := assertHasLibrary("libnvidia-ml.so.1")
	if err == nil {
		return true, "found NVML library"
	}
	return false, fmt.Sprintf("could not load NVML library: %v", err)
}
// IsTegraSystem reports whether the system is detected as a Tegra-based
// system, together with the reason for the result.
//
// The system counts as Tegra-based if the Tegra release file exists as a
// non-directory, or if the SoC family file has contents starting with "tegra"
// (case-insensitive).
func (i *infolib) IsTegraSystem() (bool, string) {
	releasePath := filepath.Join(i.root, "/etc/nv_tegra_release")
	if fi, err := os.Stat(releasePath); err == nil && !fi.IsDir() {
		return true, fmt.Sprintf("%v found", releasePath)
	}

	familyPath := filepath.Join(i.root, "/sys/devices/soc0/family")
	fi, err := os.Stat(familyPath)
	if err != nil || fi.IsDir() {
		return false, fmt.Sprintf("%v file not found", familyPath)
	}

	data, err := os.ReadFile(familyPath)
	if err != nil {
		return false, fmt.Sprintf("could not read %v", familyPath)
	}

	family := strings.ToLower(string(data))
	if !strings.HasPrefix(family, "tegra") {
		return false, fmt.Sprintf("%v has no 'tegra' prefix", familyPath)
	}
	return true, fmt.Sprintf("%v has 'tegra' prefix", familyPath)
}
// assertHasLibrary returns an error if the specified library cannot be loaded.
// A library that was opened successfully is closed again before returning.
func assertHasLibrary(libraryName string) error {
	handle := dl.New(libraryName, dl.RTLD_LAZY)
	if err := handle.Open(); err != nil {
		return err
	}
	// Only loadability is being checked; the result of Close is not relevant.
	_ = handle.Close()
	return nil
}

View File

@ -0,0 +1,28 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package info
// basicLogger is the minimal logging interface used by this package.
type basicLogger interface {
	Debugf(format string, args ...interface{})
	Infof(format string, args ...interface{})
}

// nullLogger is a basicLogger that discards every message.
type nullLogger struct{}

// Debugf discards the message.
func (*nullLogger) Debugf(string, ...interface{}) {}

// Infof discards the message.
func (*nullLogger) Infof(string, ...interface{}) {}

View File

@ -16,24 +16,55 @@
package info
// Option defines a function for passing options to the New() call.
type Option func(*infolib)
import (
"github.com/NVIDIA/go-nvml/pkg/nvml"
// New creates a new instance of the 'info' interface.
func New(opts ...Option) Interface {
i := &infolib{}
for _, opt := range opts {
opt(i)
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
)
// Option defines a function for passing options to the New() call.
type Option func(*options)
// WithDeviceLib sets the device library for the library.
func WithDeviceLib(devicelib device.Interface) Option {
return func(i *options) {
i.devicelib = devicelib
}
if i.root == "" {
i.root = "/"
}
// WithLogger sets the logger for the library.
func WithLogger(logger basicLogger) Option {
return func(i *options) {
i.logger = logger
}
}
// WithNvmlLib sets the nvml library for the library.
func WithNvmlLib(nvmllib nvml.Interface) Option {
return func(i *options) {
i.nvmllib = nvmllib
}
return i
}
// WithRoot provides a Option to set the root of the 'info' interface.
func WithRoot(root string) Option {
return func(i *infolib) {
i.root = root
func WithRoot(r string) Option {
return func(i *options) {
i.root = root(r)
}
}
// WithPropertyExtractor provides an Option to set the PropertyExtractor
// interface implementation.
// This is predominantly used for testing.
func WithPropertyExtractor(propertyExtractor PropertyExtractor) Option {
return func(i *options) {
i.propertyExtractor = propertyExtractor
}
}
// WithPlatform provides an option to set the platform explicitly.
func WithPlatform(platform Platform) Option {
return func(i *options) {
i.platform = platform
}
}

View File

@ -0,0 +1,143 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package info
import (
"fmt"
"os"
"strings"
"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
)
// propertyExtractor is the PropertyExtractor implementation of this package.
type propertyExtractor struct {
// root is the directory relative to which libraries and files are checked.
root root
// nvmllib is initialized and shut down when querying device names.
nvmllib nvml.Interface
// devicelib is used to visit the devices on the system.
devicelib device.Interface
}
// Ensure that propertyExtractor implements PropertyExtractor.
var _ PropertyExtractor = &propertyExtractor{}
// HasDXCore reports whether the DXCore library can be loaded relative to the
// configured root. The returned string describes the reason for the result.
func (i *propertyExtractor) HasDXCore() (bool, string) {
	err := i.root.assertHasLibrary("libdxcore.so")
	if err == nil {
		return true, "found DXCore library"
	}
	return false, fmt.Sprintf("could not load DXCore library: %v", err)
}
// HasNvml reports whether the NVML library can be loaded relative to the
// configured root. The returned string describes the reason for the result.
func (i *propertyExtractor) HasNvml() (bool, string) {
	err := i.root.assertHasLibrary("libnvidia-ml.so.1")
	if err == nil {
		return true, "found NVML library"
	}
	return false, fmt.Sprintf("could not load NVML library: %v", err)
}
// IsTegraSystem returns true if the system is detected as a Tegra-based system.
// It returns the result of HasTegraFiles unchanged.
//
// Deprecated: Use HasTegraFiles instead.
func (i *propertyExtractor) IsTegraSystem() (bool, string) {
return i.HasTegraFiles()
}
// HasTegraFiles reports whether Tegra-based files are detected on the system,
// together with the reason for the result.
//
// Files are detected if the Tegra release file exists under the root as a
// non-directory, or if the SoC family file under the root has contents
// starting with "tegra" (case-insensitive).
func (i *propertyExtractor) HasTegraFiles() (bool, string) {
	releasePath := i.root.join("/etc/nv_tegra_release")
	if fi, err := os.Stat(releasePath); err == nil && !fi.IsDir() {
		return true, fmt.Sprintf("%v found", releasePath)
	}

	familyPath := i.root.join("/sys/devices/soc0/family")
	fi, err := os.Stat(familyPath)
	if err != nil || fi.IsDir() {
		return false, fmt.Sprintf("%v file not found", familyPath)
	}

	data, err := os.ReadFile(familyPath)
	if err != nil {
		return false, fmt.Sprintf("could not read %v", familyPath)
	}

	family := strings.ToLower(string(data))
	if !strings.HasPrefix(family, "tegra") {
		return false, fmt.Sprintf("%v has no 'tegra' prefix", familyPath)
	}
	return true, fmt.Sprintf("%v has 'tegra' prefix", familyPath)
}
// UsesOnlyNVGPUModule checks whether only the nvgpu module is used.
// This kernel module is used on Tegra-based systems when using the iGPU.
// Since some of these systems also support NVML, we use the device name
// reported by NVML to determine whether the system is an iGPU system.
//
// Devices that use the nvgpu module have their device names as:
//
//	GPU 0: Orin (nvgpu) (UUID: 54d0709b-558d-5a59-9c65-0c5fc14a21a4)
//
// This function returns true if ALL devices use the nvgpu module. The returned
// reason describes why the result was reached.
func (i *propertyExtractor) UsesOnlyNVGPUModule() (uses bool, reason string) {
	// Convert any panic raised below into a negative result so that this
	// function never panics.
	defer func() {
		if r := recover(); r != nil {
			uses, reason = false, fmt.Sprintf("panic: %v", r)
		}
	}()

	if ret := i.nvmllib.Init(); ret != nvml.SUCCESS {
		return false, fmt.Sprintf("failed to initialize nvml: %v", ret)
	}
	defer func() {
		_ = i.nvmllib.Shutdown()
	}()

	// Collect the names of all devices first; a failure for any single device
	// aborts the check.
	var deviceNames []string
	collectName := func(index int, dev device.Device) error {
		name, ret := dev.GetName()
		if ret != nvml.SUCCESS {
			return fmt.Errorf("device %v: %v", index, ret)
		}
		deviceNames = append(deviceNames, name)
		return nil
	}
	if err := i.devicelib.VisitDevices(collectName); err != nil {
		return false, fmt.Sprintf("failed to get device names: %v", err)
	}

	if len(deviceNames) == 0 {
		return false, "no devices found"
	}

	for _, deviceName := range deviceNames {
		if strings.Contains(deviceName, "(nvgpu)") {
			continue
		}
		return false, fmt.Sprintf("device %q does not use nvgpu module", deviceName)
	}
	return true, "all devices use nvgpu module"
}

View File

@ -0,0 +1,215 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package info
import (
"sync"
)
// Ensure, that PropertyExtractorMock does implement PropertyExtractor.
// If this is not the case, regenerate this file with moq.
var _ PropertyExtractor = &PropertyExtractorMock{}
// PropertyExtractorMock is a mock implementation of PropertyExtractor.
//
// func TestSomethingThatUsesPropertyExtractor(t *testing.T) {
//
// // make and configure a mocked PropertyExtractor
// mockedPropertyExtractor := &PropertyExtractorMock{
// HasDXCoreFunc: func() (bool, string) {
// panic("mock out the HasDXCore method")
// },
// HasNvmlFunc: func() (bool, string) {
// panic("mock out the HasNvml method")
// },
// HasTegraFilesFunc: func() (bool, string) {
// panic("mock out the HasTegraFiles method")
// },
// IsTegraSystemFunc: func() (bool, string) {
// panic("mock out the IsTegraSystem method")
// },
// UsesOnlyNVGPUModuleFunc: func() (bool, string) {
// panic("mock out the UsesOnlyNVGPUModule method")
// },
// }
//
// // use mockedPropertyExtractor in code that requires PropertyExtractor
// // and then make assertions.
//
// }
type PropertyExtractorMock struct {
// HasDXCoreFunc mocks the HasDXCore method.
HasDXCoreFunc func() (bool, string)
// HasNvmlFunc mocks the HasNvml method.
HasNvmlFunc func() (bool, string)
// HasTegraFilesFunc mocks the HasTegraFiles method.
HasTegraFilesFunc func() (bool, string)
// IsTegraSystemFunc mocks the IsTegraSystem method.
IsTegraSystemFunc func() (bool, string)
// UsesOnlyNVGPUModuleFunc mocks the UsesOnlyNVGPUModule method.
UsesOnlyNVGPUModuleFunc func() (bool, string)
// calls tracks calls to the methods.
calls struct {
// HasDXCore holds details about calls to the HasDXCore method.
HasDXCore []struct {
}
// HasNvml holds details about calls to the HasNvml method.
HasNvml []struct {
}
// HasTegraFiles holds details about calls to the HasTegraFiles method.
HasTegraFiles []struct {
}
// IsTegraSystem holds details about calls to the IsTegraSystem method.
IsTegraSystem []struct {
}
// UsesOnlyNVGPUModule holds details about calls to the UsesOnlyNVGPUModule method.
UsesOnlyNVGPUModule []struct {
}
}
lockHasDXCore sync.RWMutex
lockHasNvml sync.RWMutex
lockHasTegraFiles sync.RWMutex
lockIsTegraSystem sync.RWMutex
lockUsesOnlyNVGPUModule sync.RWMutex
}
// HasDXCore calls HasDXCoreFunc.
func (mock *PropertyExtractorMock) HasDXCore() (bool, string) {
if mock.HasDXCoreFunc == nil {
panic("PropertyExtractorMock.HasDXCoreFunc: method is nil but PropertyExtractor.HasDXCore was just called")
}
callInfo := struct {
}{}
mock.lockHasDXCore.Lock()
mock.calls.HasDXCore = append(mock.calls.HasDXCore, callInfo)
mock.lockHasDXCore.Unlock()
return mock.HasDXCoreFunc()
}
// HasDXCoreCalls gets all the calls that were made to HasDXCore.
// Check the length with:
//
// len(mockedPropertyExtractor.HasDXCoreCalls())
func (mock *PropertyExtractorMock) HasDXCoreCalls() []struct {
} {
var calls []struct {
}
mock.lockHasDXCore.RLock()
calls = mock.calls.HasDXCore
mock.lockHasDXCore.RUnlock()
return calls
}
// HasNvml calls HasNvmlFunc.
func (mock *PropertyExtractorMock) HasNvml() (bool, string) {
if mock.HasNvmlFunc == nil {
panic("PropertyExtractorMock.HasNvmlFunc: method is nil but PropertyExtractor.HasNvml was just called")
}
callInfo := struct {
}{}
mock.lockHasNvml.Lock()
mock.calls.HasNvml = append(mock.calls.HasNvml, callInfo)
mock.lockHasNvml.Unlock()
return mock.HasNvmlFunc()
}
// HasNvmlCalls gets all the calls that were made to HasNvml.
// Check the length with:
//
// len(mockedPropertyExtractor.HasNvmlCalls())
func (mock *PropertyExtractorMock) HasNvmlCalls() []struct {
} {
var calls []struct {
}
mock.lockHasNvml.RLock()
calls = mock.calls.HasNvml
mock.lockHasNvml.RUnlock()
return calls
}
// HasTegraFiles calls HasTegraFilesFunc.
func (mock *PropertyExtractorMock) HasTegraFiles() (bool, string) {
if mock.HasTegraFilesFunc == nil {
panic("PropertyExtractorMock.HasTegraFilesFunc: method is nil but PropertyExtractor.HasTegraFiles was just called")
}
callInfo := struct {
}{}
mock.lockHasTegraFiles.Lock()
mock.calls.HasTegraFiles = append(mock.calls.HasTegraFiles, callInfo)
mock.lockHasTegraFiles.Unlock()
return mock.HasTegraFilesFunc()
}
// HasTegraFilesCalls gets all the calls that were made to HasTegraFiles.
// Check the length with:
//
// len(mockedPropertyExtractor.HasTegraFilesCalls())
func (mock *PropertyExtractorMock) HasTegraFilesCalls() []struct {
} {
var calls []struct {
}
mock.lockHasTegraFiles.RLock()
calls = mock.calls.HasTegraFiles
mock.lockHasTegraFiles.RUnlock()
return calls
}
// IsTegraSystem calls IsTegraSystemFunc.
func (mock *PropertyExtractorMock) IsTegraSystem() (bool, string) {
if mock.IsTegraSystemFunc == nil {
panic("PropertyExtractorMock.IsTegraSystemFunc: method is nil but PropertyExtractor.IsTegraSystem was just called")
}
callInfo := struct {
}{}
mock.lockIsTegraSystem.Lock()
mock.calls.IsTegraSystem = append(mock.calls.IsTegraSystem, callInfo)
mock.lockIsTegraSystem.Unlock()
return mock.IsTegraSystemFunc()
}
// IsTegraSystemCalls gets all the calls that were made to IsTegraSystem.
// Check the length with:
//
// len(mockedPropertyExtractor.IsTegraSystemCalls())
func (mock *PropertyExtractorMock) IsTegraSystemCalls() []struct {
} {
var calls []struct {
}
mock.lockIsTegraSystem.RLock()
calls = mock.calls.IsTegraSystem
mock.lockIsTegraSystem.RUnlock()
return calls
}
// UsesOnlyNVGPUModule calls UsesOnlyNVGPUModuleFunc.
func (mock *PropertyExtractorMock) UsesOnlyNVGPUModule() (bool, string) {
if mock.UsesOnlyNVGPUModuleFunc == nil {
panic("PropertyExtractorMock.UsesOnlyNVGPUModuleFunc: method is nil but PropertyExtractor.UsesOnlyNVGPUModule was just called")
}
callInfo := struct {
}{}
mock.lockUsesOnlyNVGPUModule.Lock()
mock.calls.UsesOnlyNVGPUModule = append(mock.calls.UsesOnlyNVGPUModule, callInfo)
mock.lockUsesOnlyNVGPUModule.Unlock()
return mock.UsesOnlyNVGPUModuleFunc()
}
// UsesOnlyNVGPUModuleCalls gets all the calls that were made to UsesOnlyNVGPUModule.
// Check the length with:
//
// len(mockedPropertyExtractor.UsesOnlyNVGPUModuleCalls())
func (mock *PropertyExtractorMock) UsesOnlyNVGPUModuleCalls() []struct {
} {
var calls []struct {
}
mock.lockUsesOnlyNVGPUModule.RLock()
calls = mock.calls.UsesOnlyNVGPUModule
mock.lockUsesOnlyNVGPUModule.RUnlock()
return calls
}

View File

@ -0,0 +1,64 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package info
// Platform represents a supported platform.
type Platform string
// The platforms known to this package.
const (
// PlatformAuto requests that the platform be detected automatically.
PlatformAuto = Platform("auto")
// PlatformNVML is the platform of NVML-based systems.
PlatformNVML = Platform("nvml")
// PlatformTegra is the platform of Tegra-based systems.
PlatformTegra = Platform("tegra")
// PlatformWSL is the platform of WSL-based systems.
PlatformWSL = Platform("wsl")
// PlatformUnknown is reported when none of the other platforms is detected.
PlatformUnknown = Platform("unknown")
)
// platformResolver resolves the platform from the properties reported by a
// PropertyExtractor.
type platformResolver struct {
// logger receives the messages describing the resolution.
logger basicLogger
// platform is returned as is unless it is PlatformAuto.
platform Platform
// propertyExtractor is queried when the platform is PlatformAuto.
propertyExtractor PropertyExtractor
}
// ResolvePlatform returns the platform for the current system.
//
// A platform other than PlatformAuto is returned as requested. Otherwise all
// system properties are queried and logged, and the platform is selected with
// the following precedence:
//   - PlatformWSL if DXCore is detected,
//   - PlatformTegra if Tegra files are present without NVML, or if all devices
//     use the nvgpu module,
//   - PlatformNVML if NVML is detected,
//   - PlatformUnknown otherwise.
func (p platformResolver) ResolvePlatform() Platform {
	if p.platform != PlatformAuto {
		p.logger.Infof("Using requested platform '%s'", p.platform)
		return p.platform
	}

	extractor := p.propertyExtractor

	isWSL, why := extractor.HasDXCore()
	p.logger.Debugf("Is WSL-based system? %v: %v", isWSL, why)

	tegraFiles, why := extractor.HasTegraFiles()
	p.logger.Debugf("Is Tegra-based system? %v: %v", tegraFiles, why)

	nvmlFound, why := extractor.HasNvml()
	p.logger.Debugf("Is NVML-based system? %v: %v", nvmlFound, why)

	onlyNVGPU, why := extractor.UsesOnlyNVGPUModule()
	p.logger.Debugf("Uses nvgpu kernel module? %v: %v", onlyNVGPU, why)

	if isWSL {
		return PlatformWSL
	}
	if onlyNVGPU || (tegraFiles && !nvmlFound) {
		return PlatformTegra
	}
	if nvmlFound {
		return PlatformNVML
	}
	return PlatformUnknown
}

View File

@ -0,0 +1,86 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package info
import (
"fmt"
"path/filepath"
"github.com/NVIDIA/go-nvml/pkg/dl"
)
// root represents a directory on the filesystem relative to which libraries
// such as the NVIDIA driver libraries can be found.
type root string

// join returns the path formed by joining the root with the specified parts.
func (r root) join(parts ...string) string {
	elems := make([]string, 0, len(parts)+1)
	elems = append(elems, string(r))
	elems = append(elems, parts...)
	return filepath.Join(elems...)
}
// assertHasLibrary returns an error if the specified library cannot be loaded.
// The library is first resolved relative to the root; a library that was
// opened successfully is closed again before returning.
func (r root) assertHasLibrary(libraryName string) error {
	libraryPath := r.tryResolveLibrary(libraryName)

	handle := dl.New(libraryPath, dl.RTLD_LAZY)
	if err := handle.Open(); err != nil {
		return err
	}
	// Only loadability is being checked; the result of Close is not relevant.
	_ = handle.Close()
	return nil
}
// tryResolveLibrary attempts to locate the specified library in the root.
// The predefined library directories are searched in order and the first
// candidate that can be resolved is returned. If the root is not specified, is
// "/", or the library cannot be found in any of the directories, the input is
// returned as is.
func (r root) tryResolveLibrary(libraryName string) string {
	switch r {
	case "", "/":
		return libraryName
	}

	searchDirs := []string{
		"/usr/lib64",
		"/usr/lib/x86_64-linux-gnu",
		"/usr/lib/aarch64-linux-gnu",
		"/lib64",
		"/lib/x86_64-linux-gnu",
		"/lib/aarch64-linux-gnu",
	}
	for _, dir := range searchDirs {
		candidate := r.join(dir, libraryName)
		if resolved, err := resolveLink(candidate); err == nil {
			return resolved
		}
	}

	return libraryName
}
// resolveLink finds the target of a symlink or the file itself in the
// case of a regular file.
// This is equivalent to running `readlink -f ${l}`.
// An empty string and a wrapped error are returned if l cannot be resolved.
func resolveLink(l string) (string, error) {
	target, err := filepath.EvalSymlinks(l)
	if err == nil {
		return target, nil
	}
	return "", fmt.Errorf("error resolving link '%v': %w", l, err)
}

2
vendor/modules.txt vendored
View File

@ -1,4 +1,4 @@
# github.com/NVIDIA/go-nvlib v0.3.0
# github.com/NVIDIA/go-nvlib v0.4.0
## explicit; go 1.20
github.com/NVIDIA/go-nvlib/pkg/nvlib/device
github.com/NVIDIA/go-nvlib/pkg/nvlib/info