diff --git a/go.mod b/go.mod index 56665223..b3443804 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/NVIDIA/nvidia-container-toolkit go 1.20 require ( - github.com/NVIDIA/go-nvlib v0.3.0 + github.com/NVIDIA/go-nvlib v0.4.0 github.com/NVIDIA/go-nvml v0.12.0-6 github.com/fsnotify/fsnotify v1.7.0 github.com/opencontainers/runtime-spec v1.2.0 diff --git a/go.sum b/go.sum index 2fc510d6..c24a226c 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -github.com/NVIDIA/go-nvlib v0.3.0 h1:vd7jSOthJTqzqIWZrv317xDr1+Mnjoy5X4N69W9YwQM= -github.com/NVIDIA/go-nvlib v0.3.0/go.mod h1:NasUuId9hYFvwzuOHCu9F2X6oTU2tG0JHTfbJYuDAbA= +github.com/NVIDIA/go-nvlib v0.4.0 h1:dvuqjjSamBODFuxttPg4H/xtNVQRZOSlwFtuNKybcGI= +github.com/NVIDIA/go-nvlib v0.4.0/go.mod h1:87z49ULPr4GWPSGfSIp3taU4XENRYN/enIg88MzcL4k= github.com/NVIDIA/go-nvml v0.12.0-6 h1:FJYc2KrpvX+VOC/8QQvMiQMmZ/nPMRpdJO/Ik4xfcr0= github.com/NVIDIA/go-nvml v0.12.0-6/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= diff --git a/internal/info/auto.go b/internal/info/auto.go index 5d1426d9..c6800da1 100644 --- a/internal/info/auto.go +++ b/internal/info/auto.go @@ -17,75 +17,40 @@ package info import ( - "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" "github.com/NVIDIA/go-nvlib/pkg/nvlib/info" - "github.com/NVIDIA/go-nvml/pkg/nvml" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" ) -// infoInterface provides an alias for mocking. -// -//go:generate moq -stub -out info-interface_mock.go . infoInterface -type infoInterface interface { - info.Interface - // UsesNVGPUModule indicates whether the system is using the nvgpu kernel module - UsesNVGPUModule() (bool, string) -} - -type resolver struct { - logger logger.Interface - info infoInterface -} - // ResolveAutoMode determines the correct mode for the platform if set to "auto" func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rmode string) { - nvinfo := info.New() - nvmllib := nvml.New() - devicelib := device.New( - device.WithNvml(nvmllib), - ) - - info := additionalInfo{ - Interface: nvinfo, - nvmllib: nvmllib, - devicelib: devicelib, - } - - r := resolver{ - logger: logger, - info: info, - } - return r.resolveMode(mode, image) + return resolveMode(logger, mode, image, nil) } -// resolveMode determines the correct mode for the platform if set to "auto" -func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) { +func resolveMode(logger logger.Interface, mode string, image image.CUDA, propertyExtractor info.PropertyExtractor) (rmode string) { if mode != "auto" { - r.logger.Infof("Using requested mode '%s'", mode) + logger.Infof("Using requested mode '%s'", mode) return mode } defer func() { - r.logger.Infof("Auto-detected mode as '%v'", rmode) + logger.Infof("Auto-detected mode as '%v'", rmode) }() if image.OnlyFullyQualifiedCDIDevices() { return "cdi" } - isTegra, reason := r.info.IsTegraSystem() - r.logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason) + nvinfo := info.New( + info.WithLogger(logger), + info.WithPropertyExtractor(propertyExtractor), + ) - hasNVML, reason := r.info.HasNvml() - r.logger.Debugf("Has NVML? %v: %v", hasNVML, reason) - - usesNVGPUModule, reason := r.info.UsesNVGPUModule() - r.logger.Debugf("Uses nvgpu kernel module? %v: %v", usesNVGPUModule, reason) - - if (isTegra && !hasNVML) || usesNVGPUModule { + switch nvinfo.ResolvePlatform() { + case info.PlatformNVML, info.PlatformWSL: + return "legacy" + case info.PlatformTegra: return "csv" } - return "legacy" } diff --git a/internal/info/auto_test.go b/internal/info/auto_test.go index e986ced7..4fbfcde4 100644 --- a/internal/info/auto_test.go +++ b/internal/info/auto_test.go @@ -19,6 +19,7 @@ package info import ( "testing" + "github.com/NVIDIA/go-nvlib/pkg/nvlib/info" "github.com/opencontainers/runtime-spec/specs-go" testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" @@ -202,23 +203,24 @@ func TestResolveAutoMode(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - info := &infoInterfaceMock{ + properties := &info.PropertyExtractorMock{ HasNvmlFunc: func() (bool, string) { return tc.info["nvml"], "nvml" }, + HasDXCoreFunc: func() (bool, string) { + return tc.info["dxcore"], "dxcore" + }, IsTegraSystemFunc: func() (bool, string) { return tc.info["tegra"], "tegra" }, - UsesNVGPUModuleFunc: func() (bool, string) { + HasTegraFilesFunc: func() (bool, string) { + return tc.info["tegra"], "tegra" + }, + UsesOnlyNVGPUModuleFunc: func() (bool, string) { return tc.info["nvgpu"], "nvgpu" }, } - r := resolver{ - logger: logger, - info: info, - } - var mounts []specs.Mount for _, d := range tc.mounts { mount := specs.Mount{ @@ -231,7 +233,7 @@ func TestResolveAutoMode(t *testing.T) { image.WithEnvMap(tc.envmap), image.WithMounts(mounts), ) - mode := r.resolveMode(tc.mode, image) + mode := resolveMode(logger, tc.mode, image, properties) require.EqualValues(t, tc.expectedMode, mode) }) } diff --git a/internal/info/info-interface_mock.go b/internal/info/info-interface_mock.go deleted file mode 100644 index c1e491c9..00000000 --- a/internal/info/info-interface_mock.go +++ /dev/null @@ -1,194 +0,0 @@ -// Code generated by moq; DO NOT EDIT. -// github.com/matryer/moq - -package info - -import ( - "sync" -) - -// Ensure, that infoInterfaceMock does implement infoInterface. -// If this is not the case, regenerate this file with moq. -var _ infoInterface = &infoInterfaceMock{} - -// infoInterfaceMock is a mock implementation of infoInterface. -// -// func TestSomethingThatUsesinfoInterface(t *testing.T) { -// -// // make and configure a mocked infoInterface -// mockedinfoInterface := &infoInterfaceMock{ -// HasDXCoreFunc: func() (bool, string) { -// panic("mock out the HasDXCore method") -// }, -// HasNvmlFunc: func() (bool, string) { -// panic("mock out the HasNvml method") -// }, -// IsTegraSystemFunc: func() (bool, string) { -// panic("mock out the IsTegraSystem method") -// }, -// UsesNVGPUModuleFunc: func() (bool, string) { -// panic("mock out the UsesNVGPUModule method") -// }, -// } -// -// // use mockedinfoInterface in code that requires infoInterface -// // and then make assertions. -// -// } -type infoInterfaceMock struct { - // HasDXCoreFunc mocks the HasDXCore method. - HasDXCoreFunc func() (bool, string) - - // HasNvmlFunc mocks the HasNvml method. - HasNvmlFunc func() (bool, string) - - // IsTegraSystemFunc mocks the IsTegraSystem method. - IsTegraSystemFunc func() (bool, string) - - // UsesNVGPUModuleFunc mocks the UsesNVGPUModule method. - UsesNVGPUModuleFunc func() (bool, string) - - // calls tracks calls to the methods. - calls struct { - // HasDXCore holds details about calls to the HasDXCore method. - HasDXCore []struct { - } - // HasNvml holds details about calls to the HasNvml method. - HasNvml []struct { - } - // IsTegraSystem holds details about calls to the IsTegraSystem method. - IsTegraSystem []struct { - } - // UsesNVGPUModule holds details about calls to the UsesNVGPUModule method. - UsesNVGPUModule []struct { - } - } - lockHasDXCore sync.RWMutex - lockHasNvml sync.RWMutex - lockIsTegraSystem sync.RWMutex - lockUsesNVGPUModule sync.RWMutex -} - -// HasDXCore calls HasDXCoreFunc. -func (mock *infoInterfaceMock) HasDXCore() (bool, string) { - callInfo := struct { - }{} - mock.lockHasDXCore.Lock() - mock.calls.HasDXCore = append(mock.calls.HasDXCore, callInfo) - mock.lockHasDXCore.Unlock() - if mock.HasDXCoreFunc == nil { - var ( - bOut bool - sOut string - ) - return bOut, sOut - } - return mock.HasDXCoreFunc() -} - -// HasDXCoreCalls gets all the calls that were made to HasDXCore. -// Check the length with: -// -// len(mockedinfoInterface.HasDXCoreCalls()) -func (mock *infoInterfaceMock) HasDXCoreCalls() []struct { -} { - var calls []struct { - } - mock.lockHasDXCore.RLock() - calls = mock.calls.HasDXCore - mock.lockHasDXCore.RUnlock() - return calls -} - -// HasNvml calls HasNvmlFunc. -func (mock *infoInterfaceMock) HasNvml() (bool, string) { - callInfo := struct { - }{} - mock.lockHasNvml.Lock() - mock.calls.HasNvml = append(mock.calls.HasNvml, callInfo) - mock.lockHasNvml.Unlock() - if mock.HasNvmlFunc == nil { - var ( - bOut bool - sOut string - ) - return bOut, sOut - } - return mock.HasNvmlFunc() -} - -// HasNvmlCalls gets all the calls that were made to HasNvml. -// Check the length with: -// -// len(mockedinfoInterface.HasNvmlCalls()) -func (mock *infoInterfaceMock) HasNvmlCalls() []struct { -} { - var calls []struct { - } - mock.lockHasNvml.RLock() - calls = mock.calls.HasNvml - mock.lockHasNvml.RUnlock() - return calls -} - -// IsTegraSystem calls IsTegraSystemFunc. -func (mock *infoInterfaceMock) IsTegraSystem() (bool, string) { - callInfo := struct { - }{} - mock.lockIsTegraSystem.Lock() - mock.calls.IsTegraSystem = append(mock.calls.IsTegraSystem, callInfo) - mock.lockIsTegraSystem.Unlock() - if mock.IsTegraSystemFunc == nil { - var ( - bOut bool - sOut string - ) - return bOut, sOut - } - return mock.IsTegraSystemFunc() -} - -// IsTegraSystemCalls gets all the calls that were made to IsTegraSystem. -// Check the length with: -// -// len(mockedinfoInterface.IsTegraSystemCalls()) -func (mock *infoInterfaceMock) IsTegraSystemCalls() []struct { -} { - var calls []struct { - } - mock.lockIsTegraSystem.RLock() - calls = mock.calls.IsTegraSystem - mock.lockIsTegraSystem.RUnlock() - return calls -} - -// UsesNVGPUModule calls UsesNVGPUModuleFunc. -func (mock *infoInterfaceMock) UsesNVGPUModule() (bool, string) { - callInfo := struct { - }{} - mock.lockUsesNVGPUModule.Lock() - mock.calls.UsesNVGPUModule = append(mock.calls.UsesNVGPUModule, callInfo) - mock.lockUsesNVGPUModule.Unlock() - if mock.UsesNVGPUModuleFunc == nil { - var ( - bOut bool - sOut string - ) - return bOut, sOut - } - return mock.UsesNVGPUModuleFunc() -} - -// UsesNVGPUModuleCalls gets all the calls that were made to UsesNVGPUModule. -// Check the length with: -// -// len(mockedinfoInterface.UsesNVGPUModuleCalls()) -func (mock *infoInterfaceMock) UsesNVGPUModuleCalls() []struct { -} { - var calls []struct { - } - mock.lockUsesNVGPUModule.RLock() - calls = mock.calls.UsesNVGPUModule - mock.lockUsesNVGPUModule.RUnlock() - return calls -} diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index c56c18e1..4753cf71 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -91,7 +91,12 @@ func New(opts ...Option) (Interface, error) { l.nvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook" } if l.infolib == nil { - l.infolib = info.New() + l.infolib = info.New( + info.WithRoot(l.driverRoot), + info.WithLogger(l.logger), + info.WithNvmlLib(l.nvmllib), + info.WithDeviceLib(l.devicelib), + ) } l.driver = root.New( @@ -184,26 +189,19 @@ func (l *nvcdilib) resolveMode() (rmode string) { return l.mode } defer func() { - l.logger.Infof("Auto-detected mode as %q", rmode) + l.logger.Infof("Auto-detected mode as '%v'", rmode) }() - isWSL, reason := l.infolib.HasDXCore() - l.logger.Debugf("Is WSL-based system? %v: %v", isWSL, reason) - - if isWSL { + platform := l.infolib.ResolvePlatform() + switch platform { + case info.PlatformNVML: + return ModeNvml + case info.PlatformTegra: + return ModeCSV + case info.PlatformWSL: return ModeWsl } - - isNvml, reason := l.infolib.HasNvml() - l.logger.Debugf("Is NVML-based system? %v: %v", isNvml, reason) - - isTegra, reason := l.infolib.IsTegraSystem() - l.logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason) - - if isTegra && !isNvml { - return ModeCSV - } - + l.logger.Warningf("Unsupported platform detected: %v; assuming %v", platform, ModeNvml) return ModeNvml } diff --git a/pkg/nvcdi/lib_test.go b/pkg/nvcdi/lib_test.go deleted file mode 100644 index b467ee5b..00000000 --- a/pkg/nvcdi/lib_test.go +++ /dev/null @@ -1,116 +0,0 @@ -/** -# Copyright (c) NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package nvcdi - -import ( - "fmt" - "testing" - - testlog "github.com/sirupsen/logrus/hooks/test" - "github.com/stretchr/testify/require" -) - -func TestResolveMode(t *testing.T) { - logger, _ := testlog.NewNullLogger() - - testCases := []struct { - mode string - isTegra bool - hasDXCore bool - hasNVML bool - expected string - }{ - { - mode: "auto", - hasDXCore: true, - expected: "wsl", - }, - { - mode: "auto", - hasDXCore: false, - isTegra: true, - hasNVML: false, - expected: "csv", - }, - { - mode: "auto", - hasDXCore: false, - isTegra: false, - hasNVML: false, - expected: "nvml", - }, - { - mode: "auto", - hasDXCore: false, - isTegra: true, - hasNVML: true, - expected: "nvml", - }, - { - mode: "auto", - hasDXCore: false, - isTegra: false, - expected: "nvml", - }, - { - mode: "nvml", - hasDXCore: true, - isTegra: true, - expected: "nvml", - }, - { - mode: "wsl", - hasDXCore: false, - expected: "wsl", - }, - { - mode: "not-auto", - hasDXCore: true, - expected: "not-auto", - }, - } - - for i, tc := range testCases { - t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { - l := nvcdilib{ - logger: logger, - mode: tc.mode, - infolib: infoMock{hasDXCore: tc.hasDXCore, isTegra: tc.isTegra, hasNVML: tc.hasNVML}, - } - - require.Equal(t, tc.expected, l.resolveMode()) - }) - } -} - -type infoMock struct { - hasDXCore bool - isTegra bool - hasNVML bool -} - -func (i infoMock) HasDXCore() (bool, string) { - return i.hasDXCore, "" -} - -func (i infoMock) HasNvml() (bool, string) { - return i.hasNVML, "" -} - -func (i infoMock) IsTegraSystem() (bool, string) { - return i.isTegra, "" -} diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/api.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/api.go new file mode 100644 index 00000000..1c62d636 --- /dev/null +++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/api.go @@ -0,0 +1,41 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package info + +// Interface provides the API to the info package. +type Interface interface { + PlatformResolver + PropertyExtractor +} + +// PlatformResolver defines a function to resolve the current platform. +type PlatformResolver interface { + ResolvePlatform() Platform +} + +// PropertyExtractor provides a set of functions to query capabilities of the +// system. +// +//go:generate moq -rm -out property-extractor_mock.go . PropertyExtractor +type PropertyExtractor interface { + HasDXCore() (bool, string) + HasNvml() (bool, string) + HasTegraFiles() (bool, string) + // Deprecated: Use HasTegraFiles instead. + IsTegraSystem() (bool, string) + UsesOnlyNVGPUModule() (bool, string) +} diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/builder.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/builder.go new file mode 100644 index 00000000..87f20f02 --- /dev/null +++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/builder.go @@ -0,0 +1,78 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package info + +import ( + "github.com/NVIDIA/go-nvml/pkg/nvml" + + "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" +) + +type infolib struct { + PropertyExtractor + PlatformResolver +} + +type options struct { + logger basicLogger + root root + nvmllib nvml.Interface + devicelib device.Interface + + platform Platform + propertyExtractor PropertyExtractor +} + +// New creates a new instance of the 'info' interface. +func New(opts ...Option) Interface { + o := &options{} + for _, opt := range opts { + opt(o) + } + if o.logger == nil { + o.logger = &nullLogger{} + } + if o.root == "" { + o.root = "/" + } + if o.nvmllib == nil { + o.nvmllib = nvml.New( + nvml.WithLibraryPath(o.root.tryResolveLibrary("libnvidia-ml.so.1")), + ) + } + if o.devicelib == nil { + o.devicelib = device.New(device.WithNvml(o.nvmllib)) + } + if o.platform == "" { + o.platform = PlatformAuto + } + if o.propertyExtractor == nil { + o.propertyExtractor = &propertyExtractor{ + root: o.root, + nvmllib: o.nvmllib, + devicelib: o.devicelib, + } + } + return &infolib{ + PlatformResolver: &platformResolver{ + logger: o.logger, + platform: o.platform, + propertyExtractor: o.propertyExtractor, + }, + PropertyExtractor: o.propertyExtractor, + } +} diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/info.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/info.go deleted file mode 100644 index 677270c3..00000000 --- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/info.go +++ /dev/null @@ -1,102 +0,0 @@ -/** -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package info - -import ( - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/NVIDIA/go-nvml/pkg/dl" -) - -// Interface provides the API to the info package. -type Interface interface { - HasDXCore() (bool, string) - HasNvml() (bool, string) - IsTegraSystem() (bool, string) -} - -type infolib struct { - root string -} - -var _ Interface = &infolib{} - -// HasDXCore returns true if DXCore is detected on the system. -func (i *infolib) HasDXCore() (bool, string) { - const ( - libraryName = "libdxcore.so" - ) - if err := assertHasLibrary(libraryName); err != nil { - return false, fmt.Sprintf("could not load DXCore library: %v", err) - } - - return true, "found DXCore library" -} - -// HasNvml returns true if NVML is detected on the system. -func (i *infolib) HasNvml() (bool, string) { - const ( - libraryName = "libnvidia-ml.so.1" - ) - if err := assertHasLibrary(libraryName); err != nil { - return false, fmt.Sprintf("could not load NVML library: %v", err) - } - - return true, "found NVML library" -} - -// IsTegraSystem returns true if the system is detected as a Tegra-based system. -func (i *infolib) IsTegraSystem() (bool, string) { - tegraReleaseFile := filepath.Join(i.root, "/etc/nv_tegra_release") - tegraFamilyFile := filepath.Join(i.root, "/sys/devices/soc0/family") - - if info, err := os.Stat(tegraReleaseFile); err == nil && !info.IsDir() { - return true, fmt.Sprintf("%v found", tegraReleaseFile) - } - - if info, err := os.Stat(tegraFamilyFile); err != nil || info.IsDir() { - return false, fmt.Sprintf("%v file not found", tegraFamilyFile) - } - - contents, err := os.ReadFile(tegraFamilyFile) - if err != nil { - return false, fmt.Sprintf("could not read %v", tegraFamilyFile) - } - - if strings.HasPrefix(strings.ToLower(string(contents)), "tegra") { - return true, fmt.Sprintf("%v has 'tegra' prefix", tegraFamilyFile) - } - - return false, fmt.Sprintf("%v has no 'tegra' prefix", tegraFamilyFile) -} - -// assertHasLibrary returns an error if the specified library cannot be loaded. -func assertHasLibrary(libraryName string) error { - const ( - libraryLoadFlags = dl.RTLD_LAZY - ) - lib := dl.New(libraryName, libraryLoadFlags) - if err := lib.Open(); err != nil { - return err - } - defer lib.Close() - - return nil -} diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/logger.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/logger.go new file mode 100644 index 00000000..6a6f74ee --- /dev/null +++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/logger.go @@ -0,0 +1,28 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package info + +type basicLogger interface { + Debugf(string, ...interface{}) + Infof(string, ...interface{}) +} + +type nullLogger struct{} + +func (n *nullLogger) Debugf(string, ...interface{}) {} + +func (n *nullLogger) Infof(string, ...interface{}) {} diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/options.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/options.go index ce72150c..e05c2bf7 100644 --- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/options.go +++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/options.go @@ -16,24 +16,55 @@ package info -// Option defines a function for passing options to the New() call. -type Option func(*infolib) +import ( + "github.com/NVIDIA/go-nvml/pkg/nvml" -// New creates a new instance of the 'info' interface. -func New(opts ...Option) Interface { - i := &infolib{} - for _, opt := range opts { - opt(i) + "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" +) + +// Option defines a function for passing options to the New() call. +type Option func(*options) + +// WithDeviceLib sets the device library for the library. +func WithDeviceLib(devicelib device.Interface) Option { + return func(i *options) { + i.devicelib = devicelib } - if i.root == "" { - i.root = "/" +} + +// WithLogger sets the logger for the library. +func WithLogger(logger basicLogger) Option { + return func(i *options) { + i.logger = logger + } +} + +// WithNvmlLib sets the nvml library for the library. +func WithNvmlLib(nvmllib nvml.Interface) Option { + return func(i *options) { + i.nvmllib = nvmllib } - return i } // WithRoot provides a Option to set the root of the 'info' interface. -func WithRoot(root string) Option { - return func(i *infolib) { - i.root = root +func WithRoot(r string) Option { + return func(i *options) { + i.root = root(r) + } +} + +// WithPropertyExtractor provides an Option to set the PropertyExtractor +// interface implementation. +// This is predominantly used for testing. +func WithPropertyExtractor(propertyExtractor PropertyExtractor) Option { + return func(i *options) { + i.propertyExtractor = propertyExtractor + } +} + +// WithPlatform provides an option to set the platform explicitly. +func WithPlatform(platform Platform) Option { + return func(i *options) { + i.platform = platform } } diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/property-extractor.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/property-extractor.go new file mode 100644 index 00000000..5d5d97cd --- /dev/null +++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/property-extractor.go @@ -0,0 +1,143 @@ +/** +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package info + +import ( + "fmt" + "os" + "strings" + + "github.com/NVIDIA/go-nvml/pkg/nvml" + + "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" +) + +type propertyExtractor struct { + root root + nvmllib nvml.Interface + devicelib device.Interface +} + +var _ PropertyExtractor = &propertyExtractor{} + +// HasDXCore returns true if DXCore is detected on the system. +func (i *propertyExtractor) HasDXCore() (bool, string) { + const ( + libraryName = "libdxcore.so" + ) + if err := i.root.assertHasLibrary(libraryName); err != nil { + return false, fmt.Sprintf("could not load DXCore library: %v", err) + } + + return true, "found DXCore library" +} + +// HasNvml returns true if NVML is detected on the system. +func (i *propertyExtractor) HasNvml() (bool, string) { + const ( + libraryName = "libnvidia-ml.so.1" + ) + if err := i.root.assertHasLibrary(libraryName); err != nil { + return false, fmt.Sprintf("could not load NVML library: %v", err) + } + + return true, "found NVML library" +} + +// IsTegraSystem returns true if the system is detected as a Tegra-based system. +// Deprecated: Use HasTegraFiles instead. +func (i *propertyExtractor) IsTegraSystem() (bool, string) { + return i.HasTegraFiles() +} + +// HasTegraFiles returns true if tegra-based files are detected on the system. +func (i *propertyExtractor) HasTegraFiles() (bool, string) { + tegraReleaseFile := i.root.join("/etc/nv_tegra_release") + tegraFamilyFile := i.root.join("/sys/devices/soc0/family") + + if info, err := os.Stat(tegraReleaseFile); err == nil && !info.IsDir() { + return true, fmt.Sprintf("%v found", tegraReleaseFile) + } + + if info, err := os.Stat(tegraFamilyFile); err != nil || info.IsDir() { + return false, fmt.Sprintf("%v file not found", tegraFamilyFile) + } + + contents, err := os.ReadFile(tegraFamilyFile) + if err != nil { + return false, fmt.Sprintf("could not read %v", tegraFamilyFile) + } + + if strings.HasPrefix(strings.ToLower(string(contents)), "tegra") { + return true, fmt.Sprintf("%v has 'tegra' prefix", tegraFamilyFile) + } + + return false, fmt.Sprintf("%v has no 'tegra' prefix", tegraFamilyFile) +} + +// UsesOnlyNVGPUModule checks whether the only the nvgpu module is used. +// This kernel module is used on Tegra-based systems when using the iGPU. +// Since some of these systems also support NVML, we use the device name +// reported by NVML to determine whether the system is an iGPU system. +// +// Devices that use the nvgpu module have their device names as: +// +// GPU 0: Orin (nvgpu) (UUID: 54d0709b-558d-5a59-9c65-0c5fc14a21a4) +// +// This function returns true if ALL devices use the nvgpu module. +func (i *propertyExtractor) UsesOnlyNVGPUModule() (uses bool, reason string) { + // We ensure that this function never panics + defer func() { + if err := recover(); err != nil { + uses = false + reason = fmt.Sprintf("panic: %v", err) + } + }() + + ret := i.nvmllib.Init() + if ret != nvml.SUCCESS { + return false, fmt.Sprintf("failed to initialize nvml: %v", ret) + } + defer func() { + _ = i.nvmllib.Shutdown() + }() + + var names []string + + err := i.devicelib.VisitDevices(func(i int, d device.Device) error { + name, ret := d.GetName() + if ret != nvml.SUCCESS { + return fmt.Errorf("device %v: %v", i, ret) + } + names = append(names, name) + return nil + }) + if err != nil { + return false, fmt.Sprintf("failed to get device names: %v", err) + } + + if len(names) == 0 { + return false, "no devices found" + } + + for _, name := range names { + if !strings.Contains(name, "(nvgpu)") { + return false, fmt.Sprintf("device %q does not use nvgpu module", name) + } + } + return true, "all devices use nvgpu module" +} diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/property-extractor_mock.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/property-extractor_mock.go new file mode 100644 index 00000000..f2b057e6 --- /dev/null +++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/property-extractor_mock.go @@ -0,0 +1,215 @@ +// Code generated by moq; DO NOT EDIT. +// github.com/matryer/moq + +package info + +import ( + "sync" +) + +// Ensure, that PropertyExtractorMock does implement PropertyExtractor. +// If this is not the case, regenerate this file with moq. +var _ PropertyExtractor = &PropertyExtractorMock{} + +// PropertyExtractorMock is a mock implementation of PropertyExtractor. +// +// func TestSomethingThatUsesPropertyExtractor(t *testing.T) { +// +// // make and configure a mocked PropertyExtractor +// mockedPropertyExtractor := &PropertyExtractorMock{ +// HasDXCoreFunc: func() (bool, string) { +// panic("mock out the HasDXCore method") +// }, +// HasNvmlFunc: func() (bool, string) { +// panic("mock out the HasNvml method") +// }, +// HasTegraFilesFunc: func() (bool, string) { +// panic("mock out the HasTegraFiles method") +// }, +// IsTegraSystemFunc: func() (bool, string) { +// panic("mock out the IsTegraSystem method") +// }, +// UsesOnlyNVGPUModuleFunc: func() (bool, string) { +// panic("mock out the UsesOnlyNVGPUModule method") +// }, +// } +// +// // use mockedPropertyExtractor in code that requires PropertyExtractor +// // and then make assertions. +// +// } +type PropertyExtractorMock struct { + // HasDXCoreFunc mocks the HasDXCore method. + HasDXCoreFunc func() (bool, string) + + // HasNvmlFunc mocks the HasNvml method. + HasNvmlFunc func() (bool, string) + + // HasTegraFilesFunc mocks the HasTegraFiles method. + HasTegraFilesFunc func() (bool, string) + + // IsTegraSystemFunc mocks the IsTegraSystem method. + IsTegraSystemFunc func() (bool, string) + + // UsesOnlyNVGPUModuleFunc mocks the UsesOnlyNVGPUModule method. + UsesOnlyNVGPUModuleFunc func() (bool, string) + + // calls tracks calls to the methods. + calls struct { + // HasDXCore holds details about calls to the HasDXCore method. + HasDXCore []struct { + } + // HasNvml holds details about calls to the HasNvml method. + HasNvml []struct { + } + // HasTegraFiles holds details about calls to the HasTegraFiles method. + HasTegraFiles []struct { + } + // IsTegraSystem holds details about calls to the IsTegraSystem method. + IsTegraSystem []struct { + } + // UsesOnlyNVGPUModule holds details about calls to the UsesOnlyNVGPUModule method. + UsesOnlyNVGPUModule []struct { + } + } + lockHasDXCore sync.RWMutex + lockHasNvml sync.RWMutex + lockHasTegraFiles sync.RWMutex + lockIsTegraSystem sync.RWMutex + lockUsesOnlyNVGPUModule sync.RWMutex +} + +// HasDXCore calls HasDXCoreFunc. +func (mock *PropertyExtractorMock) HasDXCore() (bool, string) { + if mock.HasDXCoreFunc == nil { + panic("PropertyExtractorMock.HasDXCoreFunc: method is nil but PropertyExtractor.HasDXCore was just called") + } + callInfo := struct { + }{} + mock.lockHasDXCore.Lock() + mock.calls.HasDXCore = append(mock.calls.HasDXCore, callInfo) + mock.lockHasDXCore.Unlock() + return mock.HasDXCoreFunc() +} + +// HasDXCoreCalls gets all the calls that were made to HasDXCore. +// Check the length with: +// +// len(mockedPropertyExtractor.HasDXCoreCalls()) +func (mock *PropertyExtractorMock) HasDXCoreCalls() []struct { +} { + var calls []struct { + } + mock.lockHasDXCore.RLock() + calls = mock.calls.HasDXCore + mock.lockHasDXCore.RUnlock() + return calls +} + +// HasNvml calls HasNvmlFunc. +func (mock *PropertyExtractorMock) HasNvml() (bool, string) { + if mock.HasNvmlFunc == nil { + panic("PropertyExtractorMock.HasNvmlFunc: method is nil but PropertyExtractor.HasNvml was just called") + } + callInfo := struct { + }{} + mock.lockHasNvml.Lock() + mock.calls.HasNvml = append(mock.calls.HasNvml, callInfo) + mock.lockHasNvml.Unlock() + return mock.HasNvmlFunc() +} + +// HasNvmlCalls gets all the calls that were made to HasNvml. +// Check the length with: +// +// len(mockedPropertyExtractor.HasNvmlCalls()) +func (mock *PropertyExtractorMock) HasNvmlCalls() []struct { +} { + var calls []struct { + } + mock.lockHasNvml.RLock() + calls = mock.calls.HasNvml + mock.lockHasNvml.RUnlock() + return calls +} + +// HasTegraFiles calls HasTegraFilesFunc. +func (mock *PropertyExtractorMock) HasTegraFiles() (bool, string) { + if mock.HasTegraFilesFunc == nil { + panic("PropertyExtractorMock.HasTegraFilesFunc: method is nil but PropertyExtractor.HasTegraFiles was just called") + } + callInfo := struct { + }{} + mock.lockHasTegraFiles.Lock() + mock.calls.HasTegraFiles = append(mock.calls.HasTegraFiles, callInfo) + mock.lockHasTegraFiles.Unlock() + return mock.HasTegraFilesFunc() +} + +// HasTegraFilesCalls gets all the calls that were made to HasTegraFiles. +// Check the length with: +// +// len(mockedPropertyExtractor.HasTegraFilesCalls()) +func (mock *PropertyExtractorMock) HasTegraFilesCalls() []struct { +} { + var calls []struct { + } + mock.lockHasTegraFiles.RLock() + calls = mock.calls.HasTegraFiles + mock.lockHasTegraFiles.RUnlock() + return calls +} + +// IsTegraSystem calls IsTegraSystemFunc. +func (mock *PropertyExtractorMock) IsTegraSystem() (bool, string) { + if mock.IsTegraSystemFunc == nil { + panic("PropertyExtractorMock.IsTegraSystemFunc: method is nil but PropertyExtractor.IsTegraSystem was just called") + } + callInfo := struct { + }{} + mock.lockIsTegraSystem.Lock() + mock.calls.IsTegraSystem = append(mock.calls.IsTegraSystem, callInfo) + mock.lockIsTegraSystem.Unlock() + return mock.IsTegraSystemFunc() +} + +// IsTegraSystemCalls gets all the calls that were made to IsTegraSystem. +// Check the length with: +// +// len(mockedPropertyExtractor.IsTegraSystemCalls()) +func (mock *PropertyExtractorMock) IsTegraSystemCalls() []struct { +} { + var calls []struct { + } + mock.lockIsTegraSystem.RLock() + calls = mock.calls.IsTegraSystem + mock.lockIsTegraSystem.RUnlock() + return calls +} + +// UsesOnlyNVGPUModule calls UsesOnlyNVGPUModuleFunc. +func (mock *PropertyExtractorMock) UsesOnlyNVGPUModule() (bool, string) { + if mock.UsesOnlyNVGPUModuleFunc == nil { + panic("PropertyExtractorMock.UsesOnlyNVGPUModuleFunc: method is nil but PropertyExtractor.UsesOnlyNVGPUModule was just called") + } + callInfo := struct { + }{} + mock.lockUsesOnlyNVGPUModule.Lock() + mock.calls.UsesOnlyNVGPUModule = append(mock.calls.UsesOnlyNVGPUModule, callInfo) + mock.lockUsesOnlyNVGPUModule.Unlock() + return mock.UsesOnlyNVGPUModuleFunc() +} + +// UsesOnlyNVGPUModuleCalls gets all the calls that were made to UsesOnlyNVGPUModule. +// Check the length with: +// +// len(mockedPropertyExtractor.UsesOnlyNVGPUModuleCalls()) +func (mock *PropertyExtractorMock) UsesOnlyNVGPUModuleCalls() []struct { +} { + var calls []struct { + } + mock.lockUsesOnlyNVGPUModule.RLock() + calls = mock.calls.UsesOnlyNVGPUModule + mock.lockUsesOnlyNVGPUModule.RUnlock() + return calls +} diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/resolver.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/resolver.go new file mode 100644 index 00000000..1aeb04c0 --- /dev/null +++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/resolver.go @@ -0,0 +1,64 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package info + +// Platform represents a supported plaform. +type Platform string + +const ( + PlatformAuto = Platform("auto") + PlatformNVML = Platform("nvml") + PlatformTegra = Platform("tegra") + PlatformWSL = Platform("wsl") + PlatformUnknown = Platform("unknown") +) + +type platformResolver struct { + logger basicLogger + platform Platform + propertyExtractor PropertyExtractor +} + +func (p platformResolver) ResolvePlatform() Platform { + if p.platform != PlatformAuto { + p.logger.Infof("Using requested platform '%s'", p.platform) + return p.platform + } + + hasDXCore, reason := p.propertyExtractor.HasDXCore() + p.logger.Debugf("Is WSL-based system? %v: %v", hasDXCore, reason) + + hasTegraFiles, reason := p.propertyExtractor.HasTegraFiles() + p.logger.Debugf("Is Tegra-based system? %v: %v", hasTegraFiles, reason) + + hasNVML, reason := p.propertyExtractor.HasNvml() + p.logger.Debugf("Is NVML-based system? %v: %v", hasNVML, reason) + + usesOnlyNVGPUModule, reason := p.propertyExtractor.UsesOnlyNVGPUModule() + p.logger.Debugf("Uses nvgpu kernel module? %v: %v", usesOnlyNVGPUModule, reason) + + switch { + case hasDXCore: + return PlatformWSL + case (hasTegraFiles && !hasNVML), usesOnlyNVGPUModule: + return PlatformTegra + case hasNVML: + return PlatformNVML + default: + return PlatformUnknown + } +} diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/root.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/root.go new file mode 100644 index 00000000..d38dc735 --- /dev/null +++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/root.go @@ -0,0 +1,86 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package info + +import ( + "fmt" + "path/filepath" + + "github.com/NVIDIA/go-nvml/pkg/dl" +) + +// root represents a directory on the filesystem relative to which libraries +// such as the NVIDIA driver libraries can be found. +type root string + +func (r root) join(parts ...string) string { + return filepath.Join(append([]string{string(r)}, parts...)...) +} + +// assertHasLibrary returns an error if the specified library cannot be loaded. +func (r root) assertHasLibrary(libraryName string) error { + const ( + libraryLoadFlags = dl.RTLD_LAZY + ) + lib := dl.New(r.tryResolveLibrary(libraryName), libraryLoadFlags) + if err := lib.Open(); err != nil { + return err + } + defer lib.Close() + + return nil +} + +// tryResolveLibrary attempts to locate the specified library in the root. +// If the root is not specified, is "/", or the library cannot be found in the +// set of predefined paths, the input is returned as is. +func (r root) tryResolveLibrary(libraryName string) string { + if r == "" || r == "/" { + return libraryName + } + + librarySearchPaths := []string{ + "/usr/lib64", + "/usr/lib/x86_64-linux-gnu", + "/usr/lib/aarch64-linux-gnu", + "/lib64", + "/lib/x86_64-linux-gnu", + "/lib/aarch64-linux-gnu", + } + + for _, d := range librarySearchPaths { + l := r.join(d, libraryName) + resolved, err := resolveLink(l) + if err != nil { + continue + } + return resolved + } + + return libraryName +} + +// resolveLink finds the target of a symlink or the file itself in the +// case of a regular file. +// This is equivalent to running `readlink -f ${l}`. +func resolveLink(l string) (string, error) { + resolved, err := filepath.EvalSymlinks(l) + if err != nil { + return "", fmt.Errorf("error resolving link '%v': %w", l, err) + } + return resolved, nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index ee1fcd7e..fc3b76d4 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/NVIDIA/go-nvlib v0.3.0 +# github.com/NVIDIA/go-nvlib v0.4.0 ## explicit; go 1.20 github.com/NVIDIA/go-nvlib/pkg/nvlib/device github.com/NVIDIA/go-nvlib/pkg/nvlib/info