mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Add UsesNVGPUModule info function
This change adds a UsesNVGPUModule function that checks whether the nvgpu kernel module is used by NVML. This allows for more robust detection of Tegra-based platforms where libnvidia-ml.so is supported to enumerate the iGPU. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
81
internal/info/additional_info.go
Normal file
81
internal/info/additional_info.go
Normal file
@@ -0,0 +1,81 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// additionalInfo allows for the info.Interface to be extended to implement the infoInterface.
|
||||
type additionalInfo struct {
|
||||
// Embedded so that the methods of info.Interface are promoted to additionalInfo.
info.Interface
|
||||
// nvmllib is used by UsesNVGPUModule to initialize and shut down NVML.
nvmllib nvml.Interface
|
||||
// devicelib is used by UsesNVGPUModule to visit the available devices.
devicelib device.Interface
|
||||
}
|
||||
|
||||
// UsesNVGPUModule checks whether the nvgpu module is used.
|
||||
// We use the device name to signal this, since devices that use the nvgpu module have their device
|
||||
// names as:
|
||||
//
|
||||
// GPU 0: Orin (nvgpu) (UUID: 54d0709b-558d-5a59-9c65-0c5fc14a21a4)
|
||||
//
|
||||
// This function returns true if ALL devices use the nvgpu module.
|
||||
func (i additionalInfo) UsesNVGPUModule() (uses bool, reason string) {
|
||||
// We ensure that this function never panics
|
||||
defer func() {
|
||||
if err := recover(); err != nil {
|
||||
uses = false
|
||||
reason = fmt.Sprintf("panic: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
ret := i.nvmllib.Init()
|
||||
if ret != nvml.SUCCESS {
|
||||
return false, fmt.Sprintf("failed to initialize nvml: %v", ret)
|
||||
}
|
||||
defer i.nvmllib.Shutdown()
|
||||
|
||||
var names []string
|
||||
|
||||
err := i.devicelib.VisitDevices(func(i int, d device.Device) error {
|
||||
name, ret := d.GetName()
|
||||
if ret != nvml.SUCCESS {
|
||||
return fmt.Errorf("device %v: %v", i, ret)
|
||||
}
|
||||
names = append(names, name)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return false, fmt.Sprintf("failed to get device names: %v", err)
|
||||
}
|
||||
|
||||
if len(names) == 0 {
|
||||
return false, "no devices found"
|
||||
}
|
||||
|
||||
for _, name := range names {
|
||||
if !strings.Contains(name, "(nvgpu)") {
|
||||
return false, fmt.Sprintf("device %q does not use nvgpu module", name)
|
||||
}
|
||||
}
|
||||
return true, "all devices use nvgpu module"
|
||||
}
|
||||
Reference in New Issue
Block a user