mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 16:29:18 +00:00
1dc028cdf2
This change adds a UsesNVGPUModule function that checks whether the nvgpu kernel module is used by NVML. This allows for more robust detection of Tegra-based platforms where libnvidia-ml.so is supported to enumerate the iGPU. Signed-off-by: Evan Lezar <elezar@nvidia.com>
102 lines
2.8 KiB
Go
102 lines
2.8 KiB
Go
/**
|
|
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
**/
|
|
|
|
package info
|
|
|
|
import (
|
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
|
cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/parser"
|
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
|
)
|
|
|
|
// infoInterface provides an alias for mocking.
|
|
//
|
|
//go:generate moq -stub -out info-interface_mock.go . infoInterface
|
|
type infoInterface interface {
|
|
info.Interface
|
|
// UsesNVGPUModule indicates whether the system is using the nvgpu kernel module
|
|
UsesNVGPUModule() (bool, string)
|
|
}
|
|
|
|
type resolver struct {
|
|
logger logger.Interface
|
|
info infoInterface
|
|
}
|
|
|
|
// ResolveAutoMode determines the correct mode for the platform if set to "auto"
|
|
func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rmode string) {
|
|
nvinfo := info.New()
|
|
nvmllib := nvml.New()
|
|
devicelib := device.New(
|
|
device.WithNvml(nvmllib),
|
|
)
|
|
|
|
info := additionalInfo{
|
|
Interface: nvinfo,
|
|
nvmllib: nvmllib,
|
|
devicelib: devicelib,
|
|
}
|
|
|
|
r := resolver{
|
|
logger: logger,
|
|
info: info,
|
|
}
|
|
return r.resolveMode(mode, image)
|
|
}
|
|
|
|
// resolveMode determines the correct mode for the platform if set to "auto"
|
|
func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
|
|
if mode != "auto" {
|
|
return mode
|
|
}
|
|
defer func() {
|
|
r.logger.Infof("Auto-detected mode as '%v'", rmode)
|
|
}()
|
|
|
|
if onlyFullyQualifiedCDIDevices(image) {
|
|
return "cdi"
|
|
}
|
|
|
|
isTegra, reason := r.info.IsTegraSystem()
|
|
r.logger.Debugf("Is Tegra-based system? %v: %v", isTegra, reason)
|
|
|
|
hasNVML, reason := r.info.HasNvml()
|
|
r.logger.Debugf("Has NVML? %v: %v", hasNVML, reason)
|
|
|
|
usesNVGPUModule, reason := r.info.UsesNVGPUModule()
|
|
r.logger.Debugf("Uses nvgpu kernel module? %v: %v", usesNVGPUModule, reason)
|
|
|
|
if (isTegra && !hasNVML) || usesNVGPUModule {
|
|
return "csv"
|
|
}
|
|
|
|
return "legacy"
|
|
}
|
|
|
|
func onlyFullyQualifiedCDIDevices(image image.CUDA) bool {
|
|
var hasCDIdevice bool
|
|
for _, device := range image.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
|
|
if !cdi.IsQualifiedName(device) {
|
|
return false
|
|
}
|
|
hasCDIdevice = true
|
|
}
|
|
return hasCDIdevice
|
|
}
|