2023-02-16 15:29:53 +00:00
|
|
|
/**
|
|
|
|
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
**/
|
|
|
|
|
|
|
|
package nvcdi
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
|
2024-02-29 08:29:20 +00:00
|
|
|
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
|
|
|
"github.com/NVIDIA/go-nvlib/pkg/nvml"
|
2024-02-07 11:20:58 +00:00
|
|
|
"tags.cncf.io/container-device-interface/pkg/cdi"
|
|
|
|
"tags.cncf.io/container-device-interface/specs-go"
|
|
|
|
|
|
|
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
|
|
|
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
2023-02-16 15:29:53 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// nvmllib is a distinct named type whose underlying type is nvcdilib. The
// methods declared on it in this file generate CDI device specs and common
// container edits using the nvmllib and devicelib fields of that struct.
type nvmllib nvcdilib
|
|
|
|
|
|
|
|
// Compile-time assertion that *nvmllib implements Interface.
var _ Interface = (*nvmllib)(nil)
|
|
|
|
|
2023-02-20 12:29:52 +00:00
|
|
|
// GetSpec should not be called for nvmllib
|
|
|
|
func (l *nvmllib) GetSpec() (spec.Interface, error) {
|
|
|
|
return nil, fmt.Errorf("Unexpected call to nvmllib.GetSpec()")
|
|
|
|
}
|
|
|
|
|
2023-02-16 15:29:53 +00:00
|
|
|
// GetAllDeviceSpecs returns the device specs for all available devices.
|
|
|
|
func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
|
|
|
var deviceSpecs []specs.Device
|
|
|
|
|
2023-03-15 08:12:44 +00:00
|
|
|
if r := l.nvmllib.Init(); r != nvml.SUCCESS {
|
|
|
|
return nil, fmt.Errorf("failed to initalize NVML: %v", r)
|
|
|
|
}
|
|
|
|
defer l.nvmllib.Shutdown()
|
|
|
|
|
2023-02-16 15:29:53 +00:00
|
|
|
gpuDeviceSpecs, err := l.getGPUDeviceSpecs()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
deviceSpecs = append(deviceSpecs, gpuDeviceSpecs...)
|
|
|
|
|
|
|
|
migDeviceSpecs, err := l.getMigDeviceSpecs()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
deviceSpecs = append(deviceSpecs, migDeviceSpecs...)
|
|
|
|
|
|
|
|
return deviceSpecs, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetCommonEdits generates a CDI specification that can be used for ANY devices
|
|
|
|
func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
|
|
|
common, err := newCommonNVMLDiscoverer(l.logger, l.driverRoot, l.nvidiaCTKPath, l.nvmllib)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return edits.FromDiscoverer(common)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *nvmllib) getGPUDeviceSpecs() ([]specs.Device, error) {
|
|
|
|
var deviceSpecs []specs.Device
|
|
|
|
err := l.devicelib.VisitDevices(func(i int, d device.Device) error {
|
|
|
|
deviceSpec, err := l.GetGPUDeviceSpecs(i, d)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
deviceSpecs = append(deviceSpecs, *deviceSpec)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to generate CDI edits for GPU devices: %v", err)
|
|
|
|
}
|
|
|
|
return deviceSpecs, err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *nvmllib) getMigDeviceSpecs() ([]specs.Device, error) {
|
|
|
|
var deviceSpecs []specs.Device
|
|
|
|
err := l.devicelib.VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
|
|
|
|
deviceSpec, err := l.GetMIGDeviceSpecs(i, d, j, mig)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
deviceSpecs = append(deviceSpecs, *deviceSpec)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to generate CDI edits for GPU devices: %v", err)
|
|
|
|
}
|
|
|
|
return deviceSpecs, err
|
|
|
|
}
|