mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-23 00:38:26 +00:00
dfa041991f
This change generates a v0.4.0 CDI spec instead of a v0.5.0 spec. This allows older versions of podman, for example, to be used. This requires that the device names do not start on a numeric character and that the HostPath for a device is unspecified. Signed-off-by: Evan Lezar <elezar@nvidia.com>
124 lines
3.5 KiB
Go
124 lines
3.5 KiB
Go
/*
|
|
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY Type, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package cdi
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
|
)
|
|
|
|
// nvmlDevice wraps an nvml.Device with more functions.
|
|
type nvmlDevice struct {
|
|
nvml.Device
|
|
}
|
|
|
|
// nvmlMigDevice allows for specific functions of nvmlDevice to be overridden.
|
|
type nvmlMigDevice nvmlDevice
|
|
|
|
// deviceInfo defines the information the required to construct a Device
|
|
type deviceInfo interface {
|
|
GetUUID() (string, error)
|
|
GetDeviceNodes() ([]string, error)
|
|
}
|
|
|
|
var _ deviceInfo = (*nvmlDevice)(nil)
|
|
var _ deviceInfo = (*nvmlMigDevice)(nil)
|
|
|
|
func newGPUDevice(i int, gpu device.Device) (string, nvmlDevice) {
|
|
return fmt.Sprintf("gpu%v", i), nvmlDevice{gpu}
|
|
}
|
|
|
|
func newMigDevice(i int, j int, mig device.MigDevice) (string, nvmlMigDevice) {
|
|
return fmt.Sprintf("mig%v:%v", i, j), nvmlMigDevice{mig}
|
|
}
|
|
|
|
// GetUUID returns the UUID of the device
|
|
func (d nvmlDevice) GetUUID() (string, error) {
|
|
uuid, ret := d.Device.GetUUID()
|
|
if ret != nvml.SUCCESS {
|
|
return "", ret
|
|
}
|
|
return uuid, nil
|
|
}
|
|
|
|
// GetUUID returns the UUID of the device
|
|
func (d nvmlMigDevice) GetUUID() (string, error) {
|
|
return nvmlDevice(d).GetUUID()
|
|
}
|
|
|
|
// GetDeviceNodes returns the device node paths for a GPU device
|
|
func (d nvmlDevice) GetDeviceNodes() ([]string, error) {
|
|
minor, ret := d.GetMinorNumber()
|
|
if ret != nvml.SUCCESS {
|
|
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
|
}
|
|
path := fmt.Sprintf("/dev/nvidia%d", minor)
|
|
|
|
return []string{path}, nil
|
|
}
|
|
|
|
// GetDeviceNodes returns the device node paths for a MIG device
|
|
func (d nvmlMigDevice) GetDeviceNodes() ([]string, error) {
|
|
parent, ret := d.GetDeviceHandleFromMigDeviceHandle()
|
|
if ret != nvml.SUCCESS {
|
|
return nil, fmt.Errorf("error getting parent device: %v", ret)
|
|
}
|
|
minor, ret := parent.GetMinorNumber()
|
|
if ret != nvml.SUCCESS {
|
|
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
|
}
|
|
parentPath := fmt.Sprintf("/dev/nvidia%d", minor)
|
|
|
|
migCaps, err := nvcaps.NewMigCaps()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
|
|
}
|
|
|
|
gi, ret := d.GetGpuInstanceId()
|
|
if ret != nvml.SUCCESS {
|
|
return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
|
|
}
|
|
|
|
ci, ret := d.GetComputeInstanceId()
|
|
if ret != nvml.SUCCESS {
|
|
return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
|
|
}
|
|
|
|
giCap := nvcaps.NewGPUInstanceCap(minor, gi)
|
|
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
|
|
}
|
|
|
|
ciCap := nvcaps.NewComputeInstanceCap(minor, gi, ci)
|
|
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
|
|
}
|
|
|
|
devicePaths := []string{
|
|
parentPath,
|
|
giCapDevicePath,
|
|
ciCapDevicePath,
|
|
}
|
|
|
|
return devicePaths, nil
|
|
}
|