mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-03-28 19:33:13 +00:00
Merge branch 'update-cdi-spec-generation' into 'main'
Update CDI spec generation. See merge request nvidia/container-toolkit/container-toolkit!225
This commit is contained in:
commit
c068d4048f
@ -139,7 +139,7 @@ func (m command) generateSpec() (*specs.Spec, error) {
|
|||||||
devicelib := device.New(device.WithNvml(nvmllib))
|
devicelib := device.New(device.WithNvml(nvmllib))
|
||||||
|
|
||||||
spec := specs.Spec{
|
spec := specs.Spec{
|
||||||
Version: specs.CurrentVersion,
|
Version: "0.4.0",
|
||||||
Kind: "nvidia.com/gpu",
|
Kind: "nvidia.com/gpu",
|
||||||
ContainerEdits: specs.ContainerEdits{},
|
ContainerEdits: specs.ContainerEdits{},
|
||||||
}
|
}
|
||||||
@ -176,6 +176,23 @@ func (m command) generateSpec() (*specs.Spec, error) {
|
|||||||
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
|
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We create an "all" device with all the discovered device nodes
|
||||||
|
var allDeviceNodes []*specs.DeviceNode
|
||||||
|
for _, d := range spec.Devices {
|
||||||
|
for _, dn := range d.ContainerEdits.DeviceNodes {
|
||||||
|
allDeviceNodes = append(allDeviceNodes, dn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
all := specs.Device{
|
||||||
|
Name: "all",
|
||||||
|
ContainerEdits: specs.ContainerEdits{
|
||||||
|
DeviceNodes: allDeviceNodes,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
spec.Devices = append(spec.Devices, all)
|
||||||
|
spec.ContainerEdits.DeviceNodes = m.getExistingMetaDeviceNodes()
|
||||||
|
|
||||||
libraries, err := m.findLibs(nvmllib)
|
libraries, err := m.findLibs(nvmllib)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to locate driver libraries: %v", err)
|
return nil, fmt.Errorf("failed to locate driver libraries: %v", err)
|
||||||
@ -201,20 +218,13 @@ func (m command) generateSpec() (*specs.Spec, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) {
|
func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) {
|
||||||
var deviceNodes []*specs.DeviceNode
|
|
||||||
|
|
||||||
deviceNodePaths, err := d.GetDeviceNodes()
|
deviceNodePaths, err := d.GetDeviceNodes()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return specs.Device{}, fmt.Errorf("failed to get paths for device: %v", err)
|
return specs.Device{}, fmt.Errorf("failed to get paths for device: %v", err)
|
||||||
}
|
}
|
||||||
for _, p := range deviceNodePaths {
|
|
||||||
deviceNode := specs.DeviceNode{
|
deviceNodes := getDeviceNodesFromPaths(deviceNodePaths)
|
||||||
Path: p,
|
|
||||||
// TODO: Set the host path dependent on the root
|
|
||||||
HostPath: p,
|
|
||||||
}
|
|
||||||
deviceNodes = append(deviceNodes, &deviceNode)
|
|
||||||
}
|
|
||||||
device := specs.Device{
|
device := specs.Device{
|
||||||
Name: name,
|
Name: name,
|
||||||
ContainerEdits: specs.ContainerEdits{
|
ContainerEdits: specs.ContainerEdits{
|
||||||
@ -225,6 +235,38 @@ func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) {
|
|||||||
return device, nil
|
return device, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getExistingMetaDeviceNodes returns device node entries for those of a fixed
// list of NVIDIA control ("meta") device paths that can be stat'ed on the
// current system. Paths for which os.Stat returns an error are logged at info
// level and left out of the result.
func (m command) getExistingMetaDeviceNodes() []*specs.DeviceNode {
|
||||||
|
// Candidate paths that are checked one by one below.
metaDeviceNodePaths := []string{
|
||||||
|
"/dev/nvidia-modeset",
|
||||||
|
"/dev/nvidia-uvm-tools",
|
||||||
|
"/dev/nvidia-uvm",
|
||||||
|
"/dev/nvidiactl",
|
||||||
|
}
|
||||||
|
|
||||||
|
var existingDeviceNodePaths []string
|
||||||
|
for _, p := range metaDeviceNodePaths {
|
||||||
|
// NOTE(review): any os.Stat error (not only "does not exist") takes this
// branch and is reported as a missing device.
if _, err := os.Stat(p); err != nil {
|
||||||
|
m.logger.Infof("Ignoring missing meta device %v", p)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
existingDeviceNodePaths = append(existingDeviceNodePaths, p)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert the surviving paths into device node entries.
return getDeviceNodesFromPaths(existingDeviceNodePaths)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getDeviceNodesFromPaths converts a list of device node paths into a slice
// of *specs.DeviceNode, one entry per input path and in the same order. The
// returned slice is nil when deviceNodePaths is empty.
func getDeviceNodesFromPaths(deviceNodePaths []string) []*specs.DeviceNode {
|
||||||
|
var deviceNodes []*specs.DeviceNode
|
||||||
|
for _, p := range deviceNodePaths {
|
||||||
|
// Only Path is populated; all other DeviceNode fields keep their zero
// values.
deviceNode := specs.DeviceNode{
|
||||||
|
Path: p,
|
||||||
|
}
|
||||||
|
// deviceNode is declared inside the loop body, so each appended pointer
// refers to a distinct value.
deviceNodes = append(deviceNodes, &deviceNode)
|
||||||
|
}
|
||||||
|
|
||||||
|
return deviceNodes
|
||||||
|
}
|
||||||
|
|
||||||
func (m command) findLibs(nvmllib nvml.Interface) ([]string, error) {
|
func (m command) findLibs(nvmllib nvml.Interface) ([]string, error) {
|
||||||
version, r := nvmllib.SystemGetDriverVersion()
|
version, r := nvmllib.SystemGetDriverVersion()
|
||||||
if r != nvml.SUCCESS {
|
if r != nvml.SUCCESS {
|
||||||
|
@ -42,11 +42,11 @@ var _ deviceInfo = (*nvmlDevice)(nil)
|
|||||||
var _ deviceInfo = (*nvmlMigDevice)(nil)
|
var _ deviceInfo = (*nvmlMigDevice)(nil)
|
||||||
|
|
||||||
func newGPUDevice(i int, gpu device.Device) (string, nvmlDevice) {
|
func newGPUDevice(i int, gpu device.Device) (string, nvmlDevice) {
|
||||||
return fmt.Sprintf("%v", i), nvmlDevice{gpu}
|
return fmt.Sprintf("gpu%v", i), nvmlDevice{gpu}
|
||||||
}
|
}
|
||||||
|
|
||||||
func newMigDevice(i int, j int, mig device.MigDevice) (string, nvmlMigDevice) {
|
func newMigDevice(i int, j int, mig device.MigDevice) (string, nvmlMigDevice) {
|
||||||
return fmt.Sprintf("%v:%v", i, j), nvmlMigDevice{mig}
|
return fmt.Sprintf("mig%v:%v", i, j), nvmlMigDevice{mig}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetUUID returns the UUID of the device
|
// GetUUID returns the UUID of the device
|
||||||
|
Loading…
Reference in New Issue
Block a user