mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-03-24 22:49:43 +00:00
Merge branch 'update-cdi-spec-generation' into 'main'
Update CDI spec generation
See merge request nvidia/container-toolkit/container-toolkit!225
This commit is contained in:
commit
c068d4048f
@ -139,7 +139,7 @@ func (m command) generateSpec() (*specs.Spec, error) {
|
||||
devicelib := device.New(device.WithNvml(nvmllib))
|
||||
|
||||
spec := specs.Spec{
|
||||
Version: specs.CurrentVersion,
|
||||
Version: "0.4.0",
|
||||
Kind: "nvidia.com/gpu",
|
||||
ContainerEdits: specs.ContainerEdits{},
|
||||
}
|
||||
@ -176,6 +176,23 @@ func (m command) generateSpec() (*specs.Spec, error) {
|
||||
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
|
||||
}
|
||||
|
||||
// We create an "all" device with all the discovered device nodes
|
||||
var allDeviceNodes []*specs.DeviceNode
|
||||
for _, d := range spec.Devices {
|
||||
for _, dn := range d.ContainerEdits.DeviceNodes {
|
||||
allDeviceNodes = append(allDeviceNodes, dn)
|
||||
}
|
||||
}
|
||||
all := specs.Device{
|
||||
Name: "all",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: allDeviceNodes,
|
||||
},
|
||||
}
|
||||
|
||||
spec.Devices = append(spec.Devices, all)
|
||||
spec.ContainerEdits.DeviceNodes = m.getExistingMetaDeviceNodes()
|
||||
|
||||
libraries, err := m.findLibs(nvmllib)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate driver libraries: %v", err)
|
||||
@ -201,20 +218,13 @@ func (m command) generateSpec() (*specs.Spec, error) {
|
||||
}
|
||||
|
||||
func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) {
|
||||
var deviceNodes []*specs.DeviceNode
|
||||
|
||||
deviceNodePaths, err := d.GetDeviceNodes()
|
||||
if err != nil {
|
||||
return specs.Device{}, fmt.Errorf("failed to get paths for device: %v", err)
|
||||
}
|
||||
for _, p := range deviceNodePaths {
|
||||
deviceNode := specs.DeviceNode{
|
||||
Path: p,
|
||||
// TODO: Set the host path dependent on the root
|
||||
HostPath: p,
|
||||
}
|
||||
deviceNodes = append(deviceNodes, &deviceNode)
|
||||
}
|
||||
|
||||
deviceNodes := getDeviceNodesFromPaths(deviceNodePaths)
|
||||
|
||||
device := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
@ -225,6 +235,38 @@ func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) {
|
||||
return device, nil
|
||||
}
|
||||
|
||||
func (m command) getExistingMetaDeviceNodes() []*specs.DeviceNode {
|
||||
metaDeviceNodePaths := []string{
|
||||
"/dev/nvidia-modeset",
|
||||
"/dev/nvidia-uvm-tools",
|
||||
"/dev/nvidia-uvm",
|
||||
"/dev/nvidiactl",
|
||||
}
|
||||
|
||||
var existingDeviceNodePaths []string
|
||||
for _, p := range metaDeviceNodePaths {
|
||||
if _, err := os.Stat(p); err != nil {
|
||||
m.logger.Infof("Ignoring missing meta device %v", p)
|
||||
continue
|
||||
}
|
||||
existingDeviceNodePaths = append(existingDeviceNodePaths, p)
|
||||
}
|
||||
|
||||
return getDeviceNodesFromPaths(existingDeviceNodePaths)
|
||||
}
|
||||
|
||||
func getDeviceNodesFromPaths(deviceNodePaths []string) []*specs.DeviceNode {
|
||||
var deviceNodes []*specs.DeviceNode
|
||||
for _, p := range deviceNodePaths {
|
||||
deviceNode := specs.DeviceNode{
|
||||
Path: p,
|
||||
}
|
||||
deviceNodes = append(deviceNodes, &deviceNode)
|
||||
}
|
||||
|
||||
return deviceNodes
|
||||
}
|
||||
|
||||
func (m command) findLibs(nvmllib nvml.Interface) ([]string, error) {
|
||||
version, r := nvmllib.SystemGetDriverVersion()
|
||||
if r != nvml.SUCCESS {
|
||||
|
@ -42,11 +42,11 @@ var _ deviceInfo = (*nvmlDevice)(nil)
|
||||
var _ deviceInfo = (*nvmlMigDevice)(nil)
|
||||
|
||||
func newGPUDevice(i int, gpu device.Device) (string, nvmlDevice) {
|
||||
return fmt.Sprintf("%v", i), nvmlDevice{gpu}
|
||||
return fmt.Sprintf("gpu%v", i), nvmlDevice{gpu}
|
||||
}
|
||||
|
||||
func newMigDevice(i int, j int, mig device.MigDevice) (string, nvmlMigDevice) {
|
||||
return fmt.Sprintf("%v:%v", i, j), nvmlMigDevice{mig}
|
||||
return fmt.Sprintf("mig%v:%v", i, j), nvmlMigDevice{mig}
|
||||
}
|
||||
|
||||
// GetUUID returns the UUID of the device
|
||||
|
Loading…
Reference in New Issue
Block a user