Merge branch 'update-cdi-spec-generation' into 'main'

Update CDI spec generation

See merge request nvidia/container-toolkit/container-toolkit!225
This commit is contained in:
Evan Lezar 2022-10-10 10:07:19 +00:00
commit c068d4048f
2 changed files with 55 additions and 13 deletions

View File

@ -139,7 +139,7 @@ func (m command) generateSpec() (*specs.Spec, error) {
devicelib := device.New(device.WithNvml(nvmllib))
spec := specs.Spec{
Version: specs.CurrentVersion,
Version: "0.4.0",
Kind: "nvidia.com/gpu",
ContainerEdits: specs.ContainerEdits{},
}
@ -176,6 +176,23 @@ func (m command) generateSpec() (*specs.Spec, error) {
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
}
// We create an "all" device with all the discovered device nodes
var allDeviceNodes []*specs.DeviceNode
for _, d := range spec.Devices {
for _, dn := range d.ContainerEdits.DeviceNodes {
allDeviceNodes = append(allDeviceNodes, dn)
}
}
all := specs.Device{
Name: "all",
ContainerEdits: specs.ContainerEdits{
DeviceNodes: allDeviceNodes,
},
}
spec.Devices = append(spec.Devices, all)
spec.ContainerEdits.DeviceNodes = m.getExistingMetaDeviceNodes()
libraries, err := m.findLibs(nvmllib)
if err != nil {
return nil, fmt.Errorf("failed to locate driver libraries: %v", err)
@ -201,20 +218,13 @@ func (m command) generateSpec() (*specs.Spec, error) {
}
func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) {
var deviceNodes []*specs.DeviceNode
deviceNodePaths, err := d.GetDeviceNodes()
if err != nil {
return specs.Device{}, fmt.Errorf("failed to get paths for device: %v", err)
}
for _, p := range deviceNodePaths {
deviceNode := specs.DeviceNode{
Path: p,
// TODO: Set the host path dependent on the root
HostPath: p,
}
deviceNodes = append(deviceNodes, &deviceNode)
}
deviceNodes := getDeviceNodesFromPaths(deviceNodePaths)
device := specs.Device{
Name: name,
ContainerEdits: specs.ContainerEdits{
@ -225,6 +235,38 @@ func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) {
return device, nil
}
// getExistingMetaDeviceNodes checks a fixed list of /dev/nvidia* paths and
// returns device nodes for those that os.Stat can resolve. Any path for which
// os.Stat reports an error is logged at info level and left out of the result.
func (m command) getExistingMetaDeviceNodes() []*specs.DeviceNode {
	candidates := [...]string{
		"/dev/nvidia-modeset",
		"/dev/nvidia-uvm-tools",
		"/dev/nvidia-uvm",
		"/dev/nvidiactl",
	}

	var present []string
	for i := range candidates {
		path := candidates[i]
		_, statErr := os.Stat(path)
		if statErr == nil {
			present = append(present, path)
			continue
		}
		// Any stat failure is treated as the device being absent.
		m.logger.Infof("Ignoring missing meta device %v", path)
	}

	return getDeviceNodesFromPaths(present)
}
// getDeviceNodesFromPaths converts each supplied path into a DeviceNode whose
// Path field is that path, keeping the input order. The result is nil when no
// paths are supplied.
func getDeviceNodesFromPaths(deviceNodePaths []string) []*specs.DeviceNode {
	var nodes []*specs.DeviceNode
	for i := range deviceNodePaths {
		nodes = append(nodes, &specs.DeviceNode{Path: deviceNodePaths[i]})
	}
	return nodes
}
func (m command) findLibs(nvmllib nvml.Interface) ([]string, error) {
version, r := nvmllib.SystemGetDriverVersion()
if r != nvml.SUCCESS {

View File

@ -42,11 +42,11 @@ var _ deviceInfo = (*nvmlDevice)(nil)
var _ deviceInfo = (*nvmlMigDevice)(nil)
// newGPUDevice returns a name string formatted from the index i together with
// an nvmlDevice wrapping gpu.
// NOTE(review): this listing is a rendered diff without +/- markers; the first
// return below appears to be the removed line ("%v") and the second the added
// line ("gpu%v") — confirm against the commit.
func newGPUDevice(i int, gpu device.Device) (string, nvmlDevice) {
return fmt.Sprintf("%v", i), nvmlDevice{gpu}
return fmt.Sprintf("gpu%v", i), nvmlDevice{gpu}
}
// newMigDevice returns a name string formatted from the indices i and j
// (joined by ':') together with an nvmlMigDevice wrapping mig.
// NOTE(review): this listing is a rendered diff without +/- markers; the first
// return below appears to be the removed line ("%v:%v") and the second the
// added line ("mig%v:%v") — confirm against the commit.
func newMigDevice(i int, j int, mig device.MigDevice) (string, nvmlMigDevice) {
return fmt.Sprintf("%v:%v", i, j), nvmlMigDevice{mig}
return fmt.Sprintf("mig%v:%v", i, j), nvmlMigDevice{mig}
}
// GetUUID returns the UUID of the device