From dfa041991fdd49bed00abd1e36dd60e3fa4f2f39 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 7 Oct 2022 16:10:45 +0200 Subject: [PATCH 1/3] Generate v0.4.0 CDI spec This change generates a v0.4.0 CDI spec instead of a v0.5.0 spec. This allows older versions of podman, for example, to be used. This requires that the device names do not start with a numeric character and that the HostPath for a device is unspecified. Signed-off-by: Evan Lezar --- cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go | 4 +--- cmd/nvidia-ctk/info/generate-cdi/nvml_devices.go | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go b/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go index d5438d5e..e8fa42aa 100644 --- a/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go +++ b/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go @@ -139,7 +139,7 @@ func (m command) generateSpec() (*specs.Spec, error) { devicelib := device.New(device.WithNvml(nvmllib)) spec := specs.Spec{ - Version: specs.CurrentVersion, + Version: "0.4.0", Kind: "nvidia.com/gpu", ContainerEdits: specs.ContainerEdits{}, } @@ -210,8 +210,6 @@ func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) { for _, p := range deviceNodePaths { deviceNode := specs.DeviceNode{ Path: p, - // TODO: Set the host path dependent on the root - HostPath: p, } deviceNodes = append(deviceNodes, &deviceNode) } diff --git a/cmd/nvidia-ctk/info/generate-cdi/nvml_devices.go b/cmd/nvidia-ctk/info/generate-cdi/nvml_devices.go index 95265bf0..221c4005 100644 --- a/cmd/nvidia-ctk/info/generate-cdi/nvml_devices.go +++ b/cmd/nvidia-ctk/info/generate-cdi/nvml_devices.go @@ -42,11 +42,11 @@ var _ deviceInfo = (*nvmlDevice)(nil) var _ deviceInfo = (*nvmlMigDevice)(nil) func newGPUDevice(i int, gpu device.Device) (string, nvmlDevice) { - return fmt.Sprintf("%v", i), nvmlDevice{gpu} + return fmt.Sprintf("gpu%v", i), nvmlDevice{gpu} } func newMigDevice(i int, j int, mig
device.MigDevice) (string, nvmlMigDevice) { - return fmt.Sprintf("%v:%v", i, j), nvmlMigDevice{mig} + return fmt.Sprintf("mig%v:%v", i, j), nvmlMigDevice{mig} } // GetUUID returns the UUID of the device From 3dd80206957306a340a40ccdc03cc5b349ae7c76 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 7 Oct 2022 16:23:18 +0200 Subject: [PATCH 2/3] Include meta devices in generated CDI spec This change includes meta devices (e.g. /dev/nvidiactl) in the generated CDI spec. Missing device nodes are ignored. Signed-off-by: Evan Lezar --- .../info/generate-cdi/generate-cdi.go | 45 +++++++++++++++---- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go b/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go index e8fa42aa..23508423 100644 --- a/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go +++ b/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go @@ -176,6 +176,8 @@ func (m command) generateSpec() (*specs.Spec, error) { return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err) } + spec.ContainerEdits.DeviceNodes = m.getExistingMetaDeviceNodes() + libraries, err := m.findLibs(nvmllib) if err != nil { return nil, fmt.Errorf("failed to locate driver libraries: %v", err) @@ -201,18 +203,13 @@ func (m command) generateSpec() (*specs.Spec, error) { } func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) { - var deviceNodes []*specs.DeviceNode - deviceNodePaths, err := d.GetDeviceNodes() if err != nil { return specs.Device{}, fmt.Errorf("failed to get paths for device: %v", err) } - for _, p := range deviceNodePaths { - deviceNode := specs.DeviceNode{ - Path: p, - } - deviceNodes = append(deviceNodes, &deviceNode) - } + + deviceNodes := getDeviceNodesFromPaths(deviceNodePaths) + device := specs.Device{ Name: name, ContainerEdits: specs.ContainerEdits{ @@ -223,6 +220,38 @@ func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) { return device, nil } +func (m 
command) getExistingMetaDeviceNodes() []*specs.DeviceNode { + metaDeviceNodePaths := []string{ + "/dev/nvidia-modeset", + "/dev/nvidia-uvm-tools", + "/dev/nvidia-uvm", + "/dev/nvidiactl", + } + + var existingDeviceNodePaths []string + for _, p := range metaDeviceNodePaths { + if _, err := os.Stat(p); err != nil { + m.logger.Infof("Ignoring missing meta device %v", p) + continue + } + existingDeviceNodePaths = append(existingDeviceNodePaths, p) + } + + return getDeviceNodesFromPaths(existingDeviceNodePaths) +} + +func getDeviceNodesFromPaths(deviceNodePaths []string) []*specs.DeviceNode { + var deviceNodes []*specs.DeviceNode + for _, p := range deviceNodePaths { + deviceNode := specs.DeviceNode{ + Path: p, + } + deviceNodes = append(deviceNodes, &deviceNode) + } + + return deviceNodes +} + func (m command) findLibs(nvmllib nvml.Interface) ([]string, error) { version, r := nvmllib.SystemGetDriverVersion() if r != nvml.SUCCESS { From 1597ede2afe96a9f6e5e5770480301ba78435e2e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 10 Oct 2022 10:19:08 +0200 Subject: [PATCH 3/3] Add all device Signed-off-by: Evan Lezar --- cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go b/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go index 23508423..7ef290b4 100644 --- a/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go +++ b/cmd/nvidia-ctk/info/generate-cdi/generate-cdi.go @@ -176,6 +176,21 @@ func (m command) generateSpec() (*specs.Spec, error) { return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err) } + // We create an "all" device with all the discovered device nodes + var allDeviceNodes []*specs.DeviceNode + for _, d := range spec.Devices { + for _, dn := range d.ContainerEdits.DeviceNodes { + allDeviceNodes = append(allDeviceNodes, dn) + } + } + all := specs.Device{ + Name: "all", + ContainerEdits: specs.ContainerEdits{ + DeviceNodes: 
allDeviceNodes, + }, + } + + spec.Devices = append(spec.Devices, all) spec.ContainerEdits.DeviceNodes = m.getExistingMetaDeviceNodes() libraries, err := m.findLibs(nvmllib)