From 8b5e3d224dc9b2519ce844cf64c6868a9d2c3eb6 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 14 Oct 2022 10:31:50 +0200 Subject: [PATCH] Ensure that invalid MIG profiles are skipped Signed-off-by: Evan Lezar --- pkg/nvlib/device/device.go | 6 ++++++ pkg/nvlib/device/mig_device.go | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/pkg/nvlib/device/device.go b/pkg/nvlib/device/device.go index c10cfcc..e33480e 100644 --- a/pkg/nvlib/device/device.go +++ b/pkg/nvlib/device/device.go @@ -130,6 +130,12 @@ func (d *device) VisitMigProfiles(visit func(MigProfile) error) error { for i := 0; i < nvml.GPU_INSTANCE_PROFILE_COUNT; i++ { giProfileInfo, ret := d.GetGpuInstanceProfileInfo(i) + if ret == nvml.ERROR_NOT_SUPPORTED { + continue + } + if ret == nvml.ERROR_INVALID_ARGUMENT { + continue + } if ret != nvml.SUCCESS { return fmt.Errorf("error getting GPU Instance profile info: %v", ret) } diff --git a/pkg/nvlib/device/mig_device.go b/pkg/nvlib/device/mig_device.go index 0d87c98..46af41e 100644 --- a/pkg/nvlib/device/mig_device.go +++ b/pkg/nvlib/device/mig_device.go @@ -101,6 +101,12 @@ func (m *migdevice) GetProfile() (MigProfile, error) { for i := 0; i < nvml.GPU_INSTANCE_PROFILE_COUNT; i++ { giProfileInfo, ret := parent.GetGpuInstanceProfileInfo(i) + if ret == nvml.ERROR_NOT_SUPPORTED { + continue + } + if ret == nvml.ERROR_INVALID_ARGUMENT { + continue + } if ret != nvml.SUCCESS { return nil, fmt.Errorf("error getting GPU Instance profile info: %v", ret) } @@ -112,6 +118,12 @@ func (m *migdevice) GetProfile() (MigProfile, error) { for j := 0; j < nvml.COMPUTE_INSTANCE_PROFILE_COUNT; j++ { for k := 0; k < nvml.COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT; k++ { ciProfileInfo, ret := gi.GetComputeInstanceProfileInfo(j, k) + if ret == nvml.ERROR_NOT_SUPPORTED { + continue + } + if ret == nvml.ERROR_INVALID_ARGUMENT { + continue + } if ret != nvml.SUCCESS { return nil, fmt.Errorf("error getting Compute Instance profile info: %v", ret)