Merge branch 'fix-walk-mig-profile' into 'main'

Ensure that invalid MIG profiles are skipped

See merge request nvidia/cloud-native/go-nvlib!21
This commit is contained in:
Evan Lezar 2022-10-14 10:08:48 +00:00
commit 7222fea1a7
2 changed files with 18 additions and 0 deletions

View File

@@ -130,6 +130,12 @@ func (d *device) VisitMigProfiles(visit func(MigProfile) error) error {
for i := 0; i < nvml.GPU_INSTANCE_PROFILE_COUNT; i++ {
giProfileInfo, ret := d.GetGpuInstanceProfileInfo(i)
if ret == nvml.ERROR_NOT_SUPPORTED {
continue
}
if ret == nvml.ERROR_INVALID_ARGUMENT {
continue
}
if ret != nvml.SUCCESS {
return fmt.Errorf("error getting GPU Instance profile info: %v", ret)
}

View File

@@ -101,6 +101,12 @@ func (m *migdevice) GetProfile() (MigProfile, error) {
for i := 0; i < nvml.GPU_INSTANCE_PROFILE_COUNT; i++ {
giProfileInfo, ret := parent.GetGpuInstanceProfileInfo(i)
if ret == nvml.ERROR_NOT_SUPPORTED {
continue
}
if ret == nvml.ERROR_INVALID_ARGUMENT {
continue
}
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting GPU Instance profile info: %v", ret)
}
@@ -112,6 +118,12 @@ func (m *migdevice) GetProfile() (MigProfile, error) {
for j := 0; j < nvml.COMPUTE_INSTANCE_PROFILE_COUNT; j++ {
for k := 0; k < nvml.COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT; k++ {
ciProfileInfo, ret := gi.GetComputeInstanceProfileInfo(j, k)
if ret == nvml.ERROR_NOT_SUPPORTED {
continue
}
if ret == nvml.ERROR_INVALID_ARGUMENT {
continue
}
if ret != nvml.SUCCESS {
return nil, fmt.Errorf("error getting Compute Instance profile info: %v", ret)