Merge branch 'fix-bug-in-visit-mig-profiles' into 'main'

Fix bug in heuristic for which MIG profiles to skip

See merge request nvidia/cloud-native/go-nvlib!35
This commit is contained in:
Kevin Klues 2023-03-25 22:11:36 +00:00
commit 649f196fb7
2 changed files with 14 additions and 1 deletions

View File

@ -40,6 +40,7 @@ type devicelib struct {
nvml nvml.Interface
skippedDevices map[string]struct{}
verifySymbols *bool
migProfiles []MigProfile
}
var _ Interface = &devicelib{}

View File

@ -212,7 +212,10 @@ func (d *device) VisitMigProfiles(visit func(MigProfile) error) error {
// physically constructed. In the future we should do this via
// NVML once a proper API for this exists.
pi := p.GetInfo()
if (pi.C * 2) > (pi.G + 1) {
if pi.C > pi.G {
continue
}
if (pi.C < pi.G) && ((pi.C * 2) > (pi.G + 1)) {
continue
}
@ -385,6 +388,12 @@ func (d *devicelib) GetMigDevices() ([]MigDevice, error) {
// GetMigProfiles gets the set of unique MIG profiles across all top-level devices.
//
// The list is built by collecting every profile passed to the VisitMigProfiles
// callback. After a successful build it is stored in d.migProfiles, and later
// calls return that stored slice directly, so all callers share one backing
// array and should treat the result as read-only.
//
// If VisitMigProfiles returns an error, that error is returned with a nil
// slice and nothing is cached.
//
// NOTE(review): no locking is visible around d.migProfiles here; confirm
// whether concurrent callers are expected.
func (d *devicelib) GetMigProfiles() ([]MigProfile, error) {
// Return the cached list if available
// NOTE(review): a nil slice doubles as the "not cached yet" marker, so if the
// visit yields no profiles the list is regenerated on every call.
if d.migProfiles != nil {
return d.migProfiles, nil
}
// Otherwise generate it...
var profiles []MigProfile
err := d.VisitMigProfiles(func(p MigProfile) error {
profiles = append(profiles, p)
@ -393,6 +402,9 @@ func (d *devicelib) GetMigProfiles() ([]MigProfile, error) {
if err != nil {
return nil, err
}
// And cache it before returning
d.migProfiles = profiles
return profiles, nil
}