Merge pull request #44 from ArangoGutierrez/nvmlDeviceGetGpuFabricInfoV

Add method IsFabricAttached to Device interface
This commit is contained in:
Carlos Eduardo Arango Gutierrez 2024-09-28 18:28:40 +02:00 committed by GitHub
commit 41955a0842
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -32,6 +32,7 @@ type Device interface {
GetMigDevices() ([]MigDevice, error) GetMigDevices() ([]MigDevice, error)
GetMigProfiles() ([]MigProfile, error) GetMigProfiles() ([]MigProfile, error)
GetPCIBusID() (string, error) GetPCIBusID() (string, error)
IsFabricAttached() (bool, error)
IsMigCapable() (bool, error) IsMigCapable() (bool, error)
IsMigEnabled() (bool, error) IsMigEnabled() (bool, error)
VisitMigDevices(func(j int, m MigDevice) error) error VisitMigDevices(func(j int, m MigDevice) error) error
@ -208,6 +209,47 @@ func (d *device) IsMigEnabled() (bool, error) {
return (mode == nvml.DEVICE_MIG_ENABLE), nil return (mode == nvml.DEVICE_MIG_ENABLE), nil
} }
// IsFabricAttached reports whether the device is attached to a GPU fabric.
//
// The device is considered attached only when the fabric info query succeeds,
// the reported state equals nvml.GPU_FABRIC_STATE_COMPLETED, and the reported
// status converts to nvml.SUCCESS. An nvml.ERROR_NOT_SUPPORTED result from the
// query is treated as "not attached" rather than as an error; any other
// non-success return code is surfaced as an error.
//
// When the library exports nvmlDeviceGetGpuFabricInfo that query is used and
// the versioned variant is never consulted; otherwise the V2 result of
// nvmlDeviceGetGpuFabricInfoV is used if that symbol is present. If neither
// symbol is available the device is reported as not attached.
func (d *device) IsFabricAttached() (bool, error) {
	// Unversioned query takes precedence whenever the symbol is exported.
	if d.lib.hasSymbol("nvmlDeviceGetGpuFabricInfo") {
		fabricInfo, rc := d.GetGpuFabricInfo()
		switch {
		case rc == nvml.ERROR_NOT_SUPPORTED:
			return false, nil
		case rc != nvml.SUCCESS:
			return false, fmt.Errorf("error getting GPU Fabric Info: %v", rc)
		}
		attached := fabricInfo.State == nvml.GPU_FABRIC_STATE_COMPLETED &&
			nvml.Return(fabricInfo.Status) == nvml.SUCCESS
		return attached, nil
	}

	// Fall back to the versioned query only when the unversioned one is absent.
	if d.lib.hasSymbol("nvmlDeviceGetGpuFabricInfoV") {
		fabricInfo, rc := d.GetGpuFabricInfoV().V2()
		switch {
		case rc == nvml.ERROR_NOT_SUPPORTED:
			return false, nil
		case rc != nvml.SUCCESS:
			return false, fmt.Errorf("error getting GPU Fabric Info: %v", rc)
		}
		attached := fabricInfo.State == nvml.GPU_FABRIC_STATE_COMPLETED &&
			nvml.Return(fabricInfo.Status) == nvml.SUCCESS
		return attached, nil
	}

	// Neither fabric query is exported by the loaded library.
	return false, nil
}
// VisitMigDevices walks a top-level device and invokes a callback function for each MIG device configured on it. // VisitMigDevices walks a top-level device and invokes a callback function for each MIG device configured on it.
func (d *device) VisitMigDevices(visit func(int, MigDevice) error) error { func (d *device) VisitMigDevices(visit func(int, MigDevice) error) error {
capable, err := d.IsMigCapable() capable, err := d.IsMigCapable()