Update go-nvlib to skip non-MIG devices

This change updates go-nvlib to ensure that non-MIG-capable GPUs are skipped when generating CDI specifications for MIG devices.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2025-06-26 18:18:24 +00:00 · 2023-05-22 15:28:38 +02:00
parent 3ea02d13fc
commit e11f65e51e
12 changed files with 432 additions and 139 deletions
--- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go
+++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go
@@ -2001,9 +2001,16 @@ func DeviceGetGpuInstancePossiblePlacements(Device Device, Info *GpuInstanceProf
 	if Info == nil {
 		return nil, ERROR_INVALID_ARGUMENT
 	}
-	var Count uint32 = Info.InstanceCount
+	var Count uint32
+	ret := nvmlDeviceGetGpuInstancePossiblePlacements(Device, Info.Id, nil, &Count)
+	if ret != SUCCESS {
+		return nil, ret
+	}
+	if Count == 0 {
+		return []GpuInstancePlacement{}, ret
+	}
 	Placements := make([]GpuInstancePlacement, Count)
-	ret := nvmlDeviceGetGpuInstancePossiblePlacements(Device, Info.Id, &Placements[0], &Count)
+	ret = nvmlDeviceGetGpuInstancePossiblePlacements(Device, Info.Id, &Placements[0], &Count)
 	return Placements[:Count], ret
 }

@@ -2577,9 +2584,9 @@ func (Device Device) GetVgpuSchedulerCapabilities() (VgpuSchedulerCapabilities,
 }

 // nvml.GpuInstanceGetComputeInstancePossiblePlacements()
-func GpuInstanceGetComputeInstancePossiblePlacements(GpuInstance GpuInstance, ProfileId int) ([]ComputeInstancePlacement, Return) {
+func GpuInstanceGetComputeInstancePossiblePlacements(GpuInstance GpuInstance, Info *ComputeInstanceProfileInfo) ([]ComputeInstancePlacement, Return) {
 	var Count uint32
-	ret := nvmlGpuInstanceGetComputeInstancePossiblePlacements(GpuInstance, uint32(ProfileId), nil, &Count)
+	ret := nvmlGpuInstanceGetComputeInstancePossiblePlacements(GpuInstance, Info.Id, nil, &Count)
 	if ret != SUCCESS {
 		return nil, ret
 	}
@@ -2587,21 +2594,21 @@ func GpuInstanceGetComputeInstancePossiblePlacements(GpuInstance GpuInstance, Pr
 		return []ComputeInstancePlacement{}, ret
 	}
 	PlacementArray := make([]ComputeInstancePlacement, Count)
-	ret = nvmlGpuInstanceGetComputeInstancePossiblePlacements(GpuInstance, uint32(ProfileId), &PlacementArray[0], &Count)
+	ret = nvmlGpuInstanceGetComputeInstancePossiblePlacements(GpuInstance, Info.Id, &PlacementArray[0], &Count)
 	return PlacementArray, ret
 }

-func (GpuInstance GpuInstance) GetComputeInstancePossiblePlacements(ProfileId int) ([]ComputeInstancePlacement, Return) {
-	return GpuInstanceGetComputeInstancePossiblePlacements(GpuInstance, ProfileId)
+func (GpuInstance GpuInstance) GetComputeInstancePossiblePlacements(Info *ComputeInstanceProfileInfo) ([]ComputeInstancePlacement, Return) {
+	return GpuInstanceGetComputeInstancePossiblePlacements(GpuInstance, Info)
 }

 // nvml.GpuInstanceCreateComputeInstanceWithPlacement()
-func GpuInstanceCreateComputeInstanceWithPlacement(GpuInstance GpuInstance, ProfileId int, Placement *ComputeInstancePlacement, ComputeInstance *ComputeInstance) Return {
-	return nvmlGpuInstanceCreateComputeInstanceWithPlacement(GpuInstance, uint32(ProfileId), Placement, ComputeInstance)
+func GpuInstanceCreateComputeInstanceWithPlacement(GpuInstance GpuInstance, Info *ComputeInstanceProfileInfo, Placement *ComputeInstancePlacement, ComputeInstance *ComputeInstance) Return {
+	return nvmlGpuInstanceCreateComputeInstanceWithPlacement(GpuInstance, Info.Id, Placement, ComputeInstance)
 }

-func (GpuInstance GpuInstance) CreateComputeInstanceWithPlacement(ProfileId int, Placement *ComputeInstancePlacement, ComputeInstance *ComputeInstance) Return {
-	return GpuInstanceCreateComputeInstanceWithPlacement(GpuInstance, ProfileId, Placement, ComputeInstance)
+func (GpuInstance GpuInstance) CreateComputeInstanceWithPlacement(Info *ComputeInstanceProfileInfo, Placement *ComputeInstancePlacement, ComputeInstance *ComputeInstance) Return {
+	return GpuInstanceCreateComputeInstanceWithPlacement(GpuInstance, Info, Placement, ComputeInstance)
 }

 // nvml.DeviceGetGpuFabricInfo()