mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Update vendoring for nvpci
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
23
vendor/gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device/api.go
generated
vendored
23
vendor/gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device/api.go
generated
vendored
@@ -26,7 +26,9 @@ type Interface interface {
|
||||
GetMigDevices() ([]MigDevice, error)
|
||||
GetMigProfiles() ([]MigProfile, error)
|
||||
NewDevice(d nvml.Device) (Device, error)
|
||||
NewDeviceByUUID(uuid string) (Device, error)
|
||||
NewMigDevice(d nvml.Device) (MigDevice, error)
|
||||
NewMigDeviceByUUID(uuid string) (MigDevice, error)
|
||||
NewMigProfile(giProfileID, ciProfileID, ciEngProfileID int, migMemorySizeMB, deviceMemorySizeBytes uint64) (MigProfile, error)
|
||||
ParseMigProfile(profile string) (MigProfile, error)
|
||||
VisitDevices(func(i int, d Device) error) error
|
||||
@@ -35,7 +37,8 @@ type Interface interface {
|
||||
}
|
||||
|
||||
// devicelib is the concrete type behind Interface; it holds the NVML
// handle used for device queries and the set of device names to skip.
// NOTE(review): the nvml field is listed twice below, presumably because this
// diff view shows both the old and the re-aligned new line — confirm against
// the real file, which can only declare the field once.
type devicelib struct {
|
||||
nvml nvml.Interface
|
||||
nvml nvml.Interface
|
||||
// skippedDevices is a set keyed by device name; populated by WithSkippedDevices.
skippedDevices map[string]struct{}
|
||||
}
|
||||
|
||||
// Compile-time assertion that *devicelib satisfies Interface.
var _ Interface = &devicelib{}
|
||||
@@ -49,6 +52,12 @@ func New(opts ...Option) Interface {
|
||||
if d.nvml == nil {
|
||||
d.nvml = nvml.New()
|
||||
}
|
||||
if d.skippedDevices == nil {
|
||||
WithSkippedDevices(
|
||||
"DGX Display",
|
||||
"NVIDIA DGX Display",
|
||||
)(d)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
@@ -59,5 +68,17 @@ func WithNvml(nvml nvml.Interface) Option {
|
||||
}
|
||||
}
|
||||
|
||||
// WithSkippedDevices provides an Option to set devices to be skipped by model name
// The returned Option inserts every entry of names into the devicelib's
// skippedDevices set. Existing entries are kept, so applying the Option more
// than once accumulates names rather than replacing them.
|
||||
func WithSkippedDevices(names ...string) Option {
|
||||
return func(d *devicelib) {
|
||||
// Allocate the set on first use; assigning into a nil map would panic.
if d.skippedDevices == nil {
|
||||
d.skippedDevices = make(map[string]struct{})
|
||||
}
|
||||
for _, name := range names {
|
||||
d.skippedDevices[name] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Option defines a function for passing options to the New() call
// An Option receives the *devicelib and may modify it in place
// (WithSkippedDevices, for example, fills in its skippedDevices set).
|
||||
type Option func(*devicelib)
|
||||
|
||||
45
vendor/gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device/device.go
generated
vendored
45
vendor/gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device/device.go
generated
vendored
@@ -43,6 +43,20 @@ var _ Device = &device{}
|
||||
|
||||
// NewDevice builds a new Device from an nvml.Device
// It delegates to newDevice and returns the result as the Device interface.
|
||||
func (d *devicelib) NewDevice(dev nvml.Device) (Device, error) {
|
||||
return d.newDevice(dev)
|
||||
}
|
||||
|
||||
// NewDeviceByUUID builds a new Device from a UUID
// The UUID is resolved to an NVML handle with DeviceGetHandleByUUID and the
// handle is then wrapped by newDevice. An error is returned when the lookup
// yields anything other than nvml.SUCCESS.
|
||||
func (d *devicelib) NewDeviceByUUID(uuid string) (Device, error) {
|
||||
dev, ret := d.nvml.DeviceGetHandleByUUID(uuid)
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting device handle for uuid '%v': %v", uuid, ret)
|
||||
}
|
||||
return d.newDevice(dev)
|
||||
}
|
||||
|
||||
// newDevice creates a device from an nvml.Device
// The result is the concrete *device (not the Device interface), pairing the
// NVML handle with the devicelib that created it. As written it never fails;
// the returned error is always nil.
|
||||
func (d *devicelib) newDevice(dev nvml.Device) (*device, error) {
|
||||
return &device{dev, d}, nil
|
||||
}
|
||||
|
||||
@@ -130,6 +144,12 @@ func (d *device) VisitMigProfiles(visit func(MigProfile) error) error {
|
||||
|
||||
for i := 0; i < nvml.GPU_INSTANCE_PROFILE_COUNT; i++ {
|
||||
giProfileInfo, ret := d.GetGpuInstanceProfileInfo(i)
|
||||
if ret == nvml.ERROR_NOT_SUPPORTED {
|
||||
continue
|
||||
}
|
||||
if ret == nvml.ERROR_INVALID_ARGUMENT {
|
||||
continue
|
||||
}
|
||||
if ret != nvml.SUCCESS {
|
||||
return fmt.Errorf("error getting GPU Instance profile info: %v", ret)
|
||||
}
|
||||
@@ -177,6 +197,20 @@ func (d *device) GetMigProfiles() ([]MigProfile, error) {
|
||||
return profiles, nil
|
||||
}
|
||||
|
||||
// isSkipped checks whether the device should be skipped.
// It queries the device name with GetName and reports true when that exact
// name is a key in the owning devicelib's skippedDevices set. An error is
// returned (with false) when the name query does not return nvml.SUCCESS.
|
||||
func (d *device) isSkipped() (bool, error) {
|
||||
name, ret := d.GetName()
|
||||
if ret != nvml.SUCCESS {
|
||||
return false, fmt.Errorf("error getting device name: %v", ret)
|
||||
}
|
||||
|
||||
// Exact-match lookup; reading from a nil map is safe and simply finds nothing.
if _, exists := d.lib.skippedDevices[name]; exists {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// VisitDevices visits each top-level device and invokes a callback function for it
|
||||
func (d *devicelib) VisitDevices(visit func(int, Device) error) error {
|
||||
count, ret := d.nvml.DeviceGetCount()
|
||||
@@ -189,10 +223,19 @@ func (d *devicelib) VisitDevices(visit func(int, Device) error) error {
|
||||
if ret != nvml.SUCCESS {
|
||||
return fmt.Errorf("error getting device handle for index '%v': %v", i, ret)
|
||||
}
|
||||
dev, err := d.NewDevice(device)
|
||||
dev, err := d.newDevice(device)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating new device wrapper: %v", err)
|
||||
}
|
||||
|
||||
isSkipped, err := dev.isSkipped()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking whether device is skipped: %v", err)
|
||||
}
|
||||
if isSkipped {
|
||||
continue
|
||||
}
|
||||
|
||||
err = visit(i, dev)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error visiting device: %v", err)
|
||||
|
||||
21
vendor/gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device/mig_device.go
generated
vendored
21
vendor/gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device/mig_device.go
generated
vendored
@@ -48,6 +48,15 @@ func (d *devicelib) NewMigDevice(handle nvml.Device) (MigDevice, error) {
|
||||
return &migdevice{handle, d, nil}, nil
|
||||
}
|
||||
|
||||
// NewMigDeviceByUUID builds a new MigDevice from a UUID
// The UUID is resolved to an NVML handle with DeviceGetHandleByUUID and the
// handle is passed to NewMigDevice. An error is returned when the lookup
// yields anything other than nvml.SUCCESS.
// NOTE(review): this function does not itself verify that the handle refers to
// a MIG device; presumably NewMigDevice does — confirm against its full body.
|
||||
func (d *devicelib) NewMigDeviceByUUID(uuid string) (MigDevice, error) {
|
||||
dev, ret := d.nvml.DeviceGetHandleByUUID(uuid)
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting device handle for uuid '%v': %v", uuid, ret)
|
||||
}
|
||||
return d.NewMigDevice(dev)
|
||||
}
|
||||
|
||||
// GetProfile returns the MIG profile associated with a MIG device
|
||||
func (m *migdevice) GetProfile() (MigProfile, error) {
|
||||
if m.profile != nil {
|
||||
@@ -101,6 +110,12 @@ func (m *migdevice) GetProfile() (MigProfile, error) {
|
||||
|
||||
for i := 0; i < nvml.GPU_INSTANCE_PROFILE_COUNT; i++ {
|
||||
giProfileInfo, ret := parent.GetGpuInstanceProfileInfo(i)
|
||||
if ret == nvml.ERROR_NOT_SUPPORTED {
|
||||
continue
|
||||
}
|
||||
if ret == nvml.ERROR_INVALID_ARGUMENT {
|
||||
continue
|
||||
}
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU Instance profile info: %v", ret)
|
||||
}
|
||||
@@ -112,6 +127,12 @@ func (m *migdevice) GetProfile() (MigProfile, error) {
|
||||
for j := 0; j < nvml.COMPUTE_INSTANCE_PROFILE_COUNT; j++ {
|
||||
for k := 0; k < nvml.COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT; k++ {
|
||||
ciProfileInfo, ret := gi.GetComputeInstanceProfileInfo(j, k)
|
||||
if ret == nvml.ERROR_NOT_SUPPORTED {
|
||||
continue
|
||||
}
|
||||
if ret == nvml.ERROR_INVALID_ARGUMENT {
|
||||
continue
|
||||
}
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting Compute Instance profile info: %v", ret)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user