From 655eb9795c5438eb44626aba910b0fa79ba83bf4 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 16 Nov 2022 16:07:58 +0100 Subject: [PATCH 1/4] Skip display devices based on device names This allows devices to be skipped based on device names and skips "NVIDIA DGX Display" devices by default. Signed-off-by: Evan Lezar --- pkg/nvlib/device/api.go | 22 ++++++++++------------ pkg/nvlib/device/device.go | 20 +++++++++++++++++--- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/pkg/nvlib/device/api.go b/pkg/nvlib/device/api.go index 498bda8..7741915 100644 --- a/pkg/nvlib/device/api.go +++ b/pkg/nvlib/device/api.go @@ -35,8 +35,8 @@ type Interface interface { } type devicelib struct { - nvml nvml.Interface - selectedDeviceClasses map[Class]struct{} + nvml nvml.Interface + skippedDevices map[string]struct{} } var _ Interface = &devicelib{} @@ -50,10 +50,8 @@ func New(opts ...Option) Interface { if d.nvml == nil { d.nvml = nvml.New() } - if d.selectedDeviceClasses == nil { - d.selectedDeviceClasses = map[Class]struct{}{ - ClassCompute: {}, - } + if d.skippedDevices == nil { + WithSkippedDevices("NVIDIA DGX Display")(d) } return d } @@ -65,14 +63,14 @@ func WithNvml(nvml nvml.Interface) Option { } } -// WithSelectedDeviceClasses selects the specified device classes when filtering devices -func WithSelectedDeviceClasses(classes ...Class) Option { +// WithSkippedDevices provides an Option to set devices to be skipped by model name +func WithSkippedDevices(names ...string) Option { return func(d *devicelib) { - if d.selectedDeviceClasses == nil { - d.selectedDeviceClasses = make(map[Class]struct{}) + if d.skippedDevices == nil { + d.skippedDevices = make(map[string]struct{}) } - for _, c := range classes { - d.selectedDeviceClasses[c] = struct{}{} + for _, name := range names { + d.skippedDevices[name] = struct{}{} } } } diff --git a/pkg/nvlib/device/device.go b/pkg/nvlib/device/device.go index bb0eee1..640c01f 100644 --- a/pkg/nvlib/device/device.go +++ 
b/pkg/nvlib/device/device.go @@ -238,6 +238,20 @@ func (d *device) getClass() (Class, error) { return Class(device.Class), nil } +// isSkipped checks whether the device should be skipped. +func (d *device) isSkipped() (bool, error) { + name, ret := d.GetName() + if ret != nvml.SUCCESS { + return false, fmt.Errorf("error getting device name: %v", ret) + } + + if _, exists := d.lib.skippedDevices[name]; exists { + return true, nil + } + + return false, nil +} + // VisitDevices visits each top-level device and invokes a callback function for it func (d *devicelib) VisitDevices(visit func(int, Device) error) error { count, ret := d.nvml.DeviceGetCount() @@ -255,11 +269,11 @@ func (d *devicelib) VisitDevices(visit func(int, Device) error) error { return fmt.Errorf("error creating new device wrapper: %v", err) } - class, err := dev.getClass() + isSkipped, err := dev.isSkipped() if err != nil { - return fmt.Errorf("error getting PCI device class for device: %v", err) + return fmt.Errorf("error checking whether device is skipped: %v", err) } - if !d.classIsSelected(class) { + if isSkipped { continue } From 1fc1eee3921dd89cd4be4dc8ab768d035497a072 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 16 Nov 2022 16:10:00 +0100 Subject: [PATCH 2/4] Remove WithSelectedDeviceClasses option Signed-off-by: Evan Lezar --- pkg/nvlib/device/device.go | 50 -------------------------------------- 1 file changed, 50 deletions(-) diff --git a/pkg/nvlib/device/device.go b/pkg/nvlib/device/device.go index 640c01f..bcc1409 100644 --- a/pkg/nvlib/device/device.go +++ b/pkg/nvlib/device/device.go @@ -18,11 +18,9 @@ package device import ( "fmt" - "strings" "github.com/NVIDIA/go-nvml/pkg/dl" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml" - "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci" ) // Device defines the set of extended functions associated with a device.Device @@ -41,15 +39,6 @@ type device struct { lib *devicelib } -// Class represents the PCI class for a device -type Class 
uint32 - -// Define constants for common device classes -const ( - ClassCompute = Class(nvpci.PCI3dControllerClass) - ClassDisplay = Class(nvpci.PCIVgaControllerClass) -) - var _ Device = &device{} // NewDevice builds a new Device from an nvml.Device @@ -62,16 +51,6 @@ func (d *devicelib) newDevice(dev nvml.Device) (*device, error) { return &device{dev, d}, nil } -// classIsSelected checks whether the specified class has been selected when constructing the devicelib -func (d *devicelib) classIsSelected(c Class) bool { - if d.selectedDeviceClasses == nil { - return false - } - _, exists := d.selectedDeviceClasses[c] - - return exists -} - // IsMigCapable checks if a device is capable of having MIG paprtitions created on it func (d *device) IsMigCapable() (bool, error) { err := nvmlLookupSymbol("nvmlDeviceGetMigMode") @@ -209,35 +188,6 @@ func (d *device) GetMigProfiles() ([]MigProfile, error) { return profiles, nil } -// getClass returns the PCI device class for the device -func (d *device) getClass() (Class, error) { - info, ret := d.GetPciInfo() - if ret != nvml.SUCCESS { - return 0, fmt.Errorf("failed to get PCI info: %v", ret) - } - - // We convert the BusId to a string - var bytes []byte - for _, b := range info.BusId { - if byte(b) == '\x00' { - break - } - bytes = append(bytes, byte(b)) - } - id := strings.ToLower(string(bytes)) - - if id != "0000" { - id = strings.TrimPrefix(id, "0000") - } - - device, err := nvpci.New().GetGPUByPciBusID(id) - if err != nil { - return 0, fmt.Errorf("failed to construct PCI device: %v", err) - } - - return Class(device.Class), nil -} - // isSkipped checks whether the device should be skipped. 
func (d *device) isSkipped() (bool, error) { name, ret := d.GetName() From d69a94ffddfdd99a1b2f7cc67a607e6fdd531281 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 16 Nov 2022 16:19:27 +0100 Subject: [PATCH 3/4] Add .shell target for interactive make Signed-off-by: Evan Lezar --- Makefile | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Makefile b/Makefile index 78a5692..edb90f6 100644 --- a/Makefile +++ b/Makefile @@ -96,3 +96,15 @@ $(DOCKER_TARGETS): docker-%: .build-image --user $$(id -u):$$(id -g) \ $(BUILDIMAGE) \ make $(*) + +# Start an interactive shell using the development image. +.PHONY: .shell +.shell: + $(DOCKER) run \ + --rm \ + -ti \ + -e GOCACHE=/tmp/.cache \ + -v $(PWD):$(PWD) \ + -w $(PWD) \ + --user $$(id -u):$$(id -g) \ + $(BUILDIMAGE) From 417a5254a4275b2c7074c0535bd28c48e08133dc Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 21 Nov 2022 10:55:10 +0100 Subject: [PATCH 4/4] Pin moq version to v0.2.7 Signed-off-by: Evan Lezar --- docker/Dockerfile.devel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.devel b/docker/Dockerfile.devel index bd93c72..4fa2027 100644 --- a/docker/Dockerfile.devel +++ b/docker/Dockerfile.devel @@ -15,4 +15,4 @@ ARG GOLANG_VERSION=1.16 FROM golang:${GOLANG_VERSION} RUN go get -u golang.org/x/lint/golint -RUN go install github.com/matryer/moq@latest +RUN go install github.com/matryer/moq@v0.2.7