diff --git a/Makefile b/Makefile index 78a5692..edb90f6 100644 --- a/Makefile +++ b/Makefile @@ -96,3 +96,15 @@ $(DOCKER_TARGETS): docker-%: .build-image --user $$(id -u):$$(id -g) \ $(BUILDIMAGE) \ make $(*) + +# Start an interactive shell using the development image. +PHONY: .shell +.shell: + $(DOCKER) run \ + --rm \ + -ti \ + -e GOCACHE=/tmp/.cache \ + -v $(PWD):$(PWD) \ + -w $(PWD) \ + --user $$(id -u):$$(id -g) \ + $(BUILDIMAGE) diff --git a/docker/Dockerfile.devel b/docker/Dockerfile.devel index bd93c72..4fa2027 100644 --- a/docker/Dockerfile.devel +++ b/docker/Dockerfile.devel @@ -15,4 +15,4 @@ ARG GOLANG_VERSION=1.16 FROM golang:${GOLANG_VERSION} RUN go get -u golang.org/x/lint/golint -RUN go install github.com/matryer/moq@latest +RUN go install github.com/matryer/moq@v0.2.7 diff --git a/pkg/nvlib/device/api.go b/pkg/nvlib/device/api.go index 498bda8..7741915 100644 --- a/pkg/nvlib/device/api.go +++ b/pkg/nvlib/device/api.go @@ -35,8 +35,8 @@ type Interface interface { } type devicelib struct { - nvml nvml.Interface - selectedDeviceClasses map[Class]struct{} + nvml nvml.Interface + skippedDevices map[string]struct{} } var _ Interface = &devicelib{} @@ -50,10 +50,8 @@ func New(opts ...Option) Interface { if d.nvml == nil { d.nvml = nvml.New() } - if d.selectedDeviceClasses == nil { - d.selectedDeviceClasses = map[Class]struct{}{ - ClassCompute: {}, - } + if d.skippedDevices == nil { + WithSkippedDevices("NVIDIA DGX Display")(d) } return d } @@ -65,14 +63,14 @@ func WithNvml(nvml nvml.Interface) Option { } } -// WithSelectedDeviceClasses selects the specified device classes when filtering devices -func WithSelectedDeviceClasses(classes ...Class) Option { +// WithSkippedDevices provides an Option to set devices to be skipped by model name +func WithSkippedDevices(names ...string) Option { return func(d *devicelib) { - if d.selectedDeviceClasses == nil { - d.selectedDeviceClasses = make(map[Class]struct{}) + if d.skippedDevices == nil { + d.skippedDevices = make(map[string]struct{}) } - for _, c := range classes { - d.selectedDeviceClasses[c] = struct{}{} + for _, name := range names { + d.skippedDevices[name] = struct{}{} } } } diff --git a/pkg/nvlib/device/device.go b/pkg/nvlib/device/device.go index bb0eee1..bcc1409 100644 --- a/pkg/nvlib/device/device.go +++ b/pkg/nvlib/device/device.go @@ -18,11 +18,9 @@ package device import ( "fmt" - "strings" "github.com/NVIDIA/go-nvml/pkg/dl" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml" - "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci" ) // Device defines the set of extended functions associated with a device.Device @@ -41,15 +39,6 @@ type device struct { lib *devicelib } -// Class represents the PCI class for a device -type Class uint32 - -// Define constants for common device classes -const ( - ClassCompute = Class(nvpci.PCI3dControllerClass) - ClassDisplay = Class(nvpci.PCIVgaControllerClass) -) - var _ Device = &device{} // NewDevice builds a new Device from an nvml.Device @@ -62,16 +51,6 @@ func (d *devicelib) newDevice(dev nvml.Device) (*device, error) { return &device{dev, d}, nil } -// classIsSelected checks whether the specified class has been selected when constructing the devicelib -func (d *devicelib) classIsSelected(c Class) bool { - if d.selectedDeviceClasses == nil { - return false - } - _, exists := d.selectedDeviceClasses[c] - - return exists -} - // IsMigCapable checks if a device is capable of having MIG paprtitions created on it func (d *device) IsMigCapable() (bool, error) { err := nvmlLookupSymbol("nvmlDeviceGetMigMode") @@ -209,33 +188,18 @@ func (d *device) GetMigProfiles() ([]MigProfile, error) { return profiles, nil } -// getClass returns the PCI device class for the device -func (d *device) getClass() (Class, error) { - info, ret := d.GetPciInfo() +// isSkipped checks whether the device should be skipped. +func (d *device) isSkipped() (bool, error) { + name, ret := d.GetName() if ret != nvml.SUCCESS { - return 0, fmt.Errorf("failed to get PCI info: %v", ret) + return false, fmt.Errorf("error getting device name: %v", ret) } - // We convert the BusId to a string - var bytes []byte - for _, b := range info.BusId { - if byte(b) == '\x00' { - break - } - bytes = append(bytes, byte(b)) - } - id := strings.ToLower(string(bytes)) - - if id != "0000" { - id = strings.TrimPrefix(id, "0000") + if _, exists := d.lib.skippedDevices[name]; exists { + return true, nil } - device, err := nvpci.New().GetGPUByPciBusID(id) - if err != nil { - return 0, fmt.Errorf("failed to construct PCI device: %v", err) - } - - return Class(device.Class), nil + return false, nil } // VisitDevices visits each top-level device and invokes a callback function for it @@ -255,11 +219,11 @@ func (d *devicelib) VisitDevices(visit func(int, Device) error) error { return fmt.Errorf("error creating new device wrapper: %v", err) } - class, err := dev.getClass() + isSkipped, err := dev.isSkipped() if err != nil { - return fmt.Errorf("error getting PCI device class for device: %v", err) + return fmt.Errorf("error checking whether device is skipped: %v", err) } - if !d.classIsSelected(class) { + if isSkipped { continue }