From 2c175dcdbfe3a628ca6aa992f1198b2db200caa9 Mon Sep 17 00:00:00 2001 From: Zvonko Kaiser Date: Mon, 7 Feb 2022 12:15:46 +0100 Subject: [PATCH 1/3] Fix the linting errors --- pkg/nvpci/bytes/bytes.go | 15 +++++++++++---- pkg/nvpci/config.go | 5 +++++ pkg/nvpci/mmio/mmio.go | 21 ++++++++++++--------- pkg/nvpci/mmio/mock.go | 2 ++ pkg/nvpci/mock.go | 7 +++++++ pkg/nvpci/nvpci.go | 17 +++++++++++------ pkg/nvpci/resources.go | 11 +++++++---- 7 files changed, 55 insertions(+), 23 deletions(-) diff --git a/pkg/nvpci/bytes/bytes.go b/pkg/nvpci/bytes/bytes.go index daa94a7..7788a1f 100644 --- a/pkg/nvpci/bytes/bytes.go +++ b/pkg/nvpci/bytes/bytes.go @@ -21,10 +21,12 @@ import ( "unsafe" ) +// Raw returns just the bytes without any assumptions about layout type Raw interface { Raw() *[]byte } +// Reader used to read various data sizes in the byte array type Reader interface { Read8(pos int) uint8 Read16(pos int) uint16 @@ -33,6 +35,7 @@ type Reader interface { Len() int } +// Writer used to write various sizes of data in the byte array type Writer interface { Write8(pos int, value uint8) Write16(pos int, value uint16) @@ -41,6 +44,7 @@ type Writer interface { Len() int } +// Bytes object for manipulating arbitrary byte arrays type Bytes interface { Raw Reader @@ -66,22 +70,25 @@ func init() { } } +// New raw bytearray func New(data *[]byte) Bytes { return (*native)(data) } +// NewLittleEndian little endian ordering of bytes func NewLittleEndian(data *[]byte) Bytes { if nativeByteOrder == binary.LittleEndian { return (*native)(data) - } else { - return (*swapbo)(data) } + + return (*swapbo)(data) } +// NewBigEndian big endian ordering of bytes func NewBigEndian(data *[]byte) Bytes { if nativeByteOrder == binary.BigEndian { return (*native)(data) - } else { - return (*swapbo)(data) } + + return (*swapbo)(data) } diff --git a/pkg/nvpci/config.go b/pkg/nvpci/config.go index 214a190..5c4fc7f 100644 --- a/pkg/nvpci/config.go +++ b/pkg/nvpci/config.go @@ -29,10 +29,12 @@ const ( 
pciCapabilityListPointer = 0x34 ) +// ConfigSpace PCI configuration space (standard extended) file path type ConfigSpace struct { Path string } +// ConfigSpaceIO Interface for reading and writing raw and preconfigured values type ConfigSpaceIO interface { bytes.Bytes GetVendorID() uint16 @@ -44,15 +46,18 @@ type configSpaceIO struct { bytes.Bytes } +// PCIStandardCapability standard PCI config space type PCIStandardCapability struct { bytes.Bytes } +// PCIExtendedCapability extended PCI config space type PCIExtendedCapability struct { bytes.Bytes Version uint8 } +// PCICapabilities combines the standard and extended config space type PCICapabilities struct { Standard map[uint8]*PCIStandardCapability Extended map[uint16]*PCIExtendedCapability diff --git a/pkg/nvpci/mmio/mmio.go b/pkg/nvpci/mmio/mmio.go index ab101f5..602486e 100644 --- a/pkg/nvpci/mmio/mmio.go +++ b/pkg/nvpci/mmio/mmio.go @@ -25,6 +25,7 @@ import ( "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci/bytes" ) +// Mmio memory map a region type Mmio interface { bytes.Raw bytes.Reader @@ -41,25 +42,25 @@ type mmio struct { } func open(path string, offset int, size int, flags int) (Mmio, error) { - var mmap_flags int + var mmapFlags int switch flags { case os.O_RDONLY: - mmap_flags = syscall.PROT_READ + mmapFlags = syscall.PROT_READ case os.O_RDWR: - mmap_flags = syscall.PROT_READ | syscall.PROT_WRITE + mmapFlags = syscall.PROT_READ | syscall.PROT_WRITE default: - return nil, fmt.Errorf("invalid flags: %v\n", flags) + return nil, fmt.Errorf("invalid flags: %v", flags) } file, err := os.OpenFile(path, flags, 0) if err != nil { - return nil, fmt.Errorf("failed to open file: %v\n", err) + return nil, fmt.Errorf("failed to open file: %v", err) } defer file.Close() fi, err := file.Stat() if err != nil { - return nil, fmt.Errorf("failed to get file info: %v\n", err) + return nil, fmt.Errorf("failed to get file info: %v", err) } if size > int(fi.Size()) { @@ -74,19 +75,21 @@ func open(path string, offset int, 
size int, flags int) (Mmio, error) { int(file.Fd()), int64(offset), size, - mmap_flags, + mmapFlags, syscall.MAP_SHARED) if err != nil { - return nil, fmt.Errorf("failed to mmap file: %v\n", err) + return nil, fmt.Errorf("failed to mmap file: %v", err) } return &mmio{bytes.New(&mmap)}, nil } +// OpenRO open region readonly func OpenRO(path string, offset int, size int) (Mmio, error) { return open(path, offset, size, os.O_RDONLY) } +// OpenRW open region read write func OpenRW(path string, offset int, size int) (Mmio, error) { return open(path, offset, size, os.O_RDWR) } @@ -106,7 +109,7 @@ func (m *mmio) BigEndian() Mmio { func (m *mmio) Close() error { err := syscall.Munmap(*m.Bytes.Raw()) if err != nil { - return fmt.Errorf("failed to munmap file: %v\n", err) + return fmt.Errorf("failed to munmap file: %v", err) } return nil } diff --git a/pkg/nvpci/mmio/mock.go b/pkg/nvpci/mmio/mock.go index ff89581..42a86b1 100644 --- a/pkg/nvpci/mmio/mock.go +++ b/pkg/nvpci/mmio/mock.go @@ -48,10 +48,12 @@ func mockOpen(source *[]byte, offset int, size int, rw bool) (Mmio, error) { return m, nil } +// MockOpenRO open read only func MockOpenRO(source *[]byte, offset int, size int) (Mmio, error) { return mockOpen(source, offset, size, false) } +// MockOpenRW open read write func MockOpenRW(source *[]byte, offset int, size int) (Mmio, error) { return mockOpen(source, offset, size, true) } diff --git a/pkg/nvpci/mock.go b/pkg/nvpci/mock.go index fdeb461..60d57ae 100644 --- a/pkg/nvpci/mock.go +++ b/pkg/nvpci/mock.go @@ -25,12 +25,14 @@ import ( "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci/bytes" ) +// MockNvpci mock pci device type MockNvpci struct { *nvpci } var _ Interface = (*MockNvpci)(nil) +// NewMockNvpci create new mock PCI and remove old devices func NewMockNvpci() (mock *MockNvpci, rerr error) { rootDir, err := ioutil.TempDir("", "") if err != nil { @@ -49,10 +51,12 @@ func NewMockNvpci() (mock *MockNvpci, rerr error) { return mock, nil } +// Cleanup remove the 
mocked PCI devices root folder func (m *MockNvpci) Cleanup() { os.RemoveAll(m.pciDevicesRoot) } +// AddMockA100 Create an A100 like GPU mock device func (m *MockNvpci) AddMockA100(address string, numaNode int) error { deviceDir := filepath.Join(m.pciDevicesRoot, address) err := os.MkdirAll(deviceDir, 0755) @@ -111,6 +115,9 @@ func (m *MockNvpci) AddMockA100(address string, numaNode int) error { bar0 := []uint64{0x00000000c2000000, 0x00000000c2ffffff, 0x0000000000040200} resource, err := os.Create(filepath.Join(deviceDir, "resource")) + if err != nil { + return err + } _, err = resource.WriteString(fmt.Sprintf("0x%x 0x%x 0x%x", bar0[0], bar0[1], bar0[2])) if err != nil { return err diff --git a/pkg/nvpci/nvpci.go b/pkg/nvpci/nvpci.go index f4069f9..1638369 100644 --- a/pkg/nvpci/nvpci.go +++ b/pkg/nvpci/nvpci.go @@ -66,30 +66,34 @@ type NvidiaPCIDevice struct { Resources map[int]*MemoryResource } +// IsVGAController if class == 0x300 func (d *NvidiaPCIDevice) IsVGAController() bool { return d.Class == pciVgaControllerClass } +// Is3DController if class == 0x302 func (d *NvidiaPCIDevice) Is3DController() bool { return d.Class == pci3dControllerClass } +// IsNVSwitch if class == 0x068 func (d *NvidiaPCIDevice) IsNVSwitch() bool { return d.Class == pciNvSwitchClass } +// IsGPU either VGA for older cards or 3D for newer func (d *NvidiaPCIDevice) IsGPU() bool { return d.IsVGAController() || d.Is3DController() } +// IsResetAvailable some devices can be reset without rebooting, +// check if applicable func (d *NvidiaPCIDevice) IsResetAvailable() bool { _, err := os.Stat(path.Join(d.Path, "reset")) - if err != nil { - return false - } - return true + return err == nil } +// Reset perform a reset to apply a new configuration at HW level func (d *NvidiaPCIDevice) Reset() error { err := ioutil.WriteFile(path.Join(d.Path, "reset"), []byte("1"), 0) if err != nil { @@ -98,6 +102,7 @@ func (d *NvidiaPCIDevice) Reset() error { return nil } +// New interface that allows us to get a 
list of all NVIDIA PCI devices func New() Interface { return &nvpci{pciDevicesRoot} } @@ -202,7 +207,7 @@ func (p *nvpci) GetAllDevices() ([]*NvidiaPCIDevice, error) { nvdevices = append(nvdevices, nvdevice) } - addressToId := func(address string) uint64 { + addressToID := func(address string) uint64 { address = strings.ReplaceAll(address, ":", "") address = strings.ReplaceAll(address, ".", "") id, _ := strconv.ParseUint(address, 16, 64) @@ -210,7 +215,7 @@ func (p *nvpci) GetAllDevices() ([]*NvidiaPCIDevice, error) { } sort.Slice(nvdevices, func(i, j int) bool { - return addressToId(nvdevices[i].Address) < addressToId(nvdevices[j].Address) + return addressToID(nvdevices[i].Address) < addressToID(nvdevices[j].Address) }) return nvdevices, nil diff --git a/pkg/nvpci/resources.go b/pkg/nvpci/resources.go index 618f4c0..5c985ac 100644 --- a/pkg/nvpci/resources.go +++ b/pkg/nvpci/resources.go @@ -28,6 +28,7 @@ const ( pmcBigEndian = 0x01000001 ) +// MemoryResource represents a mmio region type MemoryResource struct { Start uintptr End uintptr @@ -35,10 +36,11 @@ type MemoryResource struct { Path string } +// Open read write mmio region func (mr *MemoryResource) Open() (mmio.Mmio, error) { rw, err := mmio.OpenRW(mr.Path, 0, int(mr.End-mr.Start+1)) if err != nil { - return nil, fmt.Errorf("failed to open file for mmio: %v\n", err) + return nil, fmt.Errorf("failed to open file for mmio: %v", err) } switch rw.Read32(pmcEndianRegister) { case pmcBigEndian: @@ -46,13 +48,14 @@ func (mr *MemoryResource) Open() (mmio.Mmio, error) { case pmcLittleEndian: return rw.LittleEndian(), nil } - return nil, fmt.Errorf("unknown endianness for mmio: %v\n", err) + return nil, fmt.Errorf("unknown endianness for mmio: %v", err) } +// OpenReadOnly read only mmio region func (mr *MemoryResource) OpenReadOnly() (mmio.Mmio, error) { ro, err := mmio.OpenRO(mr.Path, 0, int(mr.End-mr.Start+1)) if err != nil { - return nil, fmt.Errorf("failed to open file for mmio: %v\n", err) + return nil, 
fmt.Errorf("failed to open file for mmio: %v", err) } switch ro.Read32(pmcEndianRegister) { case pmcBigEndian: @@ -60,5 +63,5 @@ func (mr *MemoryResource) OpenReadOnly() (mmio.Mmio, error) { case pmcLittleEndian: return ro.LittleEndian(), nil } - return nil, fmt.Errorf("unknown endianness for mmio: %v\n", err) + return nil, fmt.Errorf("unknown endianness for mmio: %v", err) } From 1f718a1568917330ba9e43a5b06b87ad0c2d7446 Mon Sep 17 00:00:00 2001 From: zvonkok Date: Wed, 16 Feb 2022 10:44:32 +0100 Subject: [PATCH 2/3] Update the Open API to OpenRO and OpenRW --- pkg/nvpci/nvpci_test.go | 2 +- pkg/nvpci/resources.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/nvpci/nvpci_test.go b/pkg/nvpci/nvpci_test.go index 807ec0a..f651a56 100644 --- a/pkg/nvpci/nvpci_test.go +++ b/pkg/nvpci/nvpci_test.go @@ -52,7 +52,7 @@ func TestNvpci(t *testing.T) { require.Equal(t, 0, len(capabilities.Extended), "Wrong number of extended PCI capabilities") resource0 := devices[0].Resources[0] - bar0, err := resource0.Open() + bar0, err := resource0.OpenRW() require.Nil(t, err, "Error opening bar0") defer func() { err := bar0.Close() diff --git a/pkg/nvpci/resources.go b/pkg/nvpci/resources.go index 5c985ac..90b82fc 100644 --- a/pkg/nvpci/resources.go +++ b/pkg/nvpci/resources.go @@ -36,8 +36,8 @@ type MemoryResource struct { Path string } -// Open read write mmio region -func (mr *MemoryResource) Open() (mmio.Mmio, error) { +// OpenRW read write mmio region +func (mr *MemoryResource) OpenRW() (mmio.Mmio, error) { rw, err := mmio.OpenRW(mr.Path, 0, int(mr.End-mr.Start+1)) if err != nil { return nil, fmt.Errorf("failed to open file for mmio: %v", err) @@ -51,8 +51,8 @@ func (mr *MemoryResource) Open() (mmio.Mmio, error) { return nil, fmt.Errorf("unknown endianness for mmio: %v", err) } -// OpenReadOnly read only mmio region -func (mr *MemoryResource) OpenReadOnly() (mmio.Mmio, error) { +// OpenRO read only mmio region +func (mr *MemoryResource) OpenRO() 
(mmio.Mmio, error) { ro, err := mmio.OpenRO(mr.Path, 0, int(mr.End-mr.Start+1)) if err != nil { return nil, fmt.Errorf("failed to open file for mmio: %v", err) From 9196546dcc20580775ff7a3b2e2b9e90ffd74cb1 Mon Sep 17 00:00:00 2001 From: zvonkok Date: Wed, 16 Feb 2022 10:45:15 +0100 Subject: [PATCH 3/3] Add the status byte check --- pkg/nvpci/config.go | 7 +++++++ pkg/nvpci/mock.go | 1 + 2 files changed, 8 insertions(+) diff --git a/pkg/nvpci/config.go b/pkg/nvpci/config.go index 5c4fc7f..5373a0f 100644 --- a/pkg/nvpci/config.go +++ b/pkg/nvpci/config.go @@ -27,6 +27,8 @@ const ( pciCfgSpaceStandardSize = 256 pciCfgSpaceExtendedSize = 4096 pciCapabilityListPointer = 0x34 + pciStatusCapabilityList = 0x10 + pciStatusBytePosition = 0x06 ) // ConfigSpace PCI configuration space (standard extended) file path @@ -85,6 +87,11 @@ func (cs *configSpaceIO) GetPCICapabilities() (*PCICapabilities, error) { make(map[uint16]*PCIExtendedCapability), } + support := cs.Read8(pciStatusBytePosition) & pciStatusCapabilityList + if support == 0 { + return nil, fmt.Errorf("pci device does not support capability list") + } + soffset := cs.Read8(pciCapabilityListPointer) if int(soffset) >= cs.Len() { return nil, fmt.Errorf("capability list pointer out of bounds") diff --git a/pkg/nvpci/mock.go b/pkg/nvpci/mock.go index 60d57ae..7448d0d 100644 --- a/pkg/nvpci/mock.go +++ b/pkg/nvpci/mock.go @@ -108,6 +108,7 @@ func (m *MockNvpci) AddMockA100(address string, numaNode int) error { data := bytes.New(&_data) data.Write16(0, pciNvidiaVendorID) data.Write16(2, uint16(0x20bf)) + data.Write8(pciStatusBytePosition, pciStatusCapabilityList) _, err = config.Write(*data.Raw()) if err != nil { return err