Added PCI IDS support and DPU detection

This commit is contained in:
Zvonko Kaiser 2022-10-19 01:42:30 -07:00
parent 8b5e3d224d
commit f3102f8dcb
6 changed files with 35795 additions and 4 deletions

4
NOTICE Normal file
View File

@ -0,0 +1,4 @@
The file pkg/pciids/default_pci.ids is distributed under the 3-clause BSD License.
Maintained by Albert Pool, Martin Mares, and other volunteers from
the PCI ID Project at https://pci-ids.ucw.cz/.

View File

@ -129,6 +129,10 @@ func (cs *configSpaceIO) GetPCICapabilities() (*PCICapabilities, error) {
if int(eoffset) >= PCICfgSpaceExtendedSize { if int(eoffset) >= PCICfgSpaceExtendedSize {
return nil, fmt.Errorf("extended capability list pointer out of bounds") return nil, fmt.Errorf("extended capability list pointer out of bounds")
} }
// |31 20|19 16|15 0|
// |--------------------|------|-------------------------|
// | Next Cap Offset |Vers. |PCI Express Ext. Cap ID |
data := cs.Read32(int(eoffset)) data := cs.Read32(int(eoffset))
id := uint16(data & 0xffff) id := uint16(data & 0xffff)
version := uint8((data >> 16) & 0xf) version := uint8((data >> 16) & 0xf)
@ -136,7 +140,7 @@ func (cs *configSpaceIO) GetPCICapabilities() (*PCICapabilities, error) {
cs.Slice(int(eoffset), cs.Len()-int(eoffset)), cs.Slice(int(eoffset), cs.Len()-int(eoffset)),
version, version,
} }
eoffset = uint16((data >> 4) & 0xffc) eoffset = uint16((data >> 20) & 0xfff)
} }
return caps, nil return caps, nil

105
pkg/nvpci/mlxpci.go Normal file
View File

@ -0,0 +1,105 @@
/*
* Copyright (c) NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nvpci
import (
"fmt"
"strings"
)
const (
// PCIMellanoxVendorID represents PCI vendor id for Mellanox
PCIMellanoxVendorID uint16 = 0x15b3
// PCINetworkControllerClass represents the PCI class for network controllers
PCINetworkControllerClass uint32 = 0x020000
// PCIBridgeClass represents the PCI class for network controllers
PCIBridgeClass uint32 = 0x060400
)
// GetNetworkControllers returns all Mellanox Network Controller PCI devices on the system
func (p *nvpci) GetNetworkControllers() ([]*NvidiaPCIDevice, error) {
devices, err := p.GetAllDevices()
if err != nil {
return nil, fmt.Errorf("error getting all NVIDIA devices: %v", err)
}
var filtered []*NvidiaPCIDevice
for _, d := range devices {
if d.IsNetworkController() {
filtered = append(filtered, d)
}
}
return filtered, nil
}
// GetPciBridges retrieves all Mellanox PCI(e) Bridges
func (p *nvpci) GetPciBridges() ([]*NvidiaPCIDevice, error) {
devices, err := p.GetAllDevices()
if err != nil {
return nil, fmt.Errorf("error getting all NVIDIA devices: %v", err)
}
var filtered []*NvidiaPCIDevice
for _, d := range devices {
if d.IsPciBridge() {
filtered = append(filtered, d)
}
}
return filtered, nil
}
// IsNetworkController if class == 0x300
func (d *NvidiaPCIDevice) IsNetworkController() bool {
return d.Class == PCINetworkControllerClass
}
// IsPciBridge if class == 0x0604
func (d *NvidiaPCIDevice) IsPciBridge() bool {
return d.Class == PCIBridgeClass
}
// IsDPU returns if a device is a DPU
func (d *NvidiaPCIDevice) IsDPU() bool {
if !strings.Contains(d.DeviceName, "BlueField") {
return false
}
// DPU is a multifunction device hence look only for the .0 function
// and ignore subfunctions like .1, .2, etc.
if strings.HasSuffix(d.Address, ".0") {
return true
}
return false
}
// GetDPUs returns all Mellanox DPU devices on the system
func (p *nvpci) GetDPUs() ([]*NvidiaPCIDevice, error) {
devices, err := p.GetNetworkControllers()
if err != nil {
return nil, fmt.Errorf("error getting all network controllers: %v", err)
}
var filtered []*NvidiaPCIDevice
for _, d := range devices {
if d.IsDPU() {
filtered = append(filtered, d)
}
}
return filtered, nil
}

View File

@ -24,6 +24,8 @@ import (
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/pciids"
) )
const ( const (
@ -47,6 +49,9 @@ type Interface interface {
GetNVSwitches() ([]*NvidiaPCIDevice, error) GetNVSwitches() ([]*NvidiaPCIDevice, error)
GetGPUs() ([]*NvidiaPCIDevice, error) GetGPUs() ([]*NvidiaPCIDevice, error)
GetGPUByIndex(int) (*NvidiaPCIDevice, error) GetGPUByIndex(int) (*NvidiaPCIDevice, error)
GetNetworkControllers() ([]*NvidiaPCIDevice, error)
GetPciBridges() ([]*NvidiaPCIDevice, error)
GetDPUs() ([]*NvidiaPCIDevice, error)
} }
// MemoryResources a more human readable handle // MemoryResources a more human readable handle
@ -70,7 +75,9 @@ type NvidiaPCIDevice struct {
Address string Address string
Vendor uint16 Vendor uint16
Class uint32 Class uint32
ClassName string
Device uint16 Device uint16
DeviceName string
Driver string Driver string
IommuGroup int IommuGroup int
NumaNode int NumaNode int
@ -117,12 +124,14 @@ func (d *NvidiaPCIDevice) Reset() error {
// New interface that allows us to get a list of all NVIDIA PCI devices // New interface that allows us to get a list of all NVIDIA PCI devices
func New() Interface { func New() Interface {
return &nvpci{PCIDevicesRoot} return NewFrom(PCIDevicesRoot)
} }
// NewFrom interface allows us to get a list of all NVIDIA PCI devices at a specific root directory // NewFrom interface allows us to get a list of all NVIDIA PCI devices at a specific root directory
func NewFrom(root string) Interface { func NewFrom(root string) Interface {
return &nvpci{root} return &nvpci{
pciDevicesRoot: root,
}
} }
// GetAllDevices returns all Nvidia PCI devices on the system // GetAllDevices returns all Nvidia PCI devices on the system
@ -173,7 +182,7 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
return nil, fmt.Errorf("unable to convert vendor string to uint16: %v", vendorStr) return nil, fmt.Errorf("unable to convert vendor string to uint16: %v", vendorStr)
} }
if uint16(vendorID) != PCINvidiaVendorID { if uint16(vendorID) != PCINvidiaVendorID && uint16(vendorID) != PCIMellanoxVendorID {
return nil, nil return nil, nil
} }
@ -270,6 +279,8 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
} }
} }
pciDB := pciids.NewDB()
nvdevice := &NvidiaPCIDevice{ nvdevice := &NvidiaPCIDevice{
Path: devicePath, Path: devicePath,
Address: address, Address: address,
@ -282,6 +293,8 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
Config: config, Config: config,
Resources: resources, Resources: resources,
IsVF: isVF, IsVF: isVF,
DeviceName: pciDB.GetDeviceName(uint16(vendorID), uint16(deviceID)),
ClassName: pciDB.GetClassName(uint32(classID)),
} }
return nvdevice, nil return nvdevice, nil

35261
pkg/pciids/default_pci.ids Normal file

File diff suppressed because it is too large Load Diff

404
pkg/pciids/pciids.go Normal file
View File

@ -0,0 +1,404 @@
package pciids
import (
"bufio"
"bytes"
_ "embed" // Fallback is the embedded pci.ids db file
"fmt"
"io"
"os"
"strconv"
"strings"
)
// token what the Lexer retruns
type token int
const (
// ILLEGAL a token which the Lexer does not understand
ILLEGAL token = iota
// EOF end of file
EOF
// WS whitespace
WS
// NEWLINE '\n'
NEWLINE
// COMMENT '# something'
COMMENT
// VENDOR PCI vendor
VENDOR
// SUBVENDOR PCI subvendor
SUBVENDOR
// DEVICE PCI device
DEVICE
// CLASS PCI class
CLASS
// SUBCLASS PCI subclass
SUBCLASS
// PROGIF PCI programming interface
PROGIF
)
// literal values from the Lexer
type literal struct {
ID string
name string
SubName string
}
// scanner a lexical scanner
type scanner struct {
r *bufio.Reader
isVendor bool
}
// newScanner well a new scanner ...
func newScanner(r io.Reader) *scanner {
return &scanner{r: bufio.NewReader(r)}
}
// Since the pci.ids is line base we're consuming a whole line rather then only
// a single rune/char
func (s *scanner) readline() []byte {
ln, err := s.r.ReadBytes('\n')
if err == io.EOF {
return []byte{'E', 'O', 'F'}
}
if err != nil {
fmt.Printf("ReadBytes failed with %v", err)
return []byte{}
}
return ln
}
func scanClass(line []byte) (token, literal) {
class := string(line[1:])
return CLASS, scanEntry([]byte(class), 2)
}
func scanSubVendor(line []byte) (token, literal) {
trim0 := strings.TrimSpace(string(line))
subv := string(trim0[:4])
trim1 := strings.TrimSpace(trim0[4:])
subd := string(trim1[:4])
subn := strings.TrimSpace(trim1[4:])
return SUBVENDOR, literal{subv, subd, subn}
}
func scanEntry(line []byte, offset uint) literal {
trim := strings.TrimSpace(string(line))
id := string(trim[:offset])
name := strings.TrimSpace(trim[offset:])
return literal{id, name, ""}
}
func isLeadingOneTab(ln []byte) bool { return (ln[0] == '\t') && (ln[1] != '\t') }
func isLeadingTwoTabs(ln []byte) bool { return (ln[0] == '\t') && (ln[1] == '\t') }
func isHexDigit(ln []byte) bool { return (ln[0] >= '0' && ln[0] <= '9') }
func isHexLetter(ln []byte) bool { return (ln[0] >= 'a' && ln[0] <= 'f') }
func isVendor(ln []byte) bool { return isHexDigit(ln) || isHexLetter(ln) }
func isEOF(ln []byte) bool { return (ln[0] == 'E' && ln[1] == 'O' && ln[2] == 'F') }
func isComment(ln []byte) bool { return (ln[0] == '#') }
func isSubVendor(ln []byte) bool { return isLeadingTwoTabs(ln) }
func isDevice(ln []byte) bool { return isLeadingOneTab(ln) }
func isNewline(ln []byte) bool { return (ln[0] == '\n') }
// List of known device classes, subclasses and programming interfaces
func isClass(ln []byte) bool { return (ln[0] == 'C') }
func isProgIf(ln []byte) bool { return isLeadingTwoTabs(ln) }
func isSubClass(ln []byte) bool { return isLeadingOneTab(ln) }
// unread places the previously read rune back on the reader.
func (s *scanner) unread() { _ = s.r.UnreadRune() }
// scan returns the next token and literal value.
func (s *scanner) scan() (tok token, lit literal) {
line := s.readline()
if isEOF(line) {
return EOF, literal{}
}
if isNewline(line) {
return NEWLINE, literal{ID: string('\n')}
}
if isComment(line) {
return COMMENT, literal{ID: string(line)}
}
// vendors
if isVendor(line) {
s.isVendor = true
return VENDOR, scanEntry(line, 4)
}
if isSubVendor(line) && s.isVendor {
return scanSubVendor(line)
}
if isDevice(line) && s.isVendor {
return DEVICE, scanEntry(line, 4)
}
// classes
if isClass(line) {
s.isVendor = false
return scanClass(line)
}
if isProgIf(line) && !s.isVendor {
return PROGIF, scanEntry(line, 2)
}
if isSubClass(line) && !s.isVendor {
return SUBCLASS, scanEntry(line, 2)
}
return ILLEGAL, literal{ID: string(line)}
}
// parser reads the tokens returned by the Lexer and constructs the AST
type parser struct {
s *scanner
buf struct {
tok token
lit literal
n int
}
}
// This is a fallback if all of the locations fail
//go:embed default_pci.ids
var defaultPCIdb []byte
// NewDB Parse the PCI DB in its default locations or use the default
// builtin pci.ids db.
func NewDB() Interface {
// Various locations of pci.ids for differente distributions these may be more
// up to date then the embedded pci.ids db
pcidbs := []string{
"/usr/share/misc/pci.ids", // Ubuntu
"/usr/local/share/pci.ids", // RHEL like with manual update
"/usr/share/hwdata/pci.ids", // RHEL like
"/usr/share/pci.ids", // SUSE
}
return newParser(pcidbs).parse()
}
// newParser will attempt to read the db pci.ids from well known places or fall
// back to an internal db
func newParser(pcidbs []string) *parser {
for _, db := range pcidbs {
file, err := os.ReadFile(db)
if err != nil {
continue
}
return newParserFromReader(bufio.NewReader(bytes.NewReader(file)))
}
// We're using go embed above to have the byte array
// correctly initialized with the internal shipped db
// if we cannot find an up to date in the filesystem
return newParserFromReader(bufio.NewReader(bytes.NewReader(defaultPCIdb)))
}
func newParserFromReader(r *bufio.Reader) *parser {
return &parser{s: newScanner(r)}
}
func (p *parser) scan() (tok token, lit literal) {
if p.buf.n != 0 {
p.buf.n = 0
return p.buf.tok, p.buf.lit
}
tok, lit = p.s.scan()
p.buf.tok, p.buf.lit = tok, lit
return
}
func (p *parser) unscan() { p.buf.n = 1 }
var _ Interface = (*pcidb)(nil)
// Interface returns textual description of specific attributes of PCI devices
type Interface interface {
GetDeviceName(uint16, uint16) string
GetClassName(uint32) string
}
// GetDeviceName return the textual description of the PCI device
func (d *pcidb) GetDeviceName(vendorID uint16, deviceID uint16) string {
return d.vendors[vendorID].devices[deviceID].name
}
// GetClassName resturn the textual description of the PCI device class
func (d *pcidb) GetClassName(classID uint32) string {
return d.classes[classID].name
}
// pcidb The complete set of PCI vendors and PCI classes
type pcidb struct {
vendors map[uint16]vendor
classes map[uint32]class
}
// vendor PCI vendors/devices/subVendors/SubDevices
type vendor struct {
name string
devices map[uint16]device
}
// subVendor PCI subVendor
type subVendor struct {
SubDevices map[uint16]SubDevice
}
// SubDevice PCI SubDevice
type SubDevice struct {
name string
}
// device PCI device
type device struct {
name string
subVendors map[uint16]subVendor
}
// class PCI classes/subClasses/Programming Interfaces
type class struct {
name string
subClasses map[uint32]subClass
}
// subClass PCI subClass
type subClass struct {
name string
progIfs map[uint8]progIf
}
// progIf PCI Programming Interface
type progIf struct {
name string
}
// parse parses a PCI IDS entry
func (p *parser) parse() Interface {
db := &pcidb{
vendors: map[uint16]vendor{},
classes: map[uint32]class{},
}
// Used for housekeeping, breadcrumb for aggregated types
var hkVendor vendor
var hkDevice device
var hkClass class
var hkSubClass subClass
var hkFullID uint32 = 0
var hkFullName [2]string
for {
tok, lit := p.scan()
// We're ignoring COMMENT, NEWLINE
// An EOF will break the loop
if tok == EOF {
break
}
// PCI vendors -------------------------------------------------
if tok == VENDOR {
id, _ := strconv.ParseUint(lit.ID, 16, 16)
db.vendors[uint16(id)] = vendor{
name: lit.name,
devices: map[uint16]device{},
}
hkVendor = db.vendors[uint16(id)]
}
if tok == DEVICE {
id, _ := strconv.ParseUint(lit.ID, 16, 16)
hkVendor.devices[uint16(id)] = device{
name: lit.name,
subVendors: map[uint16]subVendor{},
}
hkDevice = hkVendor.devices[uint16(id)]
}
if tok == SUBVENDOR {
id, _ := strconv.ParseUint(lit.ID, 16, 16)
hkDevice.subVendors[uint16(id)] = subVendor{
SubDevices: map[uint16]SubDevice{},
}
subvendor := hkDevice.subVendors[uint16(id)]
subid, _ := strconv.ParseUint(lit.name, 16, 16)
subvendor.SubDevices[uint16(subid)] = SubDevice{
name: lit.SubName,
}
}
// PCI classes -------------------------------------------------
if tok == CLASS {
id, _ := strconv.ParseUint(lit.ID, 16, 32)
db.classes[uint32(id)] = class{
name: lit.name,
subClasses: map[uint32]subClass{},
}
hkClass = db.classes[uint32(id)]
hkFullID = uint32(id) << 16
hkFullID = hkFullID & 0xFFFF0000
hkFullName[0] = fmt.Sprintf("%s (%02x)", lit.name, id)
}
if tok == SUBCLASS {
id, _ := strconv.ParseUint(lit.ID, 16, 8)
hkClass.subClasses[uint32(id)] = subClass{
name: lit.name,
progIfs: map[uint8]progIf{},
}
hkSubClass = hkClass.subClasses[uint32(id)]
// Clear the last detected sub class
hkFullID = hkFullID & 0xFFFF0000
hkFullID = hkFullID | uint32(id)<<8
// Clear the last detected prog iface
hkFullID = hkFullID & 0xFFFFFF00
hkFullName[1] = fmt.Sprintf("%s (%02x)", lit.name, id)
db.classes[uint32(hkFullID)] = class{
name: hkFullName[0] + " | " + hkFullName[1],
}
}
if tok == PROGIF {
id, _ := strconv.ParseUint(lit.ID, 16, 8)
hkSubClass.progIfs[uint8(id)] = progIf{
name: lit.name,
}
finalID := hkFullID | uint32(id)
name := fmt.Sprintf("%s (%02x)", lit.name, id)
finalName := hkFullName[0] + " | " + hkFullName[1] + " | " + name
db.classes[finalID] = class{
name: finalName,
}
}
if tok == ILLEGAL {
fmt.Printf("warning: illegal token %s %s cannot parse PCI IDS, database may be incomplete ", lit.ID, lit.name)
}
}
return db
}