Added PCI IDS support and DPU detection

This commit is contained in:
Zvonko Kaiser 2022-10-19 01:42:30 -07:00
parent 8b5e3d224d
commit f3102f8dcb
6 changed files with 35795 additions and 4 deletions

4
NOTICE Normal file
View File

@ -0,0 +1,4 @@
The file pkg/pciids/default_pci.ids is distributed under the 3-clause BSD License.
Maintained by Albert Pool, Martin Mares, and other volunteers from
the PCI ID Project at https://pci-ids.ucw.cz/.

View File

@ -129,6 +129,10 @@ func (cs *configSpaceIO) GetPCICapabilities() (*PCICapabilities, error) {
if int(eoffset) >= PCICfgSpaceExtendedSize {
return nil, fmt.Errorf("extended capability list pointer out of bounds")
}
// |31 20|19 16|15 0|
// |--------------------|------|-------------------------|
// | Next Cap Offset |Vers. |PCI Express Ext. Cap ID |
data := cs.Read32(int(eoffset))
id := uint16(data & 0xffff)
version := uint8((data >> 16) & 0xf)
@ -136,7 +140,7 @@ func (cs *configSpaceIO) GetPCICapabilities() (*PCICapabilities, error) {
cs.Slice(int(eoffset), cs.Len()-int(eoffset)),
version,
}
eoffset = uint16((data >> 4) & 0xffc)
eoffset = uint16((data >> 20) & 0xfff)
}
return caps, nil

105
pkg/nvpci/mlxpci.go Normal file
View File

@ -0,0 +1,105 @@
/*
* Copyright (c) NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nvpci
import (
"fmt"
"strings"
)
// PCI vendor and class identifiers used to select Mellanox devices.
const (
// PCIMellanoxVendorID represents PCI vendor id for Mellanox
PCIMellanoxVendorID uint16 = 0x15b3
// PCINetworkControllerClass represents the PCI class for network controllers
PCINetworkControllerClass uint32 = 0x020000
// PCIBridgeClass represents the PCI class for PCI bridges
PCIBridgeClass uint32 = 0x060400
)
// GetNetworkControllers returns every device reported by GetAllDevices whose
// class is PCINetworkControllerClass. An error from GetAllDevices is returned
// with additional context and a nil slice.
func (p *nvpci) GetNetworkControllers() ([]*NvidiaPCIDevice, error) {
	all, err := p.GetAllDevices()
	if err != nil {
		return nil, fmt.Errorf("error getting all NVIDIA devices: %v", err)
	}

	var controllers []*NvidiaPCIDevice
	for _, dev := range all {
		if !dev.IsNetworkController() {
			continue
		}
		controllers = append(controllers, dev)
	}

	return controllers, nil
}
// GetPciBridges returns every device reported by GetAllDevices whose class is
// PCIBridgeClass. An error from GetAllDevices is returned with additional
// context and a nil slice.
func (p *nvpci) GetPciBridges() ([]*NvidiaPCIDevice, error) {
	all, err := p.GetAllDevices()
	if err != nil {
		return nil, fmt.Errorf("error getting all NVIDIA devices: %v", err)
	}

	var bridges []*NvidiaPCIDevice
	for _, dev := range all {
		if !dev.IsPciBridge() {
			continue
		}
		bridges = append(bridges, dev)
	}

	return bridges, nil
}
// IsNetworkController reports whether the device class equals
// PCINetworkControllerClass (0x020000). The comparison is exact, so devices
// with any other subclass or programming interface do not match.
func (d *NvidiaPCIDevice) IsNetworkController() bool {
return d.Class == PCINetworkControllerClass
}
// IsPciBridge reports whether the device class equals PCIBridgeClass
// (0x060400). The comparison is exact, so devices with any other subclass or
// programming interface do not match.
func (d *NvidiaPCIDevice) IsPciBridge() bool {
return d.Class == PCIBridgeClass
}
// IsDPU reports whether the device is function 0 of a BlueField device.
//
// A device qualifies when its DeviceName contains "BlueField" and its Address
// ends in ".0". The DPU is a multifunction device, so the remaining functions
// (.1, .2, ...) are deliberately not reported.
func (d *NvidiaPCIDevice) IsDPU() bool {
	isBlueField := strings.Contains(d.DeviceName, "BlueField")
	isFunctionZero := strings.HasSuffix(d.Address, ".0")
	return isBlueField && isFunctionZero
}
// GetDPUs returns the network controllers for which IsDPU is true. An error
// from GetNetworkControllers is returned with additional context and a nil
// slice.
func (p *nvpci) GetDPUs() ([]*NvidiaPCIDevice, error) {
	controllers, err := p.GetNetworkControllers()
	if err != nil {
		return nil, fmt.Errorf("error getting all network controllers: %v", err)
	}

	var dpus []*NvidiaPCIDevice
	for _, dev := range controllers {
		if !dev.IsDPU() {
			continue
		}
		dpus = append(dpus, dev)
	}

	return dpus, nil
}

View File

@ -24,6 +24,8 @@ import (
"sort"
"strconv"
"strings"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/pciids"
)
const (
@ -47,6 +49,9 @@ type Interface interface {
GetNVSwitches() ([]*NvidiaPCIDevice, error)
GetGPUs() ([]*NvidiaPCIDevice, error)
GetGPUByIndex(int) (*NvidiaPCIDevice, error)
GetNetworkControllers() ([]*NvidiaPCIDevice, error)
GetPciBridges() ([]*NvidiaPCIDevice, error)
GetDPUs() ([]*NvidiaPCIDevice, error)
}
// MemoryResources a more human readable handle
@ -70,7 +75,9 @@ type NvidiaPCIDevice struct {
Address string
Vendor uint16
Class uint32
ClassName string
Device uint16
DeviceName string
Driver string
IommuGroup int
NumaNode int
@ -117,12 +124,14 @@ func (d *NvidiaPCIDevice) Reset() error {
// New interface that allows us to get a list of all NVIDIA PCI devices
func New() Interface {
return &nvpci{PCIDevicesRoot}
return NewFrom(PCIDevicesRoot)
}
// NewFrom interface allows us to get a list of all NVIDIA PCI devices at a specific root directory
func NewFrom(root string) Interface {
return &nvpci{root}
return &nvpci{
pciDevicesRoot: root,
}
}
// GetAllDevices returns all Nvidia PCI devices on the system
@ -173,7 +182,7 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
return nil, fmt.Errorf("unable to convert vendor string to uint16: %v", vendorStr)
}
if uint16(vendorID) != PCINvidiaVendorID {
if uint16(vendorID) != PCINvidiaVendorID && uint16(vendorID) != PCIMellanoxVendorID {
return nil, nil
}
@ -270,6 +279,8 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
}
}
pciDB := pciids.NewDB()
nvdevice := &NvidiaPCIDevice{
Path: devicePath,
Address: address,
@ -282,6 +293,8 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
Config: config,
Resources: resources,
IsVF: isVF,
DeviceName: pciDB.GetDeviceName(uint16(vendorID), uint16(deviceID)),
ClassName: pciDB.GetClassName(uint32(classID)),
}
return nvdevice, nil

35261
pkg/pciids/default_pci.ids Normal file

File diff suppressed because it is too large Load Diff

404
pkg/pciids/pciids.go Normal file
View File

@ -0,0 +1,404 @@
package pciids
import (
"bufio"
"bytes"
_ "embed" // Fallback is the embedded pci.ids db file
"fmt"
"io"
"os"
"strconv"
"strings"
)
// token is the kind of line the Lexer returns
type token int
// Token kinds. Every line of the pci.ids file is classified as exactly one
// of these by scanner.scan.
const (
// ILLEGAL a token which the Lexer does not understand
ILLEGAL token = iota
// EOF end of file
EOF
// WS whitespace
WS
// NEWLINE '\n'
NEWLINE
// COMMENT '# something'
COMMENT
// VENDOR PCI vendor
VENDOR
// SUBVENDOR PCI subvendor
SUBVENDOR
// DEVICE PCI device
DEVICE
// CLASS PCI class
CLASS
// SUBCLASS PCI subclass
SUBCLASS
// PROGIF PCI programming interface
PROGIF
)
// literal values from the Lexer
//
// The meaning of the fields depends on the token they accompany:
//   - scanEntry (vendor, device, class, subclass, prog-if lines) stores the
//     hexadecimal ID in ID and the description in name.
//   - scanSubVendor stores the subvendor ID in ID, the subdevice ID in name
//     and the subsystem description in SubName.
//   - for NEWLINE, COMMENT and ILLEGAL tokens scan stores the raw line in ID.
type literal struct {
ID string
name string
SubName string
}
// scanner is a line-oriented lexical scanner for the pci.ids format.
type scanner struct {
	// r is the buffered source the lines are read from.
	r *bufio.Reader
	// isVendor tracks the section being scanned: scan sets it when it sees a
	// vendor line and clears it when it sees a class line. It decides how
	// tab-indented lines are interpreted.
	isVendor bool
}

// newScanner returns a scanner that reads from r.
func newScanner(r io.Reader) *scanner {
	return &scanner{r: bufio.NewReader(r)}
}

// readline returns the next line of input including its trailing '\n'.
//
// pci.ids is line based, so a whole line is consumed at a time instead of a
// single rune. The end of the input is signalled by the sentinel "EOF".
//
//   - A final line that is not newline-terminated is still returned (with a
//     '\n' appended so every returned line has the same shape); the sentinel
//     follows on the next call. Previously that last line was discarded.
//   - A read error other than io.EOF is reported on stdout and then treated
//     as end of input. Previously an empty slice was returned, which the
//     line predicates index without a length check.
func (s *scanner) readline() []byte {
	ln, err := s.r.ReadBytes('\n')
	switch {
	case err == nil:
		return ln
	case err == io.EOF && len(ln) > 0:
		return append(ln, '\n')
	case err == io.EOF:
		return []byte("EOF")
	default:
		fmt.Printf("ReadBytes failed with %v", err)
		return []byte("EOF")
	}
}
// scanClass tokenizes a class line ("C xx  name"). The leading 'C' marker is
// dropped and the remainder is handed to scanEntry with a two character ID.
func scanClass(line []byte) (token, literal) {
	withoutMarker := line[1:]
	return CLASS, scanEntry(withoutMarker, 2)
}
// scanSubVendor tokenizes a subsystem line of the form
// "\t\tsubvendor subdevice  subsystem_name".
//
// The returned literal carries the subvendor ID in ID, the subdevice ID in
// name and the subsystem description in SubName. Both IDs are taken as the
// first four characters of their field. A line that is too short yields
// shortened or empty fields instead of panicking on an out-of-range slice.
func scanSubVendor(line []byte) (token, literal) {
	// cut splits off at most n leading bytes and returns them together with
	// the whitespace-trimmed remainder.
	cut := func(s string, n int) (head, rest string) {
		if len(s) < n {
			n = len(s)
		}
		return s[:n], strings.TrimSpace(s[n:])
	}

	subVendorID, rest := cut(strings.TrimSpace(string(line)), 4)
	subDeviceID, subsystem := cut(rest, 4)

	return SUBVENDOR, literal{ID: subVendorID, name: subDeviceID, SubName: subsystem}
}
// scanEntry splits a line into an ID and a description.
//
// Surrounding whitespace is removed first; the first offset characters become
// the ID and the whitespace-trimmed remainder becomes the name. If the
// trimmed line is shorter than offset the whole line is used as the ID and
// the name is empty, rather than panicking on an out-of-range slice.
func scanEntry(line []byte, offset uint) literal {
	entry := strings.TrimSpace(string(line))
	if offset > uint(len(entry)) {
		offset = uint(len(entry))
	}
	return literal{
		ID:   entry[:offset],
		name: strings.TrimSpace(entry[offset:]),
	}
}
// Line predicates. Each one classifies a raw line by its first bytes only.
// All of them are safe to call with an empty or very short slice; the
// originals indexed ln without checking its length.

// isLeadingOneTab reports whether ln starts with exactly one tab.
func isLeadingOneTab(ln []byte) bool {
	return len(ln) > 0 && ln[0] == '\t' && !isLeadingTwoTabs(ln)
}

// isLeadingTwoTabs reports whether ln starts with two tabs.
func isLeadingTwoTabs(ln []byte) bool {
	return len(ln) > 1 && ln[0] == '\t' && ln[1] == '\t'
}

// isHexDigit reports whether ln starts with a decimal digit.
func isHexDigit(ln []byte) bool { return len(ln) > 0 && ln[0] >= '0' && ln[0] <= '9' }

// isHexLetter reports whether ln starts with a lowercase hex letter (a-f).
func isHexLetter(ln []byte) bool { return len(ln) > 0 && ln[0] >= 'a' && ln[0] <= 'f' }

// isVendor reports whether ln starts with a lowercase hexadecimal character,
// which is how vendor lines begin.
func isVendor(ln []byte) bool { return isHexDigit(ln) || isHexLetter(ln) }

// isEOF reports whether ln is the end-of-input sentinel "EOF" produced by
// readline. An empty slice is treated as end of input as well: a line that
// was read successfully always ends in '\n', so an empty slice can only come
// from a failed read and scanning must stop instead of looping or panicking.
func isEOF(ln []byte) bool {
	if len(ln) == 0 {
		return true
	}
	return len(ln) >= 3 && ln[0] == 'E' && ln[1] == 'O' && ln[2] == 'F'
}

// isComment reports whether ln starts with '#'.
func isComment(ln []byte) bool { return len(ln) > 0 && ln[0] == '#' }

// isSubVendor reports whether ln has the indentation of a subsystem line
// (two tabs). Only meaningful inside the vendor section.
func isSubVendor(ln []byte) bool { return isLeadingTwoTabs(ln) }

// isDevice reports whether ln has the indentation of a device line (one
// tab). Only meaningful inside the vendor section.
func isDevice(ln []byte) bool { return isLeadingOneTab(ln) }

// isNewline reports whether ln is a blank line.
func isNewline(ln []byte) bool { return len(ln) > 0 && ln[0] == '\n' }

// List of known device classes, subclasses and programming interfaces

// isClass reports whether ln starts with the class marker 'C'.
func isClass(ln []byte) bool { return len(ln) > 0 && ln[0] == 'C' }

// isProgIf reports whether ln has the indentation of a programming interface
// line (two tabs). Only meaningful inside the class section.
func isProgIf(ln []byte) bool { return isLeadingTwoTabs(ln) }

// isSubClass reports whether ln has the indentation of a subclass line (one
// tab). Only meaningful inside the class section.
func isSubClass(ln []byte) bool { return isLeadingOneTab(ln) }
// unread asks the underlying reader to push back the last rune; any error
// returned by UnreadRune is discarded.
// NOTE(review): readline consumes input with ReadBytes, and bufio documents
// that UnreadRune fails unless the preceding operation was ReadRune, so this
// is presumably a no-op after readline. No caller is visible here; confirm
// whether it is still needed.
func (s *scanner) unread() { _ = s.r.UnreadRune() }
// scan reads one line and classifies it, returning the token kind together
// with the literal values extracted from the line.
//
// Tab-indented lines are ambiguous on their own: inside the vendor section
// they are devices and subsystems, inside the class section they are
// subclasses and programming interfaces. s.isVendor remembers which section
// the most recent vendor or class line opened. Lines that match none of the
// rules are returned as ILLEGAL with the raw line in the literal ID.
func (s *scanner) scan() (tok token, lit literal) {
	ln := s.readline()

	switch {
	case isEOF(ln):
		return EOF, literal{}
	case isNewline(ln):
		return NEWLINE, literal{ID: "\n"}
	case isComment(ln):
		return COMMENT, literal{ID: string(ln)}

	// Vendor section.
	case isVendor(ln):
		s.isVendor = true
		return VENDOR, scanEntry(ln, 4)
	case isSubVendor(ln) && s.isVendor:
		return scanSubVendor(ln)
	case isDevice(ln) && s.isVendor:
		return DEVICE, scanEntry(ln, 4)

	// Class section.
	case isClass(ln):
		s.isVendor = false
		return scanClass(ln)
	case isProgIf(ln) && !s.isVendor:
		return PROGIF, scanEntry(ln, 2)
	case isSubClass(ln) && !s.isVendor:
		return SUBCLASS, scanEntry(ln, 2)
	}

	return ILLEGAL, literal{ID: string(ln)}
}
// parser reads the tokens returned by the Lexer and constructs the AST
type parser struct {
// s is the scanner the tokens are pulled from.
s *scanner
// buf remembers the most recently scanned token so it can be handed out
// again after unscan.
buf struct {
tok token
lit literal
// n is the number of buffered tokens waiting to be re-read: unscan sets
// it to 1 and scan resets it to 0 when it returns the buffered token.
n int
}
}
// defaultPCIdb holds the contents of default_pci.ids, embedded at build time.
// newParser falls back to it when none of the on-disk pci.ids locations can
// be read.
//
//go:embed default_pci.ids
var defaultPCIdb []byte
// NewDB parses the PCI ID database from the first readable well-known
// location, or from the built-in pci.ids db when none of them can be read,
// and returns it.
func NewDB() Interface {
	// Locations of pci.ids on different distributions. These may be more up
	// to date than the embedded pci.ids db, so they are tried first.
	candidates := []string{
		"/usr/share/misc/pci.ids",   // Ubuntu
		"/usr/local/share/pci.ids",  // RHEL like with manual update
		"/usr/share/hwdata/pci.ids", // RHEL like
		"/usr/share/pci.ids",        // SUSE
	}

	p := newParser(candidates)
	return p.parse()
}
// newParser returns a parser for the first file in pcidbs that can be read.
// Files that cannot be read are skipped silently; if none is readable the
// parser is fed the embedded database instead.
func newParser(pcidbs []string) *parser {
	// go:embed initializes defaultPCIdb with the db shipped in the binary,
	// which is the content used unless a file on disk is found below.
	content := defaultPCIdb

	for _, path := range pcidbs {
		if data, err := os.ReadFile(path); err == nil {
			content = data
			break
		}
	}

	return newParserFromReader(bufio.NewReader(bytes.NewReader(content)))
}
// newParserFromReader returns a parser whose scanner reads from r. The token
// buffer starts out empty.
func newParserFromReader(r *bufio.Reader) *parser {
return &parser{s: newScanner(r)}
}
// scan returns the next token and its literal. If unscan was called since the
// previous scan, the buffered token is returned again instead of reading a
// new one from the scanner.
func (p *parser) scan() (tok token, lit literal) {
	if p.buf.n == 0 {
		// Nothing was pushed back: read a fresh token and remember it so a
		// later unscan can replay it.
		tok, lit = p.s.scan()
		p.buf.tok, p.buf.lit = tok, lit
		return tok, lit
	}

	p.buf.n = 0
	return p.buf.tok, p.buf.lit
}
// unscan pushes the most recently scanned token back so that the next call to
// scan returns it again. Only one token of look-back is kept.
func (p *parser) unscan() { p.buf.n = 1 }
// Compile-time check that *pcidb implements Interface.
var _ Interface = (*pcidb)(nil)
// Interface returns textual description of specific attributes of PCI devices
type Interface interface {
// GetDeviceName takes the vendor ID followed by the device ID and returns
// the device description.
GetDeviceName(uint16, uint16) string
// GetClassName takes a class ID and returns the class description.
GetClassName(uint32) string
}
// GetDeviceName returns the textual description of the PCI device identified
// by vendorID and deviceID. An unknown vendor or device yields the empty
// string.
func (d *pcidb) GetDeviceName(vendorID uint16, deviceID uint16) string {
	// Missing keys produce zero values, and reading from the nil devices map
	// of a zero vendor is safe, so no existence checks are required.
	ven := d.vendors[vendorID]
	dev := ven.devices[deviceID]
	return dev.name
}
// GetClassName returns the textual description of the PCI device class
// stored under classID. An unknown class ID yields the empty string.
func (d *pcidb) GetClassName(classID uint32) string {
	entry := d.classes[classID]
	return entry.name
}
// pcidb The complete set of PCI vendors and PCI classes
type pcidb struct {
// vendors is keyed by PCI vendor ID.
vendors map[uint16]vendor
// classes holds the entries created by parse: one per class keyed by the
// class ID, plus flattened entries keyed by class<<16 | subclass<<8 | progif
// whose names join the individual descriptions with " | ".
classes map[uint32]class
}
// vendor PCI vendors/devices/subVendors/SubDevices
type vendor struct {
name string
// devices is keyed by PCI device ID.
devices map[uint16]device
}
// subVendor PCI subVendor
type subVendor struct {
// SubDevices is keyed by subdevice ID.
SubDevices map[uint16]SubDevice
}
// SubDevice PCI SubDevice
type SubDevice struct {
name string
}
// device PCI device
type device struct {
name string
// subVendors is keyed by subvendor ID.
subVendors map[uint16]subVendor
}
// class PCI classes/subClasses/Programming Interfaces
type class struct {
name string
// subClasses is keyed by subclass ID. It is left nil on the flattened
// entries that parse stores for subclasses and programming interfaces.
subClasses map[uint32]subClass
}
// subClass PCI subClass
type subClass struct {
name string
// progIfs is keyed by programming interface ID.
progIfs map[uint8]progIf
}
// progIf PCI Programming Interface
type progIf struct {
name string
}
// parse consumes every token from the scanner and returns the resulting PCI
// ID database.
//
// Vendor section: a VENDOR token creates an entry in db.vendors, a DEVICE
// token is attached to the most recent vendor and a SUBVENDOR token to the
// most recent device.
//
// Class section: CLASS, SUBCLASS and PROGIF tokens are stored hierarchically.
// In addition db.classes receives flattened entries keyed by
// class<<16 | subclass<<8 | progif whose names are the individual
// descriptions joined with " | ".
//
// COMMENT, NEWLINE and WS tokens are ignored and EOF ends parsing. Malformed
// input never panics: entries with an unparsable ID and entries without a
// parent (for example a device before any vendor) are reported on stdout and
// skipped, and their children are skipped with them rather than being
// attached to an unrelated earlier parent.
func (p *parser) parse() Interface {
	db := &pcidb{
		vendors: map[uint16]vendor{},
		classes: map[uint32]class{},
	}

	// Breadcrumbs pointing at the entries currently being filled. The structs
	// are copies, but their maps are shared with the entries stored in db, so
	// writes through them land in the database. A nil map means "no current
	// parent".
	var (
		hkVendor   vendor
		hkDevice   device
		hkClass    class
		hkSubClass subClass
		hkFullID   uint32
		hkFullName [2]string
	)

	// hexID parses a hexadecimal ID that must fit into the given number of
	// bits and reports a warning when it cannot.
	hexID := func(s string, bits int) (uint64, bool) {
		id, err := strconv.ParseUint(s, 16, bits)
		if err != nil {
			fmt.Printf("warning: skipping PCI IDS entry with invalid ID %q: %v\n", s, err)
			return 0, false
		}
		return id, true
	}
	// orphan reports an entry that has no parent to be attached to.
	orphan := func(kind string, lit literal) {
		fmt.Printf("warning: skipping PCI IDS %s %q without a parent entry\n", kind, lit.ID)
	}

	for {
		tok, lit := p.scan()

		switch tok {
		case EOF:
			return db

		// PCI vendors -------------------------------------------------
		case VENDOR:
			// Forget the previous vendor and device first so that nothing is
			// attached to them if this vendor turns out to be invalid.
			hkVendor, hkDevice = vendor{}, device{}
			id, ok := hexID(lit.ID, 16)
			if !ok {
				continue
			}
			hkVendor = vendor{
				name:    lit.name,
				devices: map[uint16]device{},
			}
			db.vendors[uint16(id)] = hkVendor

		case DEVICE:
			hkDevice = device{}
			if hkVendor.devices == nil {
				orphan("device", lit)
				continue
			}
			id, ok := hexID(lit.ID, 16)
			if !ok {
				continue
			}
			hkDevice = device{
				name:       lit.name,
				subVendors: map[uint16]subVendor{},
			}
			hkVendor.devices[uint16(id)] = hkDevice

		case SUBVENDOR:
			if hkDevice.subVendors == nil {
				orphan("subsystem", lit)
				continue
			}
			// For SUBVENDOR tokens the literal carries the subvendor ID in
			// ID and the subdevice ID in name.
			subVendorID, ok := hexID(lit.ID, 16)
			if !ok {
				continue
			}
			subDeviceID, ok := hexID(lit.name, 16)
			if !ok {
				continue
			}
			// A device usually lists several subsystems of the same
			// subvendor; reuse the existing entry so earlier subdevices are
			// kept instead of being replaced by a fresh map.
			sv, seen := hkDevice.subVendors[uint16(subVendorID)]
			if !seen {
				sv = subVendor{SubDevices: map[uint16]SubDevice{}}
				hkDevice.subVendors[uint16(subVendorID)] = sv
			}
			sv.SubDevices[uint16(subDeviceID)] = SubDevice{name: lit.SubName}

		// PCI classes -------------------------------------------------
		case CLASS:
			hkClass, hkSubClass = class{}, subClass{}
			id, ok := hexID(lit.ID, 32)
			if !ok {
				continue
			}
			hkClass = class{
				name:       lit.name,
				subClasses: map[uint32]subClass{},
			}
			db.classes[uint32(id)] = hkClass

			// Start a new flattened ID with subclass and prog-if cleared.
			hkFullID = (uint32(id) << 16) & 0xFFFF0000
			hkFullName[0] = fmt.Sprintf("%s (%02x)", lit.name, id)

		case SUBCLASS:
			hkSubClass = subClass{}
			if hkClass.subClasses == nil {
				orphan("subclass", lit)
				continue
			}
			id, ok := hexID(lit.ID, 8)
			if !ok {
				continue
			}
			hkSubClass = subClass{
				name:    lit.name,
				progIfs: map[uint8]progIf{},
			}
			hkClass.subClasses[uint32(id)] = hkSubClass

			// Replace the previous subclass and clear the prog-if byte.
			hkFullID = (hkFullID & 0xFFFF0000) | uint32(id)<<8
			hkFullName[1] = fmt.Sprintf("%s (%02x)", lit.name, id)
			db.classes[hkFullID] = class{
				name: hkFullName[0] + " | " + hkFullName[1],
			}

		case PROGIF:
			if hkSubClass.progIfs == nil {
				orphan("programming interface", lit)
				continue
			}
			id, ok := hexID(lit.ID, 8)
			if !ok {
				continue
			}
			hkSubClass.progIfs[uint8(id)] = progIf{name: lit.name}

			name := fmt.Sprintf("%s (%02x)", lit.name, id)
			db.classes[hkFullID|uint32(id)] = class{
				name: hkFullName[0] + " | " + hkFullName[1] + " | " + name,
			}

		case ILLEGAL:
			fmt.Printf("warning: illegal token %s %s cannot parse PCI IDS, database may be incomplete ", lit.ID, lit.name)
		}
	}
}