Add pkg/nvpci as a direct port from mig-parted/pkg/nvpci

Signed-off-by: Kevin Klues <kklues@nvidia.com>
This commit is contained in:
Kevin Klues 2021-03-22 10:57:07 +00:00
parent 613fd315f3
commit 6a0fd37ab6
5 changed files with 643 additions and 0 deletions

126
pkg/nvpci/config.go Normal file
View File

@ -0,0 +1,126 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nvpci
import (
"fmt"
"io/ioutil"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/bytes"
)
const (
// pciCfgSpaceStandardSize is the size, in bytes, of the standard PCI
// configuration space; standard capabilities must live below this offset.
pciCfgSpaceStandardSize = 256
// pciCfgSpaceExtendedSize is the size, in bytes, of the extended
// configuration space; extended capabilities start at
// pciCfgSpaceStandardSize and must live below this offset.
pciCfgSpaceExtendedSize = 4096
// pciCapabilityListPointer is the configuration-space offset of the byte
// holding the offset of the first standard capability.
pciCapabilityListPointer = 0x34
)
// ConfigSpace identifies the file from which a device's PCI configuration
// space is read.
type ConfigSpace struct {
// Path is the file that Read loads in full.
Path string
}
// ConfigSpaceIO gives raw byte-level access to a configuration space (via
// the embedded bytes.Bytes) plus typed accessors for commonly used fields.
type ConfigSpaceIO interface {
bytes.Bytes
// GetVendorID returns the 16-bit value stored at offset 0.
GetVendorID() uint16
// GetDeviceID returns the 16-bit value stored at offset 2.
GetDeviceID() uint16
// GetPCICapabilities walks the capability lists and returns what it finds.
GetPCICapabilities() (*PCICapabilities, error)
}
// configSpaceIO is the package's ConfigSpaceIO implementation; it wraps the
// in-memory copy of the configuration space produced by ConfigSpace.Read.
type configSpaceIO struct {
bytes.Bytes
}
// PCIStandardCapability is a view of the configuration space that begins at
// a standard capability's header and extends to the end of the space.
type PCIStandardCapability struct {
bytes.Bytes
}
// PCIExtendedCapability is a view of the configuration space that begins at
// an extended capability's header and extends to the end of the space.
type PCIExtendedCapability struct {
bytes.Bytes
// Version is the 4-bit capability version taken from bits 19:16 of the
// capability header.
Version uint8
}
// PCICapabilities holds the capabilities discovered in a configuration
// space, keyed by capability ID. If the same ID appears more than once in a
// list, the entry encountered last wins.
type PCICapabilities struct {
// Standard maps 8-bit capability IDs to standard capabilities.
Standard map[uint8]*PCIStandardCapability
// Extended maps 16-bit capability IDs to extended capabilities.
Extended map[uint16]*PCIExtendedCapability
}
// Read loads the whole file at cs.Path into memory and returns it wrapped in
// a ConfigSpaceIO. The returned value is a snapshot: it is not kept in sync
// with the file after the call returns.
func (cs *ConfigSpace) Read() (ConfigSpaceIO, error) {
	contents, err := ioutil.ReadFile(cs.Path)
	if err != nil {
		return nil, fmt.Errorf("failed to open file: %v", err)
	}

	space := &configSpaceIO{Bytes: bytes.New(&contents)}
	return space, nil
}
// GetVendorID returns the 16-bit vendor ID stored at offset 0 of the
// configuration space.
func (cs *configSpaceIO) GetVendorID() uint16 {
return cs.Read16(0)
}
// GetDeviceID returns the 16-bit device ID stored at offset 2 of the
// configuration space.
func (cs *configSpaceIO) GetDeviceID() uint16 {
return cs.Read16(2)
}
// GetPCICapabilities walks the standard and extended capability lists of the
// configuration space and returns every capability found, keyed by ID.
//
// The standard list starts at the offset stored in the byte at
// pciCapabilityListPointer; each header holds the capability ID in bits 7:0
// and the offset of the next capability in bits 15:8. The extended list is
// only walked when the configuration space is larger than
// pciCfgSpaceStandardSize; it starts at that offset and each header holds the
// ID in bits 15:0, the version in bits 19:16 and the next offset in bits
// 31:20.
//
// An error is returned if a pointer leads outside the available data, if a
// list loops back on itself, or if the data reads back as all ones.
func (cs *configSpaceIO) GetPCICapabilities() (*PCICapabilities, error) {
	caps := &PCICapabilities{
		Standard: make(map[uint8]*PCIStandardCapability),
		Extended: make(map[uint16]*PCIExtendedCapability),
	}

	// The capability pointer itself must be readable before we dereference it.
	if cs.Len() <= pciCapabilityListPointer {
		return nil, fmt.Errorf("capability list pointer out of bounds")
	}

	seenStandard := make(map[uint8]bool)
	soffset := cs.Read8(pciCapabilityListPointer)
	for soffset != 0 {
		if soffset == 0xff {
			return nil, fmt.Errorf("config space broken")
		}
		// Headers are read as 32-bit words, so all four bytes must lie inside
		// both the standard region and the data actually available.
		if int(soffset)+4 > pciCfgSpaceStandardSize || int(soffset)+4 > cs.Len() {
			return nil, fmt.Errorf("standard capability list pointer out of bounds")
		}
		// A malformed list that points back at itself would never terminate.
		if seenStandard[soffset] {
			return nil, fmt.Errorf("standard capability list contains a loop")
		}
		seenStandard[soffset] = true

		data := cs.Read32(int(soffset))
		id := uint8(data & 0xff)
		caps.Standard[id] = &PCIStandardCapability{
			cs.Slice(int(soffset), cs.Len()-int(soffset)),
		}
		soffset = uint8((data >> 8) & 0xff)
	}

	if cs.Len() <= pciCfgSpaceStandardSize {
		return caps, nil
	}

	seenExtended := make(map[uint16]bool)
	eoffset := uint16(pciCfgSpaceStandardSize)
	// A next offset below the extended region (including 0) ends the list.
	for int(eoffset) >= pciCfgSpaceStandardSize {
		if int(eoffset)+4 > pciCfgSpaceExtendedSize || int(eoffset)+4 > cs.Len() {
			return nil, fmt.Errorf("extended capability list pointer out of bounds")
		}
		if seenExtended[eoffset] {
			return nil, fmt.Errorf("extended capability list contains a loop")
		}
		seenExtended[eoffset] = true

		data := cs.Read32(int(eoffset))
		if data == 0xffffffff {
			return nil, fmt.Errorf("config space broken")
		}
		// An all-zero header (ID, version and next offset all 0) means there
		// is no capability here; do not record a bogus entry with ID 0.
		if data == 0 {
			break
		}

		id := uint16(data & 0xffff)
		version := uint8((data >> 16) & 0xf)
		caps.Extended[id] = &PCIExtendedCapability{
			cs.Slice(int(eoffset), cs.Len()-int(eoffset)),
			version,
		}
		// The next-capability offset lives in bits 31:20 of the header; the
		// low two bits are reserved and masked off.
		eoffset = uint16((data >> 20) & 0xffc)
	}

	return caps, nil
}

120
pkg/nvpci/mock.go Normal file
View File

@ -0,0 +1,120 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nvpci
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/bytes"
)
// MockA100 is an nvpci implementation whose PCI devices root is a temporary
// directory populated by NewMockA100 instead of the real sysfs tree.
type MockA100 struct {
*nvpci
}
// Cleanup removes the directory tree backing the mock. Any error returned by
// the removal is discarded.
func (m *MockA100) Cleanup() {
os.RemoveAll(m.pciDevicesRoot)
}
// Compile-time check that *MockA100 satisfies Interface.
var _ Interface = (*MockA100)(nil)
// NewMockA100 creates a temporary directory that mimics the sysfs entry of a
// single NVIDIA 3D controller at PCI address 0000:80:05.1 with device ID
// 0x20bf, and returns a MockA100 that uses that directory as its PCI devices
// root.
//
// The device directory contains the files vendor, class, device, config
// (a zeroed standard-size configuration space with vendor and device IDs
// filled in), resource (a single BAR line) and resource0 (a zeroed BAR0 image
// with the PMC ID written at offset 0).
//
// If any step fails the temporary directory is removed and the error is
// returned. On success the caller owns the directory and should call Cleanup.
func NewMockA100() (mock *MockA100, rerr error) {
	rootDir, err := ioutil.TempDir("", "")
	if err != nil {
		return nil, err
	}
	defer func() {
		if rerr != nil {
			os.RemoveAll(rootDir)
		}
	}()

	deviceDir := filepath.Join(rootDir, "0000:80:05.1")
	if err := os.MkdirAll(deviceDir, 0755); err != nil {
		return nil, err
	}

	// writeFile creates the named file inside deviceDir, writes contents to
	// it and closes it again, so no file descriptors outlive this function.
	writeFile := func(name string, contents []byte) error {
		return ioutil.WriteFile(filepath.Join(deviceDir, name), contents, 0666)
	}

	if err := writeFile("vendor", []byte(fmt.Sprintf("0x%x", pciNvidiaVendorID))); err != nil {
		return nil, err
	}
	if err := writeFile("class", []byte(fmt.Sprintf("0x%x", pci3dControllerClass))); err != nil {
		return nil, err
	}
	if err := writeFile("device", []byte("0x20bf")); err != nil {
		return nil, err
	}

	configBuf := make([]byte, pciCfgSpaceStandardSize)
	config := bytes.New(&configBuf)
	config.Write16(0, pciNvidiaVendorID)
	config.Write16(2, uint16(0x20bf))
	if err := writeFile("config", *config.Raw()); err != nil {
		return nil, err
	}

	// bar0 holds the start address, the inclusive end address and the flags
	// of the only resource exposed by the mock.
	bar0 := []uint64{0x00000000c2000000, 0x00000000c2ffffff, 0x0000000000040200}
	resourceLine := fmt.Sprintf("0x%x 0x%x 0x%x", bar0[0], bar0[1], bar0[2])
	if err := writeFile("resource", []byte(resourceLine)); err != nil {
		return nil, err
	}

	pmcID := uint32(0x170000a1)
	barBuf := make([]byte, bar0[1]-bar0[0]+1)
	bar := bytes.New(&barBuf).LittleEndian()
	bar.Write32(0, pmcID)
	if err := writeFile("resource0", *bar.Raw()); err != nil {
		return nil, err
	}

	mock = &MockA100{
		&nvpci{rootDir},
	}
	return mock, nil
}

273
pkg/nvpci/nvpci.go Normal file
View File

@ -0,0 +1,273 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nvpci
import (
"fmt"
"io/ioutil"
"os"
"path"
"sort"
"strconv"
"strings"
)
const (
// pciDevicesRoot represents base path for all pci devices under sysfs
pciDevicesRoot = "/sys/bus/pci/devices"
// pciNvidiaVendorID represents PCI vendor id for NVIDIA
pciNvidiaVendorID uint16 = 0x10de
// pciVgaControllerClass represents the PCI class for VGA Controllers
pciVgaControllerClass uint32 = 0x030000
// pci3dControllerClass represents the PCI class for 3D Graphics accelerators
pci3dControllerClass uint32 = 0x030200
// pciNvSwitchClass represents the PCI class for NVSwitches
pciNvSwitchClass uint32 = 0x068000
)
// Interface allows us to get a list of all NVIDIA PCI devices
type Interface interface {
// GetAllDevices returns every NVIDIA PCI device, regardless of class.
GetAllDevices() ([]*NvidiaPCIDevice, error)
// Get3DControllers returns the NVIDIA devices whose class is 3D controller.
Get3DControllers() ([]*NvidiaPCIDevice, error)
// GetVGAControllers returns the NVIDIA devices whose class is VGA controller.
GetVGAControllers() ([]*NvidiaPCIDevice, error)
// GetNVSwitches returns the NVIDIA devices whose class is NVSwitch.
GetNVSwitches() ([]*NvidiaPCIDevice, error)
// GetGPUs returns the NVIDIA devices that are VGA or 3D controllers.
GetGPUs() ([]*NvidiaPCIDevice, error)
}
// nvpci is the Interface implementation that discovers devices by reading
// the directory tree rooted at pciDevicesRoot.
type nvpci struct {
// pciDevicesRoot is the directory whose entries are treated as PCI devices.
pciDevicesRoot string
}
// Compile-time check that *nvpci satisfies Interface.
var _ Interface = (*nvpci)(nil)
// NvidiaPCIDevice represents a PCI device for an NVIDIA product
type NvidiaPCIDevice struct {
// Path is the device's directory under the PCI devices root.
Path string
// Address is the name of that directory (e.g. "0000:80:05.1").
Address string
// Vendor is the value parsed from the device's "vendor" file.
Vendor uint16
// Class is the value parsed from the device's "class" file.
Class uint32
// Device is the value parsed from the device's "device" file.
Device uint16
// Config refers to the device's "config" file.
Config *ConfigSpace
// Resources maps the zero-based line index in the device's "resource"
// file to the region described on that line; lines whose start and end
// addresses are equal are omitted.
Resources map[int]*MemoryResource
}
// IsVGAController reports whether the device's class equals
// pciVgaControllerClass.
func (d *NvidiaPCIDevice) IsVGAController() bool {
return d.Class == pciVgaControllerClass
}
// Is3DController reports whether the device's class equals
// pci3dControllerClass.
func (d *NvidiaPCIDevice) Is3DController() bool {
return d.Class == pci3dControllerClass
}
// IsNVSwitch reports whether the device's class equals pciNvSwitchClass.
func (d *NvidiaPCIDevice) IsNVSwitch() bool {
return d.Class == pciNvSwitchClass
}
// IsGPU reports whether the device is either a VGA controller or a 3D
// controller.
func (d *NvidiaPCIDevice) IsGPU() bool {
return d.IsVGAController() || d.Is3DController()
}
// IsResetAvailable reports whether a "reset" entry can be stat'ed inside the
// device's directory. Any stat failure, not only a missing file, yields false.
func (d *NvidiaPCIDevice) IsResetAvailable() bool {
	resetPath := path.Join(d.Path, "reset")
	_, statErr := os.Stat(resetPath)
	return statErr == nil
}
// Reset writes "1" to the "reset" entry inside the device's directory and
// returns a wrapped error if that write fails.
func (d *NvidiaPCIDevice) Reset() error {
	resetPath := path.Join(d.Path, "reset")
	if writeErr := ioutil.WriteFile(resetPath, []byte("1"), 0); writeErr != nil {
		return fmt.Errorf("unable to write to reset file: %v", writeErr)
	}
	return nil
}
// New returns an Interface that discovers devices under the package-level
// pciDevicesRoot path.
func New() Interface {
return &nvpci{pciDevicesRoot}
}
// GetAllDevices returns all NVIDIA PCI devices on the system.
//
// Every entry under p.pciDevicesRoot is inspected: its "vendor" file is read
// and entries whose vendor is not pciNvidiaVendorID are skipped. For NVIDIA
// devices the "class", "device" and "resource" files are read as well. Each
// line of "resource" must contain exactly three numeric fields (start, end,
// flags); lines whose start and end are equal are not recorded.
//
// The returned slice is ordered by PCI address. An error is returned as soon
// as any required file cannot be read or parsed.
func (p *nvpci) GetAllDevices() ([]*NvidiaPCIDevice, error) {
	deviceDirs, err := ioutil.ReadDir(p.pciDevicesRoot)
	if err != nil {
		return nil, fmt.Errorf("unable to read PCI bus devices: %v", err)
	}

	var nvdevices []*NvidiaPCIDevice
	for _, deviceDir := range deviceDirs {
		address := deviceDir.Name()
		devicePath := path.Join(p.pciDevicesRoot, address)

		vendor, err := ioutil.ReadFile(path.Join(devicePath, "vendor"))
		if err != nil {
			return nil, fmt.Errorf("unable to read PCI device vendor id for %s: %v", address, err)
		}
		vendorStr := strings.TrimSpace(string(vendor))
		vendorID, err := strconv.ParseUint(vendorStr, 0, 16)
		if err != nil {
			return nil, fmt.Errorf("unable to convert vendor string to uint16: %v", vendorStr)
		}

		// Only NVIDIA devices are of interest; skip everything else before
		// touching any further files.
		if uint16(vendorID) != pciNvidiaVendorID {
			continue
		}

		class, err := ioutil.ReadFile(path.Join(devicePath, "class"))
		if err != nil {
			return nil, fmt.Errorf("unable to read PCI device class for %s: %v", address, err)
		}
		classStr := strings.TrimSpace(string(class))
		classID, err := strconv.ParseUint(classStr, 0, 32)
		if err != nil {
			return nil, fmt.Errorf("unable to convert class string to uint32: %v", classStr)
		}

		device, err := ioutil.ReadFile(path.Join(devicePath, "device"))
		if err != nil {
			return nil, fmt.Errorf("unable to read PCI device id for %s: %v", address, err)
		}
		deviceStr := strings.TrimSpace(string(device))
		deviceID, err := strconv.ParseUint(deviceStr, 0, 16)
		if err != nil {
			return nil, fmt.Errorf("unable to convert device string to uint16: %v", deviceStr)
		}

		config := &ConfigSpace{
			Path: path.Join(devicePath, "config"),
		}

		resource, err := ioutil.ReadFile(path.Join(devicePath, "resource"))
		if err != nil {
			return nil, fmt.Errorf("unable to read PCI resource file for %s: %v", address, err)
		}

		resources := make(map[int]*MemoryResource)
		for i, line := range strings.Split(strings.TrimSpace(string(resource)), "\n") {
			values := strings.Fields(line)
			if len(values) != 3 {
				return nil, fmt.Errorf("expected 3 entries in line '%d' of resource file for %s, found %d", i, address, len(values))
			}

			start, err := strconv.ParseUint(values[0], 0, 64)
			if err != nil {
				return nil, fmt.Errorf("unable to parse start address in line '%d' of resource file for %s: %v", i, address, err)
			}
			end, err := strconv.ParseUint(values[1], 0, 64)
			if err != nil {
				return nil, fmt.Errorf("unable to parse end address in line '%d' of resource file for %s: %v", i, address, err)
			}
			flags, err := strconv.ParseUint(values[2], 0, 64)
			if err != nil {
				return nil, fmt.Errorf("unable to parse flags in line '%d' of resource file for %s: %v", i, address, err)
			}

			// Lines with identical start and end describe no usable region.
			if end != start {
				resources[i] = &MemoryResource{
					Start: uintptr(start),
					End:   uintptr(end),
					Flags: flags,
					Path:  fmt.Sprintf("%s/resource%d", devicePath, i),
				}
			}
		}

		nvdevices = append(nvdevices, &NvidiaPCIDevice{
			Path:      devicePath,
			Address:   address,
			Vendor:    uint16(vendorID),
			Class:     uint32(classID),
			Device:    uint16(deviceID),
			Config:    config,
			Resources: resources,
		})
	}

	// addressToID turns an address such as "0000:80:05.1" into a number by
	// dropping the separators and parsing the rest as hexadecimal. A parse
	// failure is deliberately ignored: such an address sorts as 0.
	addressToID := func(address string) uint64 {
		address = strings.ReplaceAll(address, ":", "")
		address = strings.ReplaceAll(address, ".", "")
		id, _ := strconv.ParseUint(address, 16, 64)
		return id
	}

	sort.Slice(nvdevices, func(i, j int) bool {
		return addressToID(nvdevices[i].Address) < addressToID(nvdevices[j].Address)
	})

	return nvdevices, nil
}
// Get3DControllers lists the NVIDIA devices on the system whose PCI class
// marks them as 3D controllers. The result is nil when there are none.
func (p *nvpci) Get3DControllers() ([]*NvidiaPCIDevice, error) {
	all, err := p.GetAllDevices()
	if err != nil {
		return nil, fmt.Errorf("error getting all NVIDIA devices: %v", err)
	}

	var controllers []*NvidiaPCIDevice
	for i := range all {
		if dev := all[i]; dev.Is3DController() {
			controllers = append(controllers, dev)
		}
	}
	return controllers, nil
}
// GetVGAControllers lists the NVIDIA devices on the system whose PCI class
// marks them as VGA controllers. The result is nil when there are none.
func (p *nvpci) GetVGAControllers() ([]*NvidiaPCIDevice, error) {
	all, err := p.GetAllDevices()
	if err != nil {
		return nil, fmt.Errorf("error getting all NVIDIA devices: %v", err)
	}

	var controllers []*NvidiaPCIDevice
	for _, dev := range all {
		if !dev.IsVGAController() {
			continue
		}
		controllers = append(controllers, dev)
	}
	return controllers, nil
}
// GetNVSwitches lists the NVIDIA devices on the system whose PCI class marks
// them as NVSwitches. The result is nil when there are none.
func (p *nvpci) GetNVSwitches() ([]*NvidiaPCIDevice, error) {
	all, err := p.GetAllDevices()
	if err != nil {
		return nil, fmt.Errorf("error getting all NVIDIA devices: %v", err)
	}

	var switches []*NvidiaPCIDevice
	for i := range all {
		if dev := all[i]; dev.IsNVSwitch() {
			switches = append(switches, dev)
		}
	}
	return switches, nil
}
// GetGPUs lists the NVIDIA devices on the system that are GPUs, i.e. VGA or
// 3D controllers. The result is nil when there are none.
func (p *nvpci) GetGPUs() ([]*NvidiaPCIDevice, error) {
	all, err := p.GetAllDevices()
	if err != nil {
		return nil, fmt.Errorf("error getting all NVIDIA devices: %v", err)
	}

	var gpus []*NvidiaPCIDevice
	for _, dev := range all {
		if !dev.IsGPU() {
			continue
		}
		gpus = append(gpus, dev)
	}
	return gpus, nil
}

60
pkg/nvpci/nvpci_test.go Normal file
View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nvpci
import (
"testing"
"github.com/stretchr/testify/require"
)
const (
// ga100PmcID is the 32-bit value the test expects to read at offset 0 of
// the mock device's BAR0.
ga100PmcID = uint32(0x170000a1)
)
// TestNvpci builds the mock A100 device tree and checks that device
// discovery, configuration-space parsing and BAR0 access all agree with what
// the mock wrote to disk.
func TestNvpci(t *testing.T) {
nvpci, err := NewMockA100()
require.Nil(t, err, "Error creating NewMockA100")
// Remove the temporary device tree when the test finishes.
defer nvpci.Cleanup()
// The mock exposes exactly one GPU with exactly one memory resource.
devices, err := nvpci.GetGPUs()
require.Nil(t, err, "Error getting GPUs")
require.Equal(t, 1, len(devices), "Wrong number of GPU devices")
require.Equal(t, 1, len(devices[0].Resources), "Wrong number GPU resources found")
// IDs parsed from the sysfs-style files must match those in the config space.
config, err := devices[0].Config.Read()
require.Nil(t, err, "Error reading config")
require.Equal(t, devices[0].Vendor, config.GetVendorID(), "Vendor IDs do not match")
require.Equal(t, devices[0].Device, config.GetDeviceID(), "Device IDs do not match")
// The mock config space is zero-filled apart from the IDs, so no
// capabilities are expected.
capabilities, err := config.GetPCICapabilities()
require.Nil(t, err, "Error getting PCI capabilities")
require.Equal(t, 0, len(capabilities.Standard), "Wrong number of standard PCI capabilities")
require.Equal(t, 0, len(capabilities.Extended), "Wrong number of extended PCI capabilities")
resource0 := devices[0].Resources[0]
bar0, err := resource0.Open()
require.Nil(t, err, "Error opening bar0")
// Close the mapping at the end of the test and report any failure.
defer func() {
err := bar0.Close()
if err != nil {
t.Errorf("Error closing bar0: %v", err)
}
}()
// The mapping must span the whole resource and expose the PMC ID at offset 0.
require.Equal(t, int(resource0.End-resource0.Start+1), bar0.Len())
require.Equal(t, ga100PmcID, bar0.Read32(0))
}

64
pkg/nvpci/resources.go Normal file
View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nvpci
import (
"fmt"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/mmio"
)
const (
// pmcEndianRegister is the offset within a memory resource of the 32-bit
// register read to decide which byte order to use.
pmcEndianRegister = 0x4
// pmcLittleEndian is the register value that selects little-endian access.
pmcLittleEndian = 0x0
// pmcBigEndian is the register value that selects big-endian access.
pmcBigEndian = 0x01000001
)
// MemoryResource describes one memory region of a PCI device together with
// the file through which it can be mapped.
type MemoryResource struct {
// Start is the first address of the region.
Start uintptr
// End is the last address of the region (inclusive; the mapped size is
// End-Start+1).
End uintptr
// Flags is the third value of the region's line in the resource file.
Flags uint64
// Path is the file that Open and OpenReadOnly map.
Path string
}
// Open maps the whole resource file at mr.Path for reading and writing and
// returns an accessor using the byte order indicated by the register at
// pmcEndianRegister.
//
// An error is returned if the file cannot be mapped or if the register holds
// neither pmcLittleEndian nor pmcBigEndian; in the latter case the mapping is
// closed again before returning.
func (mr *MemoryResource) Open() (mmio.Mmio, error) {
	rw, err := mmio.OpenRW(mr.Path, 0, int(mr.End-mr.Start+1))
	if err != nil {
		return nil, fmt.Errorf("failed to open file for mmio: %v", err)
	}

	endianness := rw.Read32(pmcEndianRegister)
	switch endianness {
	case pmcBigEndian:
		return rw.BigEndian(), nil
	case pmcLittleEndian:
		return rw.LittleEndian(), nil
	}

	// The caller never receives the mapping on this path, so release it here.
	if closeErr := rw.Close(); closeErr != nil {
		return nil, fmt.Errorf("unknown endianness for mmio: 0x%x (closing mapping also failed: %v)", endianness, closeErr)
	}
	return nil, fmt.Errorf("unknown endianness for mmio: 0x%x", endianness)
}
// OpenReadOnly maps the whole resource file at mr.Path read-only and returns
// an accessor using the byte order indicated by the register at
// pmcEndianRegister.
//
// An error is returned if the file cannot be mapped or if the register holds
// neither pmcLittleEndian nor pmcBigEndian; in the latter case the mapping is
// closed again before returning.
func (mr *MemoryResource) OpenReadOnly() (mmio.Mmio, error) {
	ro, err := mmio.OpenRO(mr.Path, 0, int(mr.End-mr.Start+1))
	if err != nil {
		return nil, fmt.Errorf("failed to open file for mmio: %v", err)
	}

	endianness := ro.Read32(pmcEndianRegister)
	switch endianness {
	case pmcBigEndian:
		return ro.BigEndian(), nil
	case pmcLittleEndian:
		return ro.LittleEndian(), nil
	}

	// The caller never receives the mapping on this path, so release it here.
	if closeErr := ro.Close(); closeErr != nil {
		return nil, fmt.Errorf("unknown endianness for mmio: 0x%x (closing mapping also failed: %v)", endianness, closeErr)
	}
	return nil, fmt.Errorf("unknown endianness for mmio: 0x%x", endianness)
}