Merge branch 'driver-detection' into 'main'

Detect driver bound to an NvidiaPCIDevice and mdev device

See merge request nvidia/cloud-native/go-nvlib!11
This commit is contained in:
Christopher Desiniotis 2022-07-14 20:39:17 +00:00
commit f281b5e581
8 changed files with 87 additions and 33 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
*.swp
*.swo
*.test

View File

@ -20,7 +20,6 @@ import (
"fmt"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci/bytes"
"io/ioutil"
"os"
"path/filepath"
)
@ -34,7 +33,7 @@ var _ Interface = (*MockNvmdev)(nil)
// NewMock creates new mock mediated (vGPU) and parent PCI devices and removes old devices
func NewMock() (mock *MockNvmdev, rerr error) {
mdevParentsRootDir, err := ioutil.TempDir("", "")
mdevParentsRootDir, err := os.MkdirTemp(os.TempDir(), "")
if err != nil {
return nil, err
}
@ -43,7 +42,7 @@ func NewMock() (mock *MockNvmdev, rerr error) {
os.RemoveAll(mdevParentsRootDir)
}
}()
mdevDevicesRootDir, err := ioutil.TempDir("", "")
mdevDevicesRootDir, err := os.MkdirTemp(os.TempDir(), "")
if err != nil {
return nil, err
}
@ -184,14 +183,29 @@ func (m *MockNvmdev) AddMockA100Parent(address string, numaNode int) error {
// AddMockA100Mdev creates an A100 like MDEV (vGPU) mock device.
// The corresponding mocked parent A100 device must be created beforehand.
func (m *MockNvmdev) AddMockA100Mdev(uuid string, mdevType string, parentMdevTypeDir string) error {
deviceDir := filepath.Join(m.mdevDevicesRoot, uuid)
err := os.MkdirAll(deviceDir, 0755)
func (m *MockNvmdev) AddMockA100Mdev(uuid string, mdevType string, mdevTypeDir string, parentDeviceDir string) error {
mdevDeviceDir := filepath.Join(parentDeviceDir, uuid)
err := os.Mkdir(mdevDeviceDir, 0755)
if err != nil {
return err
}
err = os.Symlink(parentMdevTypeDir, filepath.Join(deviceDir, "mdev_type"))
parentMdevTypeDir := filepath.Join(parentDeviceDir, "mdev_supported_types", mdevTypeDir)
err = os.Symlink(parentMdevTypeDir, filepath.Join(mdevDeviceDir, "mdev_type"))
if err != nil {
return err
}
_, err = os.Create(filepath.Join(mdevDeviceDir, "vfio_mdev"))
if err != nil {
return err
}
err = os.Symlink(filepath.Join(mdevDeviceDir, "vfio_mdev"), filepath.Join(mdevDeviceDir, "driver"))
if err != nil {
return err
}
err = os.Symlink(mdevDeviceDir, filepath.Join(m.mdevDevicesRoot, uuid))
if err != nil {
return err
}

View File

@ -19,7 +19,6 @@ package nvmdev
import (
"fmt"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci"
"io/ioutil"
"os"
"path"
"path/filepath"
@ -57,6 +56,7 @@ type Device struct {
Path string
UUID string
MDEVType string
Driver string
Parent *ParentDevice
}
@ -67,7 +67,7 @@ func New() Interface {
// GetAllParentDevices returns all NVIDIA Parent PCI devices on the system
func (m *nvmdev) GetAllParentDevices() ([]*ParentDevice, error) {
deviceDirs, err := ioutil.ReadDir(m.mdevParentsRoot)
deviceDirs, err := os.ReadDir(m.mdevParentsRoot)
if err != nil {
return nil, fmt.Errorf("unable to read PCI bus devices: %v", err)
}
@ -101,7 +101,7 @@ func (m *nvmdev) GetAllParentDevices() ([]*ParentDevice, error) {
// GetAllDevices returns all NVIDIA mdev (vGPU) devices on the system
func (m *nvmdev) GetAllDevices() ([]*Device, error) {
deviceDirs, err := ioutil.ReadDir(m.mdevDevicesRoot)
deviceDirs, err := os.ReadDir(m.mdevDevicesRoot)
if err != nil {
return nil, fmt.Errorf("unable to read MDEV devices directory: %v", err)
}
@ -144,37 +144,49 @@ func NewDevice(root string, uuid string) (*Device, error) {
return nil, fmt.Errorf("error getting mdev type: %v", err)
}
driver, err := m.driver()
if err != nil {
return nil, fmt.Errorf("error detecting driver: %v", err)
}
device := Device{
Path: path,
UUID: uuid,
MDEVType: mdevType,
Driver: driver,
Parent: parent,
}
return &device, nil
}
// mdev represents the path to an NVIDIA mdev (vGPU) device.
// Its methods build paths to entries beneath it (for example "mdev_type"
// and "driver") by joining the entry name onto this path.
type mdev string
// newMdev resolves symlinks via filepath.EvalSymlinks and wraps the resolved
// path in an mdev; a resolution failure is reported as a formatted error.
// NOTE(review): this span comes from a diff view, so the replaced statements
// (resolving devicePath/mdev_type) and their replacements (resolving
// devicePath itself) both appear below — confirm which variant is current.
func newMdev(devicePath string) (mdev, error) {
mdevTypeDir, err := filepath.EvalSymlinks(path.Join(devicePath, "mdev_type"))
mdevDir, err := filepath.EvalSymlinks(devicePath)
if err != nil {
return "", fmt.Errorf("error resolving mdev_type link: %v", err)
return "", fmt.Errorf("error resolving symlink for %s: %v", devicePath, err)
}
return mdev(mdevTypeDir), nil
return mdev(mdevDir), nil
}
// String returns the mdev's underlying path as a plain string.
func (m mdev) String() string {
	devicePath := string(m)
	return devicePath
}
// parentDevicePath derives the parent device directory from the mdev path by
// stripping trailing path elements with path.Dir.
// NOTE(review): this span comes from a diff view, so two return statements are
// shown — the replaced one (two levels up from an
// mdev_supported_types/<mdev_type> path) and its replacement (one level up
// from an <addr>/<uuid> path); confirm which variant is current.
func (m mdev) parentDevicePath() string {
// /sys/bus/pci/devices/<addr>/mdev_supported_types/<mdev_type>
return path.Dir(path.Dir(string(m)))
// /sys/bus/pci/devices/<addr>/<uuid>
return path.Dir(string(m))
}
func (m mdev) Type() (string, error) {
mdevType, err := ioutil.ReadFile(path.Join(string(m), "name"))
mdevTypeDir, err := filepath.EvalSymlinks(path.Join(string(m), "mdev_type"))
if err != nil {
return "", fmt.Errorf("error resolving mdev_type link for mdev %s: %v", m, err)
}
mdevType, err := os.ReadFile(path.Join(mdevTypeDir, "name"))
if err != nil {
return "", fmt.Errorf("unable to read mdev_type name for mdev %s: %v", m, err)
}
@ -188,6 +200,14 @@ func (m mdev) Type() (string, error) {
return mdevTypeSplit[1], nil
}
// driver returns the name of the driver bound to the mdev device.
//
// The name is the base name of the path that the device's "driver" symlink
// resolves to. An error is returned when the link is missing or cannot be
// resolved; it names the mdev and wraps the underlying error so callers can
// still inspect it with errors.Is / errors.As.
func (m mdev) driver() (string, error) {
	driverLink := filepath.Join(string(m), "driver")
	target, err := filepath.EvalSymlinks(driverLink)
	if err != nil {
		return "", fmt.Errorf("error resolving driver link for mdev %s: %w", m, err)
	}
	return filepath.Base(target), nil
}
// NewParentDevice constructs a ParentDevice
func NewParentDevice(devicePath string) (*ParentDevice, error) {
nvdevice, err := nvpci.NewDevice(devicePath)
@ -205,7 +225,7 @@ func NewParentDevice(devicePath string) (*ParentDevice, error) {
}
mdevTypesMap := make(map[string]string)
for _, path := range paths {
name, err := ioutil.ReadFile(path)
name, err := os.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("unable to read file %s: %v", path, err)
}
@ -292,7 +312,7 @@ func (p *ParentDevice) GetAvailableMDEVInstances(mdevType string) (int, error) {
return -1, nil
}
available, err := ioutil.ReadFile(filepath.Join(mdevPath, "available_instances"))
available, err := os.ReadFile(filepath.Join(mdevPath, "available_instances"))
if err != nil {
return -1, fmt.Errorf("unable to read available_instances file: %v", err)
}

View File

@ -18,7 +18,6 @@ package nvmdev
import (
"github.com/stretchr/testify/require"
"path/filepath"
"testing"
)
@ -41,11 +40,11 @@ func TestNvmdev(t *testing.T) {
require.Nil(t, err, "Error checking if A100-4Q vGPU type is available for creation")
require.True(t, available, "A100-4C should be available to create")
err = nvmdev.AddMockA100Mdev("b1914f0a-15cf-416e-8967-55fc7cb68e20", "A100-4C",
filepath.Join(parentDevs[0].Path, "mdev_supported_types/nvidia-500"))
err = nvmdev.AddMockA100Mdev("b1914f0a-15cf-416e-8967-55fc7cb68e20", "A100-4C", "nvidia-500", parentDevs[0].Path)
require.Nil(t, err, "Error adding Mock A100 mediated device")
mdevs, err := nvmdev.GetAllDevices()
require.Nil(t, err, "Error getting NVIDIA MDEV (vGPU) devices")
require.Equal(t, 1, len(mdevs), "Wrong number of NVIDIA MDEV (vGPU) devices")
require.Equal(t, "vfio_mdev", mdevs[0].Driver, "Wrong driver detected for mdev device")
}

View File

@ -18,7 +18,7 @@ package nvpci
import (
"fmt"
"io/ioutil"
"os"
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci/bytes"
)
@ -71,7 +71,7 @@ type PCICapabilities struct {
}
func (cs *ConfigSpace) Read() (ConfigSpaceIO, error) {
config, err := ioutil.ReadFile(cs.Path)
config, err := os.ReadFile(cs.Path)
if err != nil {
return nil, fmt.Errorf("failed to open file: %v", err)
}

View File

@ -18,7 +18,6 @@ package nvpci
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
@ -34,7 +33,7 @@ var _ Interface = (*MockNvpci)(nil)
// NewMockNvpci creates a new mock PCI and removes old devices
func NewMockNvpci() (mock *MockNvpci, rerr error) {
rootDir, err := ioutil.TempDir("", "")
rootDir, err := os.MkdirTemp(os.TempDir(), "")
if err != nil {
return nil, err
}
@ -91,6 +90,15 @@ func (m *MockNvpci) AddMockA100(address string, numaNode int) error {
return err
}
_, err = os.Create(filepath.Join(deviceDir, "nvidia"))
if err != nil {
return err
}
err = os.Symlink(filepath.Join(deviceDir, "nvidia"), filepath.Join(deviceDir, "driver"))
if err != nil {
return err
}
numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
if err != nil {
return err

View File

@ -18,9 +18,9 @@ package nvpci
import (
"fmt"
"io/ioutil"
"os"
"path"
"path/filepath"
"sort"
"strconv"
"strings"
@ -70,6 +70,7 @@ type NvidiaPCIDevice struct {
Vendor uint16
Class uint32
Device uint16
Driver string
NumaNode int
Config *ConfigSpace
Resources MemoryResources
@ -104,7 +105,7 @@ func (d *NvidiaPCIDevice) IsResetAvailable() bool {
// Reset performs a reset to apply a new configuration at HW level
func (d *NvidiaPCIDevice) Reset() error {
err := ioutil.WriteFile(path.Join(d.Path, "reset"), []byte("1"), 0)
err := os.WriteFile(path.Join(d.Path, "reset"), []byte("1"), 0)
if err != nil {
return fmt.Errorf("unable to write to reset file: %v", err)
}
@ -123,7 +124,7 @@ func NewFrom(root string) Interface {
// GetAllDevices returns all Nvidia PCI devices on the system
func (p *nvpci) GetAllDevices() ([]*NvidiaPCIDevice, error) {
deviceDirs, err := ioutil.ReadDir(p.pciDevicesRoot)
deviceDirs, err := os.ReadDir(p.pciDevicesRoot)
if err != nil {
return nil, fmt.Errorf("unable to read PCI bus devices: %v", err)
}
@ -159,7 +160,7 @@ func (p *nvpci) GetAllDevices() ([]*NvidiaPCIDevice, error) {
func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
address := path.Base(devicePath)
vendor, err := ioutil.ReadFile(path.Join(devicePath, "vendor"))
vendor, err := os.ReadFile(path.Join(devicePath, "vendor"))
if err != nil {
return nil, fmt.Errorf("unable to read PCI device vendor id for %s: %v", address, err)
}
@ -173,7 +174,7 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
return nil, nil
}
class, err := ioutil.ReadFile(path.Join(devicePath, "class"))
class, err := os.ReadFile(path.Join(devicePath, "class"))
if err != nil {
return nil, fmt.Errorf("unable to read PCI device class for %s: %v", address, err)
}
@ -183,7 +184,7 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
return nil, fmt.Errorf("unable to convert class string to uint32: %v", classStr)
}
device, err := ioutil.ReadFile(path.Join(devicePath, "device"))
device, err := os.ReadFile(path.Join(devicePath, "device"))
if err != nil {
return nil, fmt.Errorf("unable to read PCI device id for %s: %v", address, err)
}
@ -193,7 +194,16 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
return nil, fmt.Errorf("unable to convert device string to uint16: %v", deviceStr)
}
numa, err := ioutil.ReadFile(path.Join(devicePath, "numa_node"))
driver, err := filepath.EvalSymlinks(path.Join(devicePath, "driver"))
if err == nil {
driver = filepath.Base(driver)
} else if os.IsNotExist(err) {
driver = ""
} else {
return nil, fmt.Errorf("unable to detect driver for %s: %v", address, err)
}
numa, err := os.ReadFile(path.Join(devicePath, "numa_node"))
if err != nil {
return nil, fmt.Errorf("unable to read PCI NUMA node for %s: %v", address, err)
}
@ -207,7 +217,7 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
Path: path.Join(devicePath, "config"),
}
resource, err := ioutil.ReadFile(path.Join(devicePath, "resource"))
resource, err := os.ReadFile(path.Join(devicePath, "resource"))
if err != nil {
return nil, fmt.Errorf("unable to read PCI resource file for %s: %v", address, err)
}
@ -239,6 +249,7 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
Vendor: uint16(vendorID),
Class: uint32(classID),
Device: uint16(deviceID),
Driver: driver,
NumaNode: int(numaNode),
Config: config,
Resources: resources,

View File

@ -45,6 +45,7 @@ func TestNvpci(t *testing.T) {
require.Nil(t, err, "Error reading config")
require.Equal(t, devices[0].Vendor, config.GetVendorID(), "Vendor IDs do not match")
require.Equal(t, devices[0].Device, config.GetDeviceID(), "Device IDs do not match")
require.Equal(t, "nvidia", devices[0].Driver, "Wrong driver detected for device")
capabilities, err := config.GetPCICapabilities()
require.Nil(t, err, "Error getting PCI capabilities")