Merge branch 'iommu-group' into 'main'

Detect iommu_group for PCI and mdev devices

See merge request nvidia/cloud-native/go-nvlib!12
This commit is contained in:
Christopher Desiniotis 2022-07-25 23:20:03 +00:00
commit c7f47cb02a
6 changed files with 114 additions and 31 deletions

View File

@ -100,6 +100,24 @@ func (m *MockNvmdev) AddMockA100Parent(address string, numaNode int) error {
return err
}
_, err = os.Create(filepath.Join(deviceDir, "nvidia"))
if err != nil {
return err
}
err = os.Symlink(filepath.Join(deviceDir, "nvidia"), filepath.Join(deviceDir, "driver"))
if err != nil {
return err
}
_, err = os.Create(filepath.Join(deviceDir, "20"))
if err != nil {
return err
}
err = os.Symlink(filepath.Join(deviceDir, "20"), filepath.Join(deviceDir, "iommu_group"))
if err != nil {
return err
}
numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
if err != nil {
return err
@ -201,6 +219,12 @@ func (m *MockNvmdev) AddMockA100Mdev(uuid string, mdevType string, mdevTypeDir s
return err
}
err = os.Symlink(filepath.Join(mdevDeviceDir, "vfio_mdev"), filepath.Join(mdevDeviceDir, "driver"))
_, err = os.Create(filepath.Join(mdevDeviceDir, "200"))
if err != nil {
return err
}
err = os.Symlink(filepath.Join(mdevDeviceDir, "200"), filepath.Join(mdevDeviceDir, "iommu_group"))
if err != nil {
return err
}

View File

@ -53,11 +53,12 @@ type ParentDevice struct {
// Device represents an NVIDIA MDEV (vGPU) device
type Device struct {
Path string
UUID string
MDEVType string
Driver string
Parent *ParentDevice
Path string
UUID string
MDEVType string
Driver string
// IommuGroup is the device's IOMMU group number, parsed from the base name
// of the path its iommu_group link resolves to.
IommuGroup int
Parent *ParentDevice
}
// New interface that allows us to get a list of all NVIDIA parent and MDEV (vGPU) devices
@ -149,12 +150,18 @@ func NewDevice(root string, uuid string) (*Device, error) {
return nil, fmt.Errorf("error detecting driver: %v", err)
}
iommuGroup, err := m.iommuGroup()
if err != nil {
return nil, fmt.Errorf("error getting iommu_group: %v", err)
}
device := Device{
Path: path,
UUID: uuid,
MDEVType: mdevType,
Driver: driver,
Parent: parent,
Path: path,
UUID: uuid,
MDEVType: mdevType,
Driver: driver,
IommuGroup: iommuGroup,
Parent: parent,
}
return &device, nil
@ -175,15 +182,25 @@ func newMdev(devicePath string) (mdev, error) {
// String returns the mdev's device path as a plain string.
func (m mdev) String() string {
	devicePath := string(m)
	return devicePath
}
// resolve evaluates any symbolic links in <mdev path>/<target> and returns
// the resulting path.
//
// On failure it returns an empty string and an error that names target. The
// error from filepath.EvalSymlinks is wrapped with %w, so callers can inspect
// the cause with errors.Is / errors.As (for example to tell a missing link
// apart from other failures).
func (m mdev) resolve(target string) (string, error) {
	// filepath.Join (not path.Join) because this is a filesystem path.
	link := filepath.Join(string(m), target)

	resolved, err := filepath.EvalSymlinks(link)
	if err != nil {
		return "", fmt.Errorf("error resolving %q: %w", target, err)
	}
	return resolved, nil
}
// parentDevicePath returns the directory containing the mdev entry, i.e. the
// mdev path with its last element removed.
func (m mdev) parentDevicePath() string {
	// An mdev path has the form /sys/bus/pci/devices/<addr>/<uuid>, so
	// dropping the final element leaves /sys/bus/pci/devices/<addr>.
	devicePath := string(m)
	return path.Dir(devicePath)
}
func (m mdev) Type() (string, error) {
mdevTypeDir, err := filepath.EvalSymlinks(path.Join(string(m), "mdev_type"))
mdevTypeDir, err := m.resolve("mdev_type")
if err != nil {
return "", fmt.Errorf("error resolving mdev_type link for mdev %s: %v", m, err)
return "", err
}
mdevType, err := os.ReadFile(path.Join(mdevTypeDir, "name"))
@ -201,13 +218,27 @@ func (m mdev) Type() (string, error) {
}
// driver returns the base name of the path that the mdev's "driver" link
// resolves to. It returns an empty string and the resolution error if the
// link cannot be resolved.
func (m mdev) driver() (string, error) {
// NOTE(review): the next two declarations look like the before/after lines of
// the same change; presumably only the m.resolve form remains — confirm.
driver, err := filepath.EvalSymlinks(path.Join(string(m), "driver"))
driver, err := m.resolve("driver")
if err != nil {
return "", err
}
return filepath.Base(driver), nil
}
// iommuGroup returns the IOMMU group number of the mdev.
//
// The number is parsed from the base name of the path that the mdev's
// "iommu_group" link resolves to. On any failure it returns -1 together with
// an error; a parse failure wraps the strconv error so the cause is preserved.
func (m mdev) iommuGroup() (int, error) {
	groupPath, err := m.resolve("iommu_group")
	if err != nil {
		return -1, err
	}

	groupName := strings.TrimSpace(filepath.Base(groupPath))

	// bitSize 0 makes ParseInt reject values that do not fit in an int, so the
	// conversion below cannot silently truncate on 32-bit platforms.
	group, err := strconv.ParseInt(groupName, 0, 0)
	if err != nil {
		return -1, fmt.Errorf("unable to convert iommu_group string %q to int: %w", groupName, err)
	}
	return int(group), nil
}
// NewParentDevice constructs a ParentDevice
func NewParentDevice(devicePath string) (*ParentDevice, error) {
nvdevice, err := nvpci.NewDevice(devicePath)

View File

@ -46,5 +46,7 @@ func TestNvmdev(t *testing.T) {
mdevs, err := nvmdev.GetAllDevices()
require.Nil(t, err, "Error getting NVIDIA MDEV (vGPU) devices")
require.Equal(t, 1, len(mdevs), "Wrong number of NVIDIA MDEV (vGPU) devices")
require.Equal(t, "A100-4C", mdevs[0].MDEVType, "Wrong value for mdev_type")
require.Equal(t, "vfio_mdev", mdevs[0].Driver, "Wrong driver detected for mdev device")
require.Equal(t, 200, mdevs[0].IommuGroup, "Wrong value for iommu_group")
}

View File

@ -99,6 +99,15 @@ func (m *MockNvpci) AddMockA100(address string, numaNode int) error {
return err
}
_, err = os.Create(filepath.Join(deviceDir, "20"))
if err != nil {
return err
}
err = os.Symlink(filepath.Join(deviceDir, "20"), filepath.Join(deviceDir, "iommu_group"))
if err != nil {
return err
}
numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
if err != nil {
return err

View File

@ -65,15 +65,16 @@ var _ ResourceInterface = (*MemoryResources)(nil)
// NvidiaPCIDevice represents a PCI device for an NVIDIA product
type NvidiaPCIDevice struct {
Path string
Address string
Vendor uint16
Class uint32
Device uint16
Driver string
NumaNode int
Config *ConfigSpace
Resources MemoryResources
Path string
Address string
Vendor uint16
Class uint32
Device uint16
Driver string
// IommuGroup is the device's IOMMU group number, parsed from the base name
// of the path its iommu_group link resolves to. It is -1 when the device has
// no iommu_group link.
IommuGroup int
NumaNode int
Config *ConfigSpace
Resources MemoryResources
}
// IsVGAController if class == 0x300
@ -203,6 +204,20 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
return nil, fmt.Errorf("unable to detect driver for %s: %v", address, err)
}
var iommuGroup int64
iommu, err := filepath.EvalSymlinks(path.Join(devicePath, "iommu_group"))
if err == nil {
iommuGroupStr := strings.TrimSpace(filepath.Base(iommu))
iommuGroup, err = strconv.ParseInt(iommuGroupStr, 0, 64)
if err != nil {
return nil, fmt.Errorf("unable to convert iommu_group string to int64: %v", iommuGroupStr)
}
} else if os.IsNotExist(err) {
iommuGroup = -1
} else {
return nil, fmt.Errorf("unable to detect iommu_group for %s: %v", address, err)
}
numa, err := os.ReadFile(path.Join(devicePath, "numa_node"))
if err != nil {
return nil, fmt.Errorf("unable to read PCI NUMA node for %s: %v", address, err)
@ -244,15 +259,16 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
}
nvdevice := &NvidiaPCIDevice{
Path: devicePath,
Address: address,
Vendor: uint16(vendorID),
Class: uint32(classID),
Device: uint16(deviceID),
Driver: driver,
NumaNode: int(numaNode),
Config: config,
Resources: resources,
Path: devicePath,
Address: address,
Vendor: uint16(vendorID),
Class: uint32(classID),
Device: uint16(deviceID),
Driver: driver,
IommuGroup: int(iommuGroup),
NumaNode: int(numaNode),
Config: config,
Resources: resources,
}
return nvdevice, nil

View File

@ -46,6 +46,7 @@ func TestNvpci(t *testing.T) {
require.Equal(t, devices[0].Vendor, config.GetVendorID(), "Vendor IDs do not match")
require.Equal(t, devices[0].Device, config.GetDeviceID(), "Device IDs do not match")
require.Equal(t, "nvidia", devices[0].Driver, "Wrong driver detected for device")
require.Equal(t, 20, devices[0].IommuGroup, "Wrong iommu_group detected for device")
capabilities, err := config.GetPCICapabilities()
require.Nil(t, err, "Error getting PCI capabilities")