mirror of
https://github.com/clearml/go-nvlib
synced 2025-04-21 06:34:32 +00:00
Detect iommu_group for PCI and mdev devices
This commit is contained in:
parent
f281b5e581
commit
f52cd402a1
@ -100,6 +100,24 @@ func (m *MockNvmdev) AddMockA100Parent(address string, numaNode int) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_, err = os.Create(filepath.Join(deviceDir, "nvidia"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = os.Symlink(filepath.Join(deviceDir, "nvidia"), filepath.Join(deviceDir, "driver"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = os.Create(filepath.Join(deviceDir, "20"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = os.Symlink(filepath.Join(deviceDir, "20"), filepath.Join(deviceDir, "iommu_group"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
|
numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -201,6 +219,12 @@ func (m *MockNvmdev) AddMockA100Mdev(uuid string, mdevType string, mdevTypeDir s
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
err = os.Symlink(filepath.Join(mdevDeviceDir, "vfio_mdev"), filepath.Join(mdevDeviceDir, "driver"))
|
err = os.Symlink(filepath.Join(mdevDeviceDir, "vfio_mdev"), filepath.Join(mdevDeviceDir, "driver"))
|
||||||
|
|
||||||
|
_, err = os.Create(filepath.Join(mdevDeviceDir, "200"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = os.Symlink(filepath.Join(mdevDeviceDir, "200"), filepath.Join(mdevDeviceDir, "iommu_group"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -57,6 +57,7 @@ type Device struct {
|
|||||||
UUID string
|
UUID string
|
||||||
MDEVType string
|
MDEVType string
|
||||||
Driver string
|
Driver string
|
||||||
|
IommuGroup int
|
||||||
Parent *ParentDevice
|
Parent *ParentDevice
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -149,11 +150,17 @@ func NewDevice(root string, uuid string) (*Device, error) {
|
|||||||
return nil, fmt.Errorf("error detecting driver: %v", err)
|
return nil, fmt.Errorf("error detecting driver: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
iommuGroup, err := m.iommuGroup()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error getting iommu_group: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
device := Device{
|
device := Device{
|
||||||
Path: path,
|
Path: path,
|
||||||
UUID: uuid,
|
UUID: uuid,
|
||||||
MDEVType: mdevType,
|
MDEVType: mdevType,
|
||||||
Driver: driver,
|
Driver: driver,
|
||||||
|
IommuGroup: iommuGroup,
|
||||||
Parent: parent,
|
Parent: parent,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -175,15 +182,25 @@ func newMdev(devicePath string) (mdev, error) {
|
|||||||
func (m mdev) String() string {
|
func (m mdev) String() string {
|
||||||
return string(m)
|
return string(m)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m mdev) resolve(target string) (string, error) {
|
||||||
|
resolved, err := filepath.EvalSymlinks(path.Join(string(m), target))
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("error resolving %q: %v", target, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return resolved, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (m mdev) parentDevicePath() string {
|
func (m mdev) parentDevicePath() string {
|
||||||
// /sys/bus/pci/devices/<addr>/<uuid>
|
// /sys/bus/pci/devices/<addr>/<uuid>
|
||||||
return path.Dir(string(m))
|
return path.Dir(string(m))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m mdev) Type() (string, error) {
|
func (m mdev) Type() (string, error) {
|
||||||
mdevTypeDir, err := filepath.EvalSymlinks(path.Join(string(m), "mdev_type"))
|
mdevTypeDir, err := m.resolve("mdev_type")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("error resolving mdev_type link for mdev %s: %v", m, err)
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
mdevType, err := os.ReadFile(path.Join(mdevTypeDir, "name"))
|
mdevType, err := os.ReadFile(path.Join(mdevTypeDir, "name"))
|
||||||
@ -201,13 +218,27 @@ func (m mdev) Type() (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m mdev) driver() (string, error) {
|
func (m mdev) driver() (string, error) {
|
||||||
driver, err := filepath.EvalSymlinks(path.Join(string(m), "driver"))
|
driver, err := m.resolve("driver")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
return filepath.Base(driver), nil
|
return filepath.Base(driver), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m mdev) iommuGroup() (int, error) {
|
||||||
|
iommu, err := m.resolve("iommu_group")
|
||||||
|
if err != nil {
|
||||||
|
return -1, err
|
||||||
|
}
|
||||||
|
iommuGroupStr := strings.TrimSpace(filepath.Base(iommu))
|
||||||
|
iommuGroup, err := strconv.ParseInt(iommuGroupStr, 0, 64)
|
||||||
|
if err != nil {
|
||||||
|
return -1, fmt.Errorf("unable to convert iommu_group string to int64: %v", iommuGroupStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
return int(iommuGroup), nil
|
||||||
|
}
|
||||||
|
|
||||||
// NewParentDevice constructs a ParentDevice
|
// NewParentDevice constructs a ParentDevice
|
||||||
func NewParentDevice(devicePath string) (*ParentDevice, error) {
|
func NewParentDevice(devicePath string) (*ParentDevice, error) {
|
||||||
nvdevice, err := nvpci.NewDevice(devicePath)
|
nvdevice, err := nvpci.NewDevice(devicePath)
|
||||||
|
@ -46,5 +46,7 @@ func TestNvmdev(t *testing.T) {
|
|||||||
mdevs, err := nvmdev.GetAllDevices()
|
mdevs, err := nvmdev.GetAllDevices()
|
||||||
require.Nil(t, err, "Error getting NVIDIA MDEV (vGPU) devices")
|
require.Nil(t, err, "Error getting NVIDIA MDEV (vGPU) devices")
|
||||||
require.Equal(t, 1, len(mdevs), "Wrong number of NVIDIA MDEV (vGPU) devices")
|
require.Equal(t, 1, len(mdevs), "Wrong number of NVIDIA MDEV (vGPU) devices")
|
||||||
|
require.Equal(t, "A100-4C", mdevs[0].MDEVType, "Wrong value for mdev_type")
|
||||||
require.Equal(t, "vfio_mdev", mdevs[0].Driver, "Wrong driver detected for mdev device")
|
require.Equal(t, "vfio_mdev", mdevs[0].Driver, "Wrong driver detected for mdev device")
|
||||||
|
require.Equal(t, 200, mdevs[0].IommuGroup, "Wrong value for iommu_group")
|
||||||
}
|
}
|
||||||
|
@ -99,6 +99,15 @@ func (m *MockNvpci) AddMockA100(address string, numaNode int) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_, err = os.Create(filepath.Join(deviceDir, "20"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = os.Symlink(filepath.Join(deviceDir, "20"), filepath.Join(deviceDir, "iommu_group"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
|
numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -71,6 +71,7 @@ type NvidiaPCIDevice struct {
|
|||||||
Class uint32
|
Class uint32
|
||||||
Device uint16
|
Device uint16
|
||||||
Driver string
|
Driver string
|
||||||
|
IommuGroup int
|
||||||
NumaNode int
|
NumaNode int
|
||||||
Config *ConfigSpace
|
Config *ConfigSpace
|
||||||
Resources MemoryResources
|
Resources MemoryResources
|
||||||
@ -203,6 +204,20 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
|
|||||||
return nil, fmt.Errorf("unable to detect driver for %s: %v", address, err)
|
return nil, fmt.Errorf("unable to detect driver for %s: %v", address, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var iommuGroup int64
|
||||||
|
iommu, err := filepath.EvalSymlinks(path.Join(devicePath, "iommu_group"))
|
||||||
|
if err == nil {
|
||||||
|
iommuGroupStr := strings.TrimSpace(filepath.Base(iommu))
|
||||||
|
iommuGroup, err = strconv.ParseInt(iommuGroupStr, 0, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to convert iommu_group string to int64: %v", iommuGroupStr)
|
||||||
|
}
|
||||||
|
} else if os.IsNotExist(err) {
|
||||||
|
iommuGroup = -1
|
||||||
|
} else {
|
||||||
|
return nil, fmt.Errorf("unable to detect iommu_group for %s: %v", address, err)
|
||||||
|
}
|
||||||
|
|
||||||
numa, err := os.ReadFile(path.Join(devicePath, "numa_node"))
|
numa, err := os.ReadFile(path.Join(devicePath, "numa_node"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("unable to read PCI NUMA node for %s: %v", address, err)
|
return nil, fmt.Errorf("unable to read PCI NUMA node for %s: %v", address, err)
|
||||||
@ -250,6 +265,7 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
|
|||||||
Class: uint32(classID),
|
Class: uint32(classID),
|
||||||
Device: uint16(deviceID),
|
Device: uint16(deviceID),
|
||||||
Driver: driver,
|
Driver: driver,
|
||||||
|
IommuGroup: int(iommuGroup),
|
||||||
NumaNode: int(numaNode),
|
NumaNode: int(numaNode),
|
||||||
Config: config,
|
Config: config,
|
||||||
Resources: resources,
|
Resources: resources,
|
||||||
|
@ -46,6 +46,7 @@ func TestNvpci(t *testing.T) {
|
|||||||
require.Equal(t, devices[0].Vendor, config.GetVendorID(), "Vendor IDs do not match")
|
require.Equal(t, devices[0].Vendor, config.GetVendorID(), "Vendor IDs do not match")
|
||||||
require.Equal(t, devices[0].Device, config.GetDeviceID(), "Device IDs do not match")
|
require.Equal(t, devices[0].Device, config.GetDeviceID(), "Device IDs do not match")
|
||||||
require.Equal(t, "nvidia", devices[0].Driver, "Wrong driver detected for device")
|
require.Equal(t, "nvidia", devices[0].Driver, "Wrong driver detected for device")
|
||||||
|
require.Equal(t, 20, devices[0].IommuGroup, "Wrong iommu_group detected for device")
|
||||||
|
|
||||||
capabilities, err := config.GetPCICapabilities()
|
capabilities, err := config.GetPCICapabilities()
|
||||||
require.Nil(t, err, "Error getting PCI capabilities")
|
require.Nil(t, err, "Error getting PCI capabilities")
|
||||||
|
Loading…
Reference in New Issue
Block a user