diff --git a/pkg/nvmdev/mock.go b/pkg/nvmdev/mock.go
index 626e3e9..0d25359 100644
--- a/pkg/nvmdev/mock.go
+++ b/pkg/nvmdev/mock.go
@@ -100,6 +100,26 @@ func (m *MockNvmdev) AddMockA100Parent(address string, numaNode int) error {
 		return err
 	}
 
+	// Mock the driver link: an empty "nvidia" file is the symlink target.
+	err = os.WriteFile(filepath.Join(deviceDir, "nvidia"), nil, 0644)
+	if err != nil {
+		return err
+	}
+	err = os.Symlink(filepath.Join(deviceDir, "nvidia"), filepath.Join(deviceDir, "driver"))
+	if err != nil {
+		return err
+	}
+
+	// Mock the iommu_group link; the target's base name ("20") is the group number.
+	err = os.WriteFile(filepath.Join(deviceDir, "20"), nil, 0644)
+	if err != nil {
+		return err
+	}
+	err = os.Symlink(filepath.Join(deviceDir, "20"), filepath.Join(deviceDir, "iommu_group"))
+	if err != nil {
+		return err
+	}
+
 	numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
 	if err != nil {
 		return err
@@ -201,6 +221,16 @@ func (m *MockNvmdev) AddMockA100Mdev(uuid string, mdevType string, mdevTypeDir s
 		return err
 	}
 	err = os.Symlink(filepath.Join(mdevDeviceDir, "vfio_mdev"), filepath.Join(mdevDeviceDir, "driver"))
+	if err != nil {
+		return err
+	}
+
+	// Mock the iommu_group link; the target's base name ("200") is the group number.
+	err = os.WriteFile(filepath.Join(mdevDeviceDir, "200"), nil, 0644)
+	if err != nil {
+		return err
+	}
+	err = os.Symlink(filepath.Join(mdevDeviceDir, "200"), filepath.Join(mdevDeviceDir, "iommu_group"))
 	if err != nil {
 		return err
 	}
diff --git a/pkg/nvmdev/nvmdev.go b/pkg/nvmdev/nvmdev.go
index dfcfef7..c1521ca 100644
--- a/pkg/nvmdev/nvmdev.go
+++ b/pkg/nvmdev/nvmdev.go
@@ -53,11 +53,12 @@ type ParentDevice struct {
 
 // Device represents an NVIDIA MDEV (vGPU) device
 type Device struct {
-	Path     string
-	UUID     string
-	MDEVType string
-	Driver   string
-	Parent   *ParentDevice
+	Path       string
+	UUID       string
+	MDEVType   string
+	Driver     string
+	IommuGroup int
+	Parent     *ParentDevice
 }
 
 // New interface that allows us to get a list of all NVIDIA parent and MDEV (vGPU) devices
@@ -149,12 +150,18 @@ func NewDevice(root string, uuid string) (*Device, error) {
 		return nil, fmt.Errorf("error detecting driver: %v", err)
 	}
 
+	iommuGroup, err := m.iommuGroup()
+	if err != nil {
+		return nil, fmt.Errorf("error getting iommu_group: %v", err)
+	}
+
 	device := Device{
-		Path:     path,
-		UUID:     uuid,
-		MDEVType: mdevType,
-		Driver:   driver,
-		Parent:   parent,
+		Path:       path,
+		UUID:       uuid,
+		MDEVType:   mdevType,
+		Driver:     driver,
+		IommuGroup: iommuGroup,
+		Parent:     parent,
 	}
 
 	return &device, nil
@@ -175,15 +182,27 @@ func newMdev(devicePath string) (mdev, error) {
 func (m mdev) String() string {
 	return string(m)
 }
+
+// resolve evaluates the symlink named target inside the mdev device directory
+// and returns the resolved path.
+func (m mdev) resolve(target string) (string, error) {
+	resolved, err := filepath.EvalSymlinks(path.Join(string(m), target))
+	if err != nil {
+		return "", fmt.Errorf("error resolving %q: %v", target, err)
+	}
+
+	return resolved, nil
+}
+
 func (m mdev) parentDevicePath() string {
 	// /sys/bus/pci/devices//
 	return path.Dir(string(m))
 }
 
 func (m mdev) Type() (string, error) {
-	mdevTypeDir, err := filepath.EvalSymlinks(path.Join(string(m), "mdev_type"))
+	mdevTypeDir, err := m.resolve("mdev_type")
 	if err != nil {
-		return "", fmt.Errorf("error resolving mdev_type link for mdev %s: %v", m, err)
+		return "", err
 	}
 
 	mdevType, err := os.ReadFile(path.Join(mdevTypeDir, "name"))
@@ -201,13 +220,29 @@ func (m mdev) Type() (string, error) {
 }
 
 func (m mdev) driver() (string, error) {
-	driver, err := filepath.EvalSymlinks(path.Join(string(m), "driver"))
+	driver, err := m.resolve("driver")
 	if err != nil {
 		return "", err
 	}
 	return filepath.Base(driver), nil
 }
 
+// iommuGroup returns the IOMMU group number of the mdev device, taken from
+// the base name of the resolved iommu_group symlink.
+func (m mdev) iommuGroup() (int, error) {
+	iommu, err := m.resolve("iommu_group")
+	if err != nil {
+		return -1, err
+	}
+	iommuGroupStr := strings.TrimSpace(filepath.Base(iommu))
+	iommuGroup, err := strconv.ParseInt(iommuGroupStr, 10, 64)
+	if err != nil {
+		return -1, fmt.Errorf("unable to convert iommu_group string %q to int64: %v", iommuGroupStr, err)
+	}
+
+	return int(iommuGroup), nil
+}
+
 // NewParentDevice constructs a ParentDevice
 func NewParentDevice(devicePath string) (*ParentDevice, error) {
 	nvdevice, err := nvpci.NewDevice(devicePath)
diff --git a/pkg/nvmdev/nvmdev_test.go b/pkg/nvmdev/nvmdev_test.go
index 78d1058..bd5b60c 100644
--- a/pkg/nvmdev/nvmdev_test.go
+++ b/pkg/nvmdev/nvmdev_test.go
@@ -46,5 +46,7 @@ func TestNvmdev(t *testing.T) {
 	mdevs, err := nvmdev.GetAllDevices()
 	require.Nil(t, err, "Error getting NVIDIA MDEV (vGPU) devices")
 	require.Equal(t, 1, len(mdevs), "Wrong number of NVIDIA MDEV (vGPU) devices")
+	require.Equal(t, "A100-4C", mdevs[0].MDEVType, "Wrong value for mdev_type")
 	require.Equal(t, "vfio_mdev", mdevs[0].Driver, "Wrong driver detected for mdev device")
+	require.Equal(t, 200, mdevs[0].IommuGroup, "Wrong value for iommu_group")
 }
diff --git a/pkg/nvpci/mock.go b/pkg/nvpci/mock.go
index 438f562..0f3df3a 100644
--- a/pkg/nvpci/mock.go
+++ b/pkg/nvpci/mock.go
@@ -99,6 +99,16 @@ func (m *MockNvpci) AddMockA100(address string, numaNode int) error {
 		return err
 	}
 
+	// Mock the iommu_group link; the target's base name ("20") is the group number.
+	err = os.WriteFile(filepath.Join(deviceDir, "20"), nil, 0644)
+	if err != nil {
+		return err
+	}
+	err = os.Symlink(filepath.Join(deviceDir, "20"), filepath.Join(deviceDir, "iommu_group"))
+	if err != nil {
+		return err
+	}
+
 	numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
 	if err != nil {
 		return err
diff --git a/pkg/nvpci/nvpci.go b/pkg/nvpci/nvpci.go
index 924189f..8ad2e54 100644
--- a/pkg/nvpci/nvpci.go
+++ b/pkg/nvpci/nvpci.go
@@ -65,15 +65,16 @@ var _ ResourceInterface = (*MemoryResources)(nil)
 
 // NvidiaPCIDevice represents a PCI device for an NVIDIA product
 type NvidiaPCIDevice struct {
-	Path      string
-	Address   string
-	Vendor    uint16
-	Class     uint32
-	Device    uint16
-	Driver    string
-	NumaNode  int
-	Config    *ConfigSpace
-	Resources MemoryResources
+	Path       string
+	Address    string
+	Vendor     uint16
+	Class      uint32
+	Device     uint16
+	Driver     string
+	IommuGroup int
+	NumaNode   int
+	Config     *ConfigSpace
+	Resources  MemoryResources
 }
 
 // IsVGAController if class == 0x300
@@ -203,6 +204,22 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
 		return nil, fmt.Errorf("unable to detect driver for %s: %v", address, err)
 	}
 
+	// The iommu_group link is optional: if it does not exist, the group is
+	// recorded as -1 instead of failing device construction.
+	var iommuGroup int64
+	iommu, err := filepath.EvalSymlinks(path.Join(devicePath, "iommu_group"))
+	if err == nil {
+		iommuGroupStr := strings.TrimSpace(filepath.Base(iommu))
+		iommuGroup, err = strconv.ParseInt(iommuGroupStr, 10, 64)
+		if err != nil {
+			return nil, fmt.Errorf("unable to convert iommu_group string %q to int64: %v", iommuGroupStr, err)
+		}
+	} else if os.IsNotExist(err) {
+		iommuGroup = -1
+	} else {
+		return nil, fmt.Errorf("unable to detect iommu_group for %s: %v", address, err)
+	}
+
 	numa, err := os.ReadFile(path.Join(devicePath, "numa_node"))
 	if err != nil {
 		return nil, fmt.Errorf("unable to read PCI NUMA node for %s: %v", address, err)
@@ -244,15 +261,16 @@ func NewDevice(devicePath string) (*NvidiaPCIDevice, error) {
 	}
 
 	nvdevice := &NvidiaPCIDevice{
-		Path:      devicePath,
-		Address:   address,
-		Vendor:    uint16(vendorID),
-		Class:     uint32(classID),
-		Device:    uint16(deviceID),
-		Driver:    driver,
-		NumaNode:  int(numaNode),
-		Config:    config,
-		Resources: resources,
+		Path:       devicePath,
+		Address:    address,
+		Vendor:     uint16(vendorID),
+		Class:      uint32(classID),
+		Device:     uint16(deviceID),
+		Driver:     driver,
+		IommuGroup: int(iommuGroup),
+		NumaNode:   int(numaNode),
+		Config:     config,
+		Resources:  resources,
 	}
 
 	return nvdevice, nil
diff --git a/pkg/nvpci/nvpci_test.go b/pkg/nvpci/nvpci_test.go
index 8dbf50f..e6cbdbf 100644
--- a/pkg/nvpci/nvpci_test.go
+++ b/pkg/nvpci/nvpci_test.go
@@ -46,6 +46,7 @@ func TestNvpci(t *testing.T) {
 	require.Equal(t, devices[0].Vendor, config.GetVendorID(), "Vendor IDs do not match")
 	require.Equal(t, devices[0].Device, config.GetDeviceID(), "Device IDs do not match")
 	require.Equal(t, "nvidia", devices[0].Driver, "Wrong driver detected for device")
+	require.Equal(t, 20, devices[0].IommuGroup, "Wrong iommu_group detected for device")
 
 	capabilities, err := config.GetPCICapabilities()
 	require.Nil(t, err, "Error getting PCI capabilities")