mirror of
https://github.com/clearml/go-nvlib
synced 2025-06-11 17:00:19 +00:00
Merge branch 'add-numa-node-to-nvpci' into 'master'
Add NUMA node as a standard field in the nvpci struct. See merge request nvidia/cloud-native/go-nvlib!2
This commit is contained in:
commit
96f9d0d39e
@ -25,17 +25,13 @@ import (
|
|||||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci/bytes"
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci/bytes"
|
||||||
)
|
)
|
||||||
|
|
||||||
type MockA100 struct {
|
type MockNvpci struct {
|
||||||
*nvpci
|
*nvpci
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MockA100) Cleanup() {
|
var _ Interface = (*MockNvpci)(nil)
|
||||||
os.RemoveAll(m.pciDevicesRoot)
|
|
||||||
}
|
|
||||||
|
|
||||||
var _ Interface = (*MockA100)(nil)
|
func NewMockNvpci() (mock *MockNvpci, rerr error) {
|
||||||
|
|
||||||
func NewMockA100() (mock *MockA100, rerr error) {
|
|
||||||
rootDir, err := ioutil.TempDir("", "")
|
rootDir, err := ioutil.TempDir("", "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -46,42 +42,63 @@ func NewMockA100() (mock *MockA100, rerr error) {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
deviceDir := filepath.Join(rootDir, "0000:80:05.1")
|
mock = &MockNvpci{
|
||||||
err = os.MkdirAll(deviceDir, 0755)
|
&nvpci{rootDir},
|
||||||
|
}
|
||||||
|
|
||||||
|
return mock, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MockNvpci) Cleanup() {
|
||||||
|
os.RemoveAll(m.pciDevicesRoot)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MockNvpci) AddMockA100(address string, numaNode int) error {
|
||||||
|
deviceDir := filepath.Join(m.pciDevicesRoot, address)
|
||||||
|
err := os.MkdirAll(deviceDir, 0755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
vendor, err := os.Create(filepath.Join(deviceDir, "vendor"))
|
vendor, err := os.Create(filepath.Join(deviceDir, "vendor"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
_, err = vendor.WriteString(fmt.Sprintf("0x%x", pciNvidiaVendorID))
|
_, err = vendor.WriteString(fmt.Sprintf("0x%x", pciNvidiaVendorID))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
class, err := os.Create(filepath.Join(deviceDir, "class"))
|
class, err := os.Create(filepath.Join(deviceDir, "class"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
_, err = class.WriteString(fmt.Sprintf("0x%x", pci3dControllerClass))
|
_, err = class.WriteString(fmt.Sprintf("0x%x", pci3dControllerClass))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
device, err := os.Create(filepath.Join(deviceDir, "device"))
|
device, err := os.Create(filepath.Join(deviceDir, "device"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
_, err = device.WriteString("0x20bf")
|
_, err = device.WriteString("0x20bf")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
_, err = numa.WriteString(fmt.Sprintf("%v", numaNode))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
config, err := os.Create(filepath.Join(deviceDir, "config"))
|
config, err := os.Create(filepath.Join(deviceDir, "config"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
_data := make([]byte, pciCfgSpaceStandardSize)
|
_data := make([]byte, pciCfgSpaceStandardSize)
|
||||||
data := bytes.New(&_data)
|
data := bytes.New(&_data)
|
||||||
@ -89,32 +106,28 @@ func NewMockA100() (mock *MockA100, rerr error) {
|
|||||||
data.Write16(2, uint16(0x20bf))
|
data.Write16(2, uint16(0x20bf))
|
||||||
_, err = config.Write(*data.Raw())
|
_, err = config.Write(*data.Raw())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
bar0 := []uint64{0x00000000c2000000, 0x00000000c2ffffff, 0x0000000000040200}
|
bar0 := []uint64{0x00000000c2000000, 0x00000000c2ffffff, 0x0000000000040200}
|
||||||
resource, err := os.Create(filepath.Join(deviceDir, "resource"))
|
resource, err := os.Create(filepath.Join(deviceDir, "resource"))
|
||||||
_, err = resource.WriteString(fmt.Sprintf("0x%x 0x%x 0x%x", bar0[0], bar0[1], bar0[2]))
|
_, err = resource.WriteString(fmt.Sprintf("0x%x 0x%x 0x%x", bar0[0], bar0[1], bar0[2]))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
pmcID := uint32(0x170000a1)
|
pmcID := uint32(0x170000a1)
|
||||||
resource0, err := os.Create(filepath.Join(deviceDir, "resource0"))
|
resource0, err := os.Create(filepath.Join(deviceDir, "resource0"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
_data = make([]byte, bar0[1]-bar0[0]+1)
|
_data = make([]byte, bar0[1]-bar0[0]+1)
|
||||||
data = bytes.New(&_data).LittleEndian()
|
data = bytes.New(&_data).LittleEndian()
|
||||||
data.Write32(0, pmcID)
|
data.Write32(0, pmcID)
|
||||||
_, err = resource0.Write(*data.Raw())
|
_, err = resource0.Write(*data.Raw())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
mock = &MockA100{
|
return nil
|
||||||
&nvpci{rootDir},
|
|
||||||
}
|
|
||||||
|
|
||||||
return mock, nil
|
|
||||||
}
|
}
|
||||||
|
@ -61,6 +61,7 @@ type NvidiaPCIDevice struct {
|
|||||||
Vendor uint16
|
Vendor uint16
|
||||||
Class uint32
|
Class uint32
|
||||||
Device uint16
|
Device uint16
|
||||||
|
NumaNode int
|
||||||
Config *ConfigSpace
|
Config *ConfigSpace
|
||||||
Resources map[int]*MemoryResource
|
Resources map[int]*MemoryResource
|
||||||
}
|
}
|
||||||
@ -147,6 +148,16 @@ func (p *nvpci) GetAllDevices() ([]*NvidiaPCIDevice, error) {
|
|||||||
return nil, fmt.Errorf("unable to convert device string to uint16: %v", deviceStr)
|
return nil, fmt.Errorf("unable to convert device string to uint16: %v", deviceStr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
numa, err := ioutil.ReadFile(path.Join(devicePath, "numa_node"))
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to read PCI NUMA node for %s: %v", address, err)
|
||||||
|
}
|
||||||
|
numaStr := strings.TrimSpace(string(numa))
|
||||||
|
numaNode, err := strconv.ParseInt(numaStr, 0, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to convert NUMA node string to int64: %v", numaNode)
|
||||||
|
}
|
||||||
|
|
||||||
config := &ConfigSpace{
|
config := &ConfigSpace{
|
||||||
Path: path.Join(devicePath, "config"),
|
Path: path.Join(devicePath, "config"),
|
||||||
}
|
}
|
||||||
@ -183,6 +194,7 @@ func (p *nvpci) GetAllDevices() ([]*NvidiaPCIDevice, error) {
|
|||||||
Vendor: uint16(vendorID),
|
Vendor: uint16(vendorID),
|
||||||
Class: uint32(classID),
|
Class: uint32(classID),
|
||||||
Device: uint16(deviceID),
|
Device: uint16(deviceID),
|
||||||
|
NumaNode: int(numaNode),
|
||||||
Config: config,
|
Config: config,
|
||||||
Resources: resources,
|
Resources: resources,
|
||||||
}
|
}
|
||||||
|
@ -27,14 +27,19 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestNvpci(t *testing.T) {
|
func TestNvpci(t *testing.T) {
|
||||||
nvpci, err := NewMockA100()
|
nvpci, err := NewMockNvpci()
|
||||||
require.Nil(t, err, "Error creating NewMockA100")
|
require.Nil(t, err, "Error creating NewMockNvpci")
|
||||||
defer nvpci.Cleanup()
|
defer nvpci.Cleanup()
|
||||||
|
|
||||||
|
err = nvpci.AddMockA100("0000:80:05.1", 0)
|
||||||
|
require.Nil(t, err, "Error adding Mock A100 device to MockNvpci")
|
||||||
|
|
||||||
devices, err := nvpci.GetGPUs()
|
devices, err := nvpci.GetGPUs()
|
||||||
require.Nil(t, err, "Error getting GPUs")
|
require.Nil(t, err, "Error getting GPUs")
|
||||||
require.Equal(t, 1, len(devices), "Wrong number of GPU devices")
|
require.Equal(t, 1, len(devices), "Wrong number of GPU devices")
|
||||||
require.Equal(t, 1, len(devices[0].Resources), "Wrong number GPU resources found")
|
require.Equal(t, 1, len(devices[0].Resources), "Wrong number GPU resources found")
|
||||||
|
require.Equal(t, "0000:80:05.1", devices[0].Address, "Wrong Address found for device")
|
||||||
|
require.Equal(t, 0, devices[0].NumaNode, "Wrong NUMA node found for device")
|
||||||
|
|
||||||
config, err := devices[0].Config.Read()
|
config, err := devices[0].Config.Read()
|
||||||
require.Nil(t, err, "Error reading config")
|
require.Nil(t, err, "Error reading config")
|
||||||
@ -58,3 +63,39 @@ func TestNvpci(t *testing.T) {
|
|||||||
require.Equal(t, int(resource0.End-resource0.Start+1), bar0.Len())
|
require.Equal(t, int(resource0.End-resource0.Start+1), bar0.Len())
|
||||||
require.Equal(t, ga100PmcID, bar0.Read32(0))
|
require.Equal(t, ga100PmcID, bar0.Read32(0))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNvpciNUMANode(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
Description string
|
||||||
|
NumaNode int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
Description: "Numa Node -1",
|
||||||
|
NumaNode: -1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Description: "Numa Node 0",
|
||||||
|
NumaNode: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Description: "Numa Node 1",
|
||||||
|
NumaNode: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.Description, func(t *testing.T) {
|
||||||
|
nvpci, err := NewMockNvpci()
|
||||||
|
require.Nil(t, err, "Error creating NewMockNvpci")
|
||||||
|
defer nvpci.Cleanup()
|
||||||
|
|
||||||
|
err = nvpci.AddMockA100("0000:80:05.1", tc.NumaNode)
|
||||||
|
require.Nil(t, err, "Error adding Mock A100 device to MockNvpci")
|
||||||
|
|
||||||
|
devices, err := nvpci.GetGPUs()
|
||||||
|
require.Nil(t, err, "Error getting GPUs")
|
||||||
|
require.Equal(t, 1, len(devices), "Wrong number of GPU devices")
|
||||||
|
require.Equal(t, tc.NumaNode, devices[0].NumaNode, "Wrong NUMA node found for device")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user