/* * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package nvml import "fmt" type MockServer struct { Devices []Device } type MockLunaServer struct { MockServer } type MockA100Device struct { Index int MinorNumber int MigMode int GpuInstances map[*MockA100GpuInstance]struct{} GpuInstanceCounter uint32 } type MockA100GpuInstance struct { Info GpuInstanceInfo ComputeInstances map[*MockA100ComputeInstance]struct{} ComputeInstanceCounter uint32 } type MockA100ComputeInstance struct { Info ComputeInstanceInfo } var _ Interface = (*MockLunaServer)(nil) var _ Device = (*MockA100Device)(nil) var _ GpuInstance = (*MockA100GpuInstance)(nil) var _ ComputeInstance = (*MockA100ComputeInstance)(nil) var MockA100MIGProfiles = struct { GpuInstanceProfiles map[int]GpuInstanceProfileInfo ComputeInstanceProfiles map[int]map[int]ComputeInstanceProfileInfo }{ GpuInstanceProfiles: map[int]GpuInstanceProfileInfo{ GPU_INSTANCE_PROFILE_1_SLICE: { Id: GPU_INSTANCE_PROFILE_1_SLICE, IsP2pSupported: 0, SliceCount: 1, InstanceCount: 7, MultiprocessorCount: 1, CopyEngineCount: 1, DecoderCount: 0, EncoderCount: 0, JpegCount: 0, OfaCount: 0, MemorySizeMB: 5120, }, GPU_INSTANCE_PROFILE_2_SLICE: { Id: GPU_INSTANCE_PROFILE_2_SLICE, IsP2pSupported: 0, SliceCount: 2, InstanceCount: 3, MultiprocessorCount: 2, CopyEngineCount: 2, DecoderCount: 1, EncoderCount: 1, JpegCount: 0, OfaCount: 0, MemorySizeMB: 10240, }, GPU_INSTANCE_PROFILE_3_SLICE: { Id: GPU_INSTANCE_PROFILE_3_SLICE, IsP2pSupported: 0, SliceCount: 3, InstanceCount: 2, MultiprocessorCount: 3, CopyEngineCount: 4, DecoderCount: 2, EncoderCount: 2, JpegCount: 0, OfaCount: 0, MemorySizeMB: 20480, }, GPU_INSTANCE_PROFILE_4_SLICE: { Id: GPU_INSTANCE_PROFILE_4_SLICE, IsP2pSupported: 0, SliceCount: 4, InstanceCount: 1, MultiprocessorCount: 4, CopyEngineCount: 4, DecoderCount: 2, EncoderCount: 2, JpegCount: 0, OfaCount: 0, MemorySizeMB: 20480, }, GPU_INSTANCE_PROFILE_7_SLICE: { Id: GPU_INSTANCE_PROFILE_7_SLICE, IsP2pSupported: 0, SliceCount: 7, InstanceCount: 1, MultiprocessorCount: 7, CopyEngineCount: 8, DecoderCount: 5, EncoderCount: 5, JpegCount: 1, OfaCount: 1, MemorySizeMB: 40960, }, }, ComputeInstanceProfiles: map[int]map[int]ComputeInstanceProfileInfo{ GPU_INSTANCE_PROFILE_1_SLICE: { COMPUTE_INSTANCE_PROFILE_1_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_1_SLICE, SliceCount: 1, InstanceCount: 1, MultiprocessorCount: 1, SharedCopyEngineCount: 1, SharedDecoderCount: 0, SharedEncoderCount: 0, SharedJpegCount: 0, SharedOfaCount: 0, }, }, GPU_INSTANCE_PROFILE_2_SLICE: { COMPUTE_INSTANCE_PROFILE_1_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_1_SLICE, SliceCount: 1, InstanceCount: 2, MultiprocessorCount: 1, SharedCopyEngineCount: 2, SharedDecoderCount: 1, SharedEncoderCount: 1, SharedJpegCount: 0, SharedOfaCount: 0, }, COMPUTE_INSTANCE_PROFILE_2_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_2_SLICE, SliceCount: 2, InstanceCount: 1, MultiprocessorCount: 2, SharedCopyEngineCount: 2, SharedDecoderCount: 1, SharedEncoderCount: 1, SharedJpegCount: 0, SharedOfaCount: 0, }, }, GPU_INSTANCE_PROFILE_3_SLICE: { COMPUTE_INSTANCE_PROFILE_1_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_1_SLICE, SliceCount: 1, InstanceCount: 3, MultiprocessorCount: 1, SharedCopyEngineCount: 4, SharedDecoderCount: 2, SharedEncoderCount: 1, SharedJpegCount: 0, SharedOfaCount: 0, }, COMPUTE_INSTANCE_PROFILE_2_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_2_SLICE, SliceCount: 2, InstanceCount: 1, MultiprocessorCount: 2, SharedCopyEngineCount: 4, SharedDecoderCount: 2, SharedEncoderCount: 2, SharedJpegCount: 0, SharedOfaCount: 0, }, COMPUTE_INSTANCE_PROFILE_3_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_3_SLICE, SliceCount: 3, InstanceCount: 1, MultiprocessorCount: 3, SharedCopyEngineCount: 4, SharedDecoderCount: 2, SharedEncoderCount: 0, SharedJpegCount: 0, SharedOfaCount: 0, }, }, GPU_INSTANCE_PROFILE_4_SLICE: { COMPUTE_INSTANCE_PROFILE_1_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_1_SLICE, SliceCount: 1, InstanceCount: 4, MultiprocessorCount: 1, SharedCopyEngineCount: 4, SharedDecoderCount: 2, SharedEncoderCount: 2, SharedJpegCount: 0, SharedOfaCount: 0, }, COMPUTE_INSTANCE_PROFILE_2_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_2_SLICE, SliceCount: 2, InstanceCount: 2, MultiprocessorCount: 2, SharedCopyEngineCount: 4, SharedDecoderCount: 2, SharedEncoderCount: 2, SharedJpegCount: 0, SharedOfaCount: 0, }, COMPUTE_INSTANCE_PROFILE_4_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_4_SLICE, SliceCount: 4, InstanceCount: 1, MultiprocessorCount: 4, SharedCopyEngineCount: 4, SharedDecoderCount: 2, SharedEncoderCount: 2, SharedJpegCount: 0, SharedOfaCount: 0, }, }, GPU_INSTANCE_PROFILE_7_SLICE: { COMPUTE_INSTANCE_PROFILE_1_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_1_SLICE, SliceCount: 1, InstanceCount: 7, MultiprocessorCount: 1, SharedCopyEngineCount: 8, SharedDecoderCount: 5, SharedEncoderCount: 5, SharedJpegCount: 1, SharedOfaCount: 1, }, COMPUTE_INSTANCE_PROFILE_2_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_2_SLICE, SliceCount: 2, InstanceCount: 3, MultiprocessorCount: 2, SharedCopyEngineCount: 8, SharedDecoderCount: 5, SharedEncoderCount: 5, SharedJpegCount: 1, SharedOfaCount: 1, }, COMPUTE_INSTANCE_PROFILE_3_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_3_SLICE, SliceCount: 3, InstanceCount: 2, MultiprocessorCount: 3, SharedCopyEngineCount: 8, SharedDecoderCount: 5, SharedEncoderCount: 5, SharedJpegCount: 1, SharedOfaCount: 1, }, COMPUTE_INSTANCE_PROFILE_4_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_4_SLICE, SliceCount: 4, InstanceCount: 1, MultiprocessorCount: 4, SharedCopyEngineCount: 8, SharedDecoderCount: 5, SharedEncoderCount: 5, SharedJpegCount: 1, SharedOfaCount: 1, }, COMPUTE_INSTANCE_PROFILE_7_SLICE: { Id: COMPUTE_INSTANCE_PROFILE_7_SLICE, SliceCount: 7, InstanceCount: 1, MultiprocessorCount: 7, SharedCopyEngineCount: 8, SharedDecoderCount: 5, SharedEncoderCount: 5, SharedJpegCount: 1, SharedOfaCount: 1, }, }, }, } func NewMockNVMLServer(devices ...Device) Interface { return &MockServer{ Devices: devices, } } func NewMockNVMLOnLunaServer() Interface { devices := []Device{ NewMockA100Device(0), NewMockA100Device(1), NewMockA100Device(2), NewMockA100Device(3), NewMockA100Device(4), NewMockA100Device(5), NewMockA100Device(6), NewMockA100Device(7), } return NewMockNVMLServer(devices...) } func NewMockA100Device(index int) Device { return &MockA100Device{ Index: index, GpuInstances: make(map[*MockA100GpuInstance]struct{}), GpuInstanceCounter: 0, } } func NewMockA100GpuInstance(info GpuInstanceInfo) GpuInstance { return &MockA100GpuInstance{ Info: info, ComputeInstances: make(map[*MockA100ComputeInstance]struct{}), ComputeInstanceCounter: 0, } } func NewMockA100ComputeInstance(info ComputeInstanceInfo) ComputeInstance { return &MockA100ComputeInstance{ Info: info, } } func (n *MockServer) Init() Return { return MockReturn(SUCCESS) } func (n *MockServer) Shutdown() Return { return MockReturn(SUCCESS) } func (n *MockServer) DeviceGetCount() (int, Return) { return len(n.Devices), MockReturn(SUCCESS) } func (n *MockServer) DeviceGetHandleByIndex(index int) (Device, Return) { if index < 0 || index >= len(n.Devices) { return nil, MockReturn(ERROR_INVALID_ARGUMENT) } return n.Devices[index], MockReturn(SUCCESS) } func (n *MockServer) SystemGetDriverVersion() (string, Return) { return "999.99", MockReturn(SUCCESS) } func (d *MockA100Device) GetIndex() (int, Return) { return d.Index, MockReturn(SUCCESS) } func (d *MockA100Device) GetPciInfo() (PciInfo, Return) { var busID [32]int8 for i, b := range []byte("0000FFFF:FF:FF.F") { busID[i] = int8(b) } p := PciInfo{ BusId: busID, PciDeviceId: 0x20B010DE, } return p, MockReturn(SUCCESS) } func (d *MockA100Device) GetUUID() (string, Return) { return fmt.Sprintf("GPU-%d", d.Index), MockReturn(SUCCESS) } func (d *MockA100Device) GetMinorNumber() (int, Return) { return d.MinorNumber, MockReturn(SUCCESS) } func (d *MockA100Device) SetMigMode(mode int) (Return, Return) { d.MigMode = mode return MockReturn(SUCCESS), MockReturn(SUCCESS) } func (d *MockA100Device) GetMigMode() (int, int, Return) { return d.MigMode, d.MigMode, MockReturn(SUCCESS) } func (d *MockA100Device) GetGpuInstanceProfileInfo(giProfileId int) (GpuInstanceProfileInfo, Return) { if giProfileId < 0 || giProfileId >= GPU_INSTANCE_PROFILE_COUNT { return GpuInstanceProfileInfo{}, MockReturn(ERROR_INVALID_ARGUMENT) } if _, exists := MockA100MIGProfiles.GpuInstanceProfiles[giProfileId]; !exists { return GpuInstanceProfileInfo{}, MockReturn(ERROR_NOT_SUPPORTED) } return MockA100MIGProfiles.GpuInstanceProfiles[giProfileId], MockReturn(SUCCESS) } func (d *MockA100Device) CreateGpuInstance(info *GpuInstanceProfileInfo) (GpuInstance, Return) { giInfo := GpuInstanceInfo{ Device: d, Id: d.GpuInstanceCounter, ProfileId: info.Id, } d.GpuInstanceCounter++ gi := NewMockA100GpuInstance(giInfo) d.GpuInstances[gi.(*MockA100GpuInstance)] = struct{}{} return gi, MockReturn(SUCCESS) } func (d *MockA100Device) GetGpuInstances(info *GpuInstanceProfileInfo) ([]GpuInstance, Return) { var gis []GpuInstance for gi := range d.GpuInstances { if gi.Info.ProfileId == info.Id { gis = append(gis, gi) } } return gis, MockReturn(SUCCESS) } func (d *MockA100Device) GetMaxMigDeviceCount() (int, Return) { var count int for gi := range d.GpuInstances { count = count + int(gi.ComputeInstanceCounter) } return count, MockReturn(SUCCESS) } func (d *MockA100Device) GetMigDeviceHandleByIndex(Index int) (Device, Return) { var count int for gi := range d.GpuInstances { if count+int(gi.ComputeInstanceCounter) < Index { count = count + int(gi.ComputeInstanceCounter) continue } for ci := range gi.ComputeInstances { if count < Index { count++ continue } return ci, MockReturn(SUCCESS) } } return nil, MockReturn(ERROR_NOT_FOUND) } func (d *MockA100Device) GetDeviceHandleFromMigDeviceHandle() (Device, Return) { return nil, MockReturn(ERROR_NOT_SUPPORTED) } func (d *MockA100Device) IsMigDeviceHandle() (bool, Return) { return false, MockReturn(SUCCESS) } func (d *MockA100Device) GetComputeInstanceId() (int, Return) { panic("Not implemented: GetComputeInstanceId") } func (d *MockA100Device) GetGPUInstanceId() (int, Return) { panic("Not implemented: GetGPUInstanceId") } func (gi *MockA100GpuInstance) GetInfo() (GpuInstanceInfo, Return) { return gi.Info, MockReturn(SUCCESS) } func (gi *MockA100GpuInstance) GetComputeInstanceProfileInfo(ciProfileId int, ciEngProfileId int) (ComputeInstanceProfileInfo, Return) { if ciProfileId < 0 || ciProfileId >= COMPUTE_INSTANCE_PROFILE_COUNT { return ComputeInstanceProfileInfo{}, MockReturn(ERROR_INVALID_ARGUMENT) } if ciEngProfileId != COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED { return ComputeInstanceProfileInfo{}, MockReturn(ERROR_NOT_SUPPORTED) } giProfileId := int(gi.Info.ProfileId) if _, exists := MockA100MIGProfiles.ComputeInstanceProfiles[giProfileId]; !exists { return ComputeInstanceProfileInfo{}, MockReturn(ERROR_NOT_SUPPORTED) } if _, exists := MockA100MIGProfiles.ComputeInstanceProfiles[giProfileId][ciProfileId]; !exists { return ComputeInstanceProfileInfo{}, MockReturn(ERROR_NOT_SUPPORTED) } return MockA100MIGProfiles.ComputeInstanceProfiles[giProfileId][ciProfileId], MockReturn(SUCCESS) } func (gi *MockA100GpuInstance) CreateComputeInstance(info *ComputeInstanceProfileInfo) (ComputeInstance, Return) { ciInfo := ComputeInstanceInfo{ Device: gi.Info.Device, GpuInstance: gi, Id: gi.ComputeInstanceCounter, ProfileId: info.Id, } gi.ComputeInstanceCounter++ ci := NewMockA100ComputeInstance(ciInfo) gi.ComputeInstances[ci.(*MockA100ComputeInstance)] = struct{}{} return ci, MockReturn(SUCCESS) } func (gi *MockA100GpuInstance) GetComputeInstances(info *ComputeInstanceProfileInfo) ([]ComputeInstance, Return) { var cis []ComputeInstance for ci := range gi.ComputeInstances { if ci.Info.ProfileId == info.Id { cis = append(cis, ci) } } return cis, MockReturn(SUCCESS) } func (gi *MockA100GpuInstance) Destroy() Return { delete(gi.Info.Device.(*MockA100Device).GpuInstances, gi) return MockReturn(SUCCESS) } func (ci *MockA100ComputeInstance) GetInfo() (ComputeInstanceInfo, Return) { return ci.Info, MockReturn(SUCCESS) } func (ci *MockA100ComputeInstance) Destroy() Return { delete(ci.Info.GpuInstance.(*MockA100GpuInstance).ComputeInstances, ci) return MockReturn(SUCCESS) } // Since a compute instance can be used as a MIG device handle, it must also // implement the Device interface var _ Device = (*MockA100ComputeInstance)(nil) func (c *MockA100ComputeInstance) GetIndex() (int, Return) { return int(c.Info.Id), MockReturn(SUCCESS) } func (c *MockA100ComputeInstance) GetPciInfo() (PciInfo, Return) { // TODO: How does this behave on an actual MIG system? panic("Not implemented: GetPciInfo") } func (c *MockA100ComputeInstance) GetUUID() (string, Return) { return fmt.Sprintf("MIG-%d", c.Info.Id), MockReturn(SUCCESS) } func (c *MockA100ComputeInstance) GetMinorNumber() (int, Return) { // TODO: This depends on the content of the mig-minors file and the (gpu, gi, ci) tuple panic("Not implemented: GetMinorNumber") } func (c *MockA100ComputeInstance) SetMigMode(Mode int) (Return, Return) { panic("Not implemented: SetMigMode") } func (c *MockA100ComputeInstance) GetMigMode() (int, int, Return) { panic("Not implemented: GetMigMode") } func (c *MockA100ComputeInstance) GetGpuInstanceProfileInfo(Profile int) (GpuInstanceProfileInfo, Return) { panic("Not implemented: GetGpuInstanceProfileInfo") } func (c *MockA100ComputeInstance) CreateGpuInstance(Info *GpuInstanceProfileInfo) (GpuInstance, Return) { panic("Not implemented: CreateGpuInstance") } func (c *MockA100ComputeInstance) GetGpuInstances(Info *GpuInstanceProfileInfo) ([]GpuInstance, Return) { panic("Not implemented: GetGpuInstances") } func (c *MockA100ComputeInstance) GetMaxMigDeviceCount() (int, Return) { panic("Not implemented: GetMaxMigDeviceCount") } func (c *MockA100ComputeInstance) GetMigDeviceHandleByIndex(Index int) (Device, Return) { panic("Not implemented: GetMigDeviceHandleByIndex") } func (c *MockA100ComputeInstance) GetDeviceHandleFromMigDeviceHandle() (Device, Return) { return c.Info.Device, MockReturn(SUCCESS) } func (c *MockA100ComputeInstance) IsMigDeviceHandle() (bool, Return) { return true, MockReturn(SUCCESS) } func (c *MockA100ComputeInstance) GetComputeInstanceId() (int, Return) { return int(c.Info.Id), MockReturn(SUCCESS) } func (c *MockA100ComputeInstance) GetGPUInstanceId() (int, Return) { info, r := c.Info.GpuInstance.GetInfo() if r.Value() != SUCCESS { return 0, MockReturn(r.Value()) } return int(info.Id), MockReturn(SUCCESS) }