2022-08-11 14:11:04 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
package device
|
2022-08-11 14:11:04 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"math"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
|
|
|
)
|
|
|
|
|
|
|
|
// AttributeMediaExtensions holds the string representation for the media extension MIG profile attribute.
const AttributeMediaExtensions = "me"
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
// MigProfile represents a specific MIG profile.
|
2022-08-11 14:11:04 +00:00
|
|
|
// Examples include "1g.5gb", "2g.10gb", "1c.2g.10gb", or "1c.1g.5gb+me", etc.
|
2022-09-16 10:04:59 +00:00
|
|
|
type MigProfile interface {
|
2022-08-11 14:11:04 +00:00
|
|
|
String() string
|
2022-09-16 10:04:59 +00:00
|
|
|
GetInfo() MigProfileInfo
|
|
|
|
Equals(other MigProfile) bool
|
2022-08-11 14:11:04 +00:00
|
|
|
}
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
// MigProfileInfo holds all info associated with a specific MIG profile
type MigProfileInfo struct {
	// C and G are the compute instance and GPU instance slice counts; GB is
	// the memory size of the profile in gigabytes.
	C, G, GB int
	// Attributes lists the optional attributes of the profile, i.e. the
	// comma-separated entries that follow '+' in its string form.
	Attributes []string
	// GIProfileID, CIProfileID and CIEngProfileID are the GPU instance,
	// compute instance and compute instance engine profile IDs.
	GIProfileID, CIProfileID, CIEngProfileID int
}
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
// Compile-time check that *MigProfileInfo implements MigProfile.
var _ MigProfile = (*MigProfileInfo)(nil)
|
2022-08-11 14:11:04 +00:00
|
|
|
|
|
|
|
// NewProfile constructs a new Profile struct using info from the giProfiles and ciProfiles used to create it.
|
2022-09-16 10:04:59 +00:00
|
|
|
func (d *devicelib) NewMigProfile(giProfileID, ciProfileID, ciEngProfileID int, migMemorySizeMB, deviceMemorySizeBytes uint64) (MigProfile, error) {
|
2022-08-11 14:11:04 +00:00
|
|
|
giSlices := 0
|
|
|
|
switch giProfileID {
|
|
|
|
case nvml.GPU_INSTANCE_PROFILE_1_SLICE:
|
|
|
|
giSlices = 1
|
|
|
|
case nvml.GPU_INSTANCE_PROFILE_1_SLICE_REV1:
|
|
|
|
giSlices = 1
|
|
|
|
case nvml.GPU_INSTANCE_PROFILE_2_SLICE:
|
|
|
|
giSlices = 2
|
|
|
|
case nvml.GPU_INSTANCE_PROFILE_3_SLICE:
|
|
|
|
giSlices = 3
|
|
|
|
case nvml.GPU_INSTANCE_PROFILE_4_SLICE:
|
|
|
|
giSlices = 4
|
|
|
|
case nvml.GPU_INSTANCE_PROFILE_6_SLICE:
|
|
|
|
giSlices = 6
|
|
|
|
case nvml.GPU_INSTANCE_PROFILE_7_SLICE:
|
|
|
|
giSlices = 7
|
|
|
|
case nvml.GPU_INSTANCE_PROFILE_8_SLICE:
|
|
|
|
giSlices = 8
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("invalid GPU Instance Profile ID: %v", giProfileID)
|
|
|
|
}
|
|
|
|
|
|
|
|
ciSlices := 0
|
|
|
|
switch ciProfileID {
|
|
|
|
case nvml.COMPUTE_INSTANCE_PROFILE_1_SLICE:
|
|
|
|
ciSlices = 1
|
|
|
|
case nvml.COMPUTE_INSTANCE_PROFILE_2_SLICE:
|
|
|
|
ciSlices = 2
|
|
|
|
case nvml.COMPUTE_INSTANCE_PROFILE_3_SLICE:
|
|
|
|
ciSlices = 3
|
|
|
|
case nvml.COMPUTE_INSTANCE_PROFILE_4_SLICE:
|
|
|
|
ciSlices = 4
|
|
|
|
case nvml.COMPUTE_INSTANCE_PROFILE_6_SLICE:
|
|
|
|
ciSlices = 6
|
|
|
|
case nvml.COMPUTE_INSTANCE_PROFILE_7_SLICE:
|
|
|
|
ciSlices = 7
|
|
|
|
case nvml.COMPUTE_INSTANCE_PROFILE_8_SLICE:
|
|
|
|
ciSlices = 8
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("invalid Compute Instance Profile ID: %v", ciProfileID)
|
|
|
|
}
|
|
|
|
|
|
|
|
var attrs []string
|
|
|
|
switch giProfileID {
|
|
|
|
case nvml.GPU_INSTANCE_PROFILE_1_SLICE_REV1:
|
|
|
|
attrs = append(attrs, AttributeMediaExtensions)
|
|
|
|
}
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
p := &MigProfileInfo{
|
2022-08-11 14:11:04 +00:00
|
|
|
C: ciSlices,
|
|
|
|
G: giSlices,
|
|
|
|
GB: int(getMigMemorySizeGB(deviceMemorySizeBytes, migMemorySizeMB)),
|
|
|
|
Attributes: attrs,
|
|
|
|
GIProfileID: giProfileID,
|
|
|
|
CIProfileID: ciProfileID,
|
|
|
|
CIEngProfileID: ciEngProfileID,
|
|
|
|
}
|
|
|
|
|
|
|
|
return p, nil
|
|
|
|
}
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
// ParseMigProfile converts a string representation of a MigProfile into an object
|
|
|
|
func (d *devicelib) ParseMigProfile(profile string) (MigProfile, error) {
|
2022-08-11 14:11:04 +00:00
|
|
|
var err error
|
|
|
|
var c, g, gb int
|
|
|
|
var attrs []string
|
|
|
|
|
|
|
|
if len(profile) == 0 {
|
|
|
|
return nil, fmt.Errorf("empty Profile string")
|
|
|
|
}
|
|
|
|
|
|
|
|
split := strings.SplitN(profile, "+", 2)
|
|
|
|
if len(split) == 2 {
|
2022-09-16 10:04:59 +00:00
|
|
|
attrs, err = parseMigProfileAttributes(split[1])
|
2022-08-11 14:11:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("error parsing attributes following '+' in Profile string: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
c, g, gb, err = parseMigProfileFields(split[0])
|
2022-08-11 14:11:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("error parsing '.' separated fields in Profile string: %v", err)
|
|
|
|
}
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
p := &MigProfileInfo{
|
2022-08-11 14:11:04 +00:00
|
|
|
C: c,
|
|
|
|
G: g,
|
|
|
|
GB: gb,
|
|
|
|
Attributes: attrs,
|
|
|
|
}
|
|
|
|
|
|
|
|
switch c {
|
|
|
|
case 1:
|
|
|
|
p.CIProfileID = nvml.COMPUTE_INSTANCE_PROFILE_1_SLICE
|
|
|
|
case 2:
|
|
|
|
p.CIProfileID = nvml.COMPUTE_INSTANCE_PROFILE_2_SLICE
|
|
|
|
case 3:
|
|
|
|
p.CIProfileID = nvml.COMPUTE_INSTANCE_PROFILE_3_SLICE
|
|
|
|
case 4:
|
|
|
|
p.CIProfileID = nvml.COMPUTE_INSTANCE_PROFILE_4_SLICE
|
|
|
|
case 6:
|
|
|
|
p.CIProfileID = nvml.COMPUTE_INSTANCE_PROFILE_6_SLICE
|
|
|
|
case 7:
|
|
|
|
p.CIProfileID = nvml.COMPUTE_INSTANCE_PROFILE_7_SLICE
|
|
|
|
case 8:
|
|
|
|
p.CIProfileID = nvml.COMPUTE_INSTANCE_PROFILE_8_SLICE
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("unknown Compute Instance slice size: %v", c)
|
|
|
|
}
|
|
|
|
|
|
|
|
switch g {
|
|
|
|
case 1:
|
|
|
|
p.GIProfileID = nvml.GPU_INSTANCE_PROFILE_1_SLICE
|
|
|
|
case 2:
|
|
|
|
p.GIProfileID = nvml.GPU_INSTANCE_PROFILE_2_SLICE
|
|
|
|
case 3:
|
|
|
|
p.GIProfileID = nvml.GPU_INSTANCE_PROFILE_3_SLICE
|
|
|
|
case 4:
|
|
|
|
p.GIProfileID = nvml.GPU_INSTANCE_PROFILE_4_SLICE
|
|
|
|
case 6:
|
|
|
|
p.GIProfileID = nvml.GPU_INSTANCE_PROFILE_6_SLICE
|
|
|
|
case 7:
|
|
|
|
p.GIProfileID = nvml.GPU_INSTANCE_PROFILE_7_SLICE
|
|
|
|
case 8:
|
|
|
|
p.GIProfileID = nvml.GPU_INSTANCE_PROFILE_8_SLICE
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("unknown GPU Instance slice size: %v", g)
|
|
|
|
}
|
|
|
|
|
|
|
|
p.CIEngProfileID = nvml.COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED
|
|
|
|
|
|
|
|
for _, a := range attrs {
|
|
|
|
switch a {
|
|
|
|
case AttributeMediaExtensions:
|
|
|
|
p.GIProfileID = nvml.GPU_INSTANCE_PROFILE_1_SLICE_REV1
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("unknown Profile attribute: %v", a)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return p, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// String returns the string representation of a Profile
|
2022-09-16 10:04:59 +00:00
|
|
|
func (p *MigProfileInfo) String() string {
|
2022-08-11 14:11:04 +00:00
|
|
|
var suffix string
|
|
|
|
if len(p.Attributes) > 0 {
|
|
|
|
suffix = "+" + strings.Join(p.Attributes, ",")
|
|
|
|
}
|
|
|
|
if p.C == p.G {
|
|
|
|
return fmt.Sprintf("%dg.%dgb%s", p.G, p.GB, suffix)
|
|
|
|
}
|
|
|
|
return fmt.Sprintf("%dc.%dg.%dgb%s", p.C, p.G, p.GB, suffix)
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetInfo returns detailed info about a Profile
|
2022-09-16 10:04:59 +00:00
|
|
|
func (p *MigProfileInfo) GetInfo() MigProfileInfo {
|
2022-08-11 14:11:04 +00:00
|
|
|
return *p
|
|
|
|
}
|
|
|
|
|
|
|
|
// Equals checks if two Profiles are identical or not
|
2022-09-16 10:04:59 +00:00
|
|
|
func (p *MigProfileInfo) Equals(other MigProfile) bool {
|
2022-08-11 14:11:04 +00:00
|
|
|
switch o := other.(type) {
|
2022-09-16 10:04:59 +00:00
|
|
|
case *MigProfileInfo:
|
2022-08-11 14:11:04 +00:00
|
|
|
if p.C != o.C {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if p.G != o.G {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if p.GB != o.GB {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if p.GIProfileID != o.GIProfileID {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if p.CIProfileID != o.CIProfileID {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if p.CIEngProfileID != o.CIEngProfileID {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
// parseMigProfileField parses one "<number><field>" component of a MIG profile
// string (for example "2g" with field "g") and returns the number.
//
// It returns -1 and an error if s has leading or trailing whitespace, does not
// end in field, or if what precedes field is not an unsigned decimal number.
func parseMigProfileField(s string, field string) (int, error) {
	if strings.TrimSpace(s) != s {
		return -1, fmt.Errorf("leading or trailing spaces on '%%d%s'", field)
	}

	if !strings.HasSuffix(s, field) {
		return -1, fmt.Errorf("missing '%s' from '%%d%s'", field, field)
	}

	number := strings.TrimSuffix(s, field)
	v, err := strconv.Atoi(number)
	// Atoi accepts an explicit sign; a slice count or memory size such as
	// "-5" or "+5" is not a valid profile value, so insist on a leading digit.
	// number is known to be non-empty here because Atoi fails on "".
	if err != nil || number[0] < '0' || number[0] > '9' {
		return -1, fmt.Errorf("malformed number in '%%d%s'", field)
	}

	return v, nil
}
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
func parseMigProfileFields(s string) (int, int, int, error) {
|
2022-08-11 14:11:04 +00:00
|
|
|
var err error
|
|
|
|
var c, g, gb int
|
|
|
|
|
|
|
|
split := strings.SplitN(s, ".", 3)
|
|
|
|
if len(split) == 3 {
|
2022-09-16 10:04:59 +00:00
|
|
|
c, err = parseMigProfileField(split[0], "c")
|
2022-08-11 14:11:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return -1, -1, -1, err
|
|
|
|
}
|
2022-09-16 10:04:59 +00:00
|
|
|
g, err = parseMigProfileField(split[1], "g")
|
2022-08-11 14:11:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return -1, -1, -1, err
|
|
|
|
}
|
2022-09-16 10:04:59 +00:00
|
|
|
gb, err = parseMigProfileField(split[2], "gb")
|
2022-08-11 14:11:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return -1, -1, -1, err
|
|
|
|
}
|
|
|
|
return c, g, gb, err
|
|
|
|
}
|
|
|
|
if len(split) == 2 {
|
2022-09-16 10:04:59 +00:00
|
|
|
g, err = parseMigProfileField(split[0], "g")
|
2022-08-11 14:11:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return -1, -1, -1, err
|
|
|
|
}
|
2022-09-16 10:04:59 +00:00
|
|
|
gb, err = parseMigProfileField(split[1], "gb")
|
2022-08-11 14:11:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return -1, -1, -1, err
|
|
|
|
}
|
|
|
|
return g, g, gb, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return -1, -1, -1, fmt.Errorf("parsed wrong number of fields, expected 2 or 3")
|
|
|
|
}
|
|
|
|
|
2022-09-16 10:04:59 +00:00
|
|
|
// parseMigProfileAttributes splits the comma-separated attribute list that
// follows '+' in a MIG profile string and validates every entry.
//
// An error is returned if the list is empty, or if any attribute is repeated,
// empty, has leading or trailing whitespace, begins with a digit, or contains
// a character other than an ASCII letter or digit.
func parseMigProfileAttributes(s string) ([]string, error) {
	// strings.Split never returns an empty slice for a non-empty separator
	// (it returns [""] for empty input), so an empty list has to be detected
	// on the input itself.
	if s == "" {
		return nil, fmt.Errorf("empty attribute list")
	}

	attr := strings.Split(s, ",")
	seen := make(map[string]struct{}, len(attr))
	for _, a := range attr {
		if _, dup := seen[a]; dup {
			return nil, fmt.Errorf("non unique attribute in list")
		}
		if a == "" {
			return nil, fmt.Errorf("empty attribute in list")
		}
		if strings.TrimSpace(a) != a {
			return nil, fmt.Errorf("leading or trailing spaces in attribute")
		}
		if a[0] >= '0' && a[0] <= '9' {
			return nil, fmt.Errorf("attribute begins with a number")
		}
		for _, c := range a {
			if (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && (c < '0' || c > '9') {
				return nil, fmt.Errorf("non alpha-numeric character or digit in attribute")
			}
		}
		seen[a] = struct{}{}
	}
	return attr, nil
}
|
|
|
|
|
|
|
|
// getMigMemorySizeGB returns the memory size, in GB, to report for a MIG
// instance with migMemorySizeMB of memory on a device with totalDeviceMemory
// bytes of memory.
//
// The instance's share of the device memory is rounded up to the next eighth,
// and that fraction is applied to the device memory rounded up to whole GB.
// A totalDeviceMemory of 0 yields 0.
func getMigMemorySizeGB(totalDeviceMemory, migMemorySizeMB uint64) uint64 {
	const fracDenominator = 8
	const oneMB = 1024 * 1024
	const oneGB = 1024 * 1024 * 1024

	// Without this guard the division below produces Inf/NaN, and converting
	// NaN to uint64 gives an implementation-dependent value.
	if totalDeviceMemory == 0 {
		return 0
	}

	fractionalGpuMem := (float64(migMemorySizeMB) * oneMB) / float64(totalDeviceMemory)
	fractionalGpuMem = math.Ceil(fractionalGpuMem*fracDenominator) / fracDenominator
	// Integer ceiling division: device memory in whole GB, rounded up.
	totalMemGB := float64((totalDeviceMemory + oneGB - 1) / oneGB)
	return uint64(math.Round(fractionalGpuMem * totalMemGB))
}
|