mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 00:08:11 +00:00
Add internal/nvcaps package
This change adds an internal nvcaps pacakge. This package will be migrated to go-nvlib. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
a34a571d2e
commit
a0065456d0
166
internal/nvcaps/nvcaps.go
Normal file
166
internal/nvcaps/nvcaps.go
Normal file
@ -0,0 +1,166 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package nvcaps
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaProcDriverPath = "/proc/driver/nvidia"
|
||||
nvidiaCapabilitiesPath = nvidiaProcDriverPath + "/capabilities"
|
||||
|
||||
nvcapsProcDriverPath = "/proc/driver/nvidia-caps"
|
||||
nvcapsMigMinorsPath = nvcapsProcDriverPath + "/mig-minors"
|
||||
nvcapsDevicePath = "/dev/nvidia-caps"
|
||||
)
|
||||
|
||||
// MigMinor represents the minor number of a MIG device
|
||||
type MigMinor int
|
||||
|
||||
// MigCap represents the path to a MIG cap file
|
||||
type MigCap string
|
||||
|
||||
// MigCaps stores a map of MIG cap file paths to MIG minors
|
||||
type MigCaps map[MigCap]MigMinor
|
||||
|
||||
// NewGPUInstanceCap creates a MigCap for the specified MIG GPU instance.
|
||||
// A GPU instance is uniquely defined by the GPU minor number and GI instance ID.
|
||||
func NewGPUInstanceCap(gpu, gi int) MigCap {
|
||||
return MigCap(fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/access", gpu, gi))
|
||||
}
|
||||
|
||||
// NewComputeInstanceCap creates a MigCap for the specified MIG Compute instance.
|
||||
// A GPU instance is uniquely defined by the GPU minor number, GI instance ID, and CI instance ID.
|
||||
func NewComputeInstanceCap(gpu, gi, ci int) MigCap {
|
||||
return MigCap(fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci))
|
||||
}
|
||||
|
||||
// GetCapDevicePath returns the path to the cap device for the specified cap.
|
||||
// An error is returned if the cap is invalid.
|
||||
func (m MigCaps) GetCapDevicePath(cap MigCap) (string, error) {
|
||||
minor, exists := m[cap]
|
||||
if !exists {
|
||||
return "", fmt.Errorf("invalid MIG capability path %v", cap)
|
||||
}
|
||||
return minor.DevicePath(), nil
|
||||
}
|
||||
|
||||
// NewMigCaps creates a MigCaps structure based on the contents of the MIG minors file.
|
||||
func NewMigCaps() (MigCaps, error) {
|
||||
// Open nvcapsMigMinorsPath for walking.
|
||||
// If the nvcapsMigMinorsPath does not exist, then we are not on a MIG
|
||||
// capable machine, so there is nothing to do.
|
||||
// The format of this file is discussed in:
|
||||
// https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#unique_1576522674
|
||||
minorsFile, err := os.Open(nvcapsMigMinorsPath)
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening MIG minors file: %v", err)
|
||||
}
|
||||
defer minorsFile.Close()
|
||||
|
||||
return processMinorsFile(minorsFile), nil
|
||||
}
|
||||
|
||||
func processMinorsFile(minorsFile io.Reader) MigCaps {
|
||||
// Walk each line of nvcapsMigMinorsPath and construct a mapping of nvidia
|
||||
// capabilities path to device minor for that capability
|
||||
migCaps := make(MigCaps)
|
||||
scanner := bufio.NewScanner(minorsFile)
|
||||
for scanner.Scan() {
|
||||
cap, minor, err := processMigMinorsLine(scanner.Text())
|
||||
if err != nil {
|
||||
log.Printf("Skipping line in MIG minors file: %v", err)
|
||||
continue
|
||||
}
|
||||
migCaps[cap] = minor
|
||||
}
|
||||
return migCaps
|
||||
}
|
||||
|
||||
func processMigMinorsLine(line string) (MigCap, MigMinor, error) {
|
||||
parts := strings.Split(line, " ")
|
||||
if len(parts) != 2 {
|
||||
return "", 0, fmt.Errorf("error processing line: %v", line)
|
||||
}
|
||||
|
||||
migCap := MigCap(parts[0])
|
||||
if !migCap.isValid() {
|
||||
return "", 0, fmt.Errorf("invalid MIG minors line: '%v'", line)
|
||||
}
|
||||
|
||||
minor, err := strconv.Atoi(parts[1])
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("error reading MIG minor from '%v': %v", line, err)
|
||||
}
|
||||
|
||||
return migCap, MigMinor(minor), nil
|
||||
}
|
||||
|
||||
func (m MigCap) isValid() bool {
|
||||
cap := string(m)
|
||||
switch cap {
|
||||
case "config", "monitor":
|
||||
return true
|
||||
default:
|
||||
var gpu int
|
||||
var gi int
|
||||
var ci int
|
||||
// Look for a CI access file
|
||||
n, _ := fmt.Sscanf(cap, "gpu%d/gi%d/ci%d/access", &gpu, &gi, &ci)
|
||||
if n == 3 {
|
||||
return true
|
||||
}
|
||||
// Look for a GI access file
|
||||
n, _ = fmt.Sscanf(cap, "gpu%d/gi%d/access %d", &gpu, &gi)
|
||||
if n == 2 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ProcPath returns the proc path associated with the MIG capability
|
||||
func (m MigCap) ProcPath() string {
|
||||
id := string(m)
|
||||
|
||||
var path string
|
||||
switch id {
|
||||
case "config", "monitor":
|
||||
path = "mig/" + id
|
||||
default:
|
||||
parts := strings.SplitN(id, "/", 2)
|
||||
path = strings.Join([]string{parts[0], "mig", parts[1]}, "/")
|
||||
}
|
||||
return filepath.Join(nvidiaCapabilitiesPath, path)
|
||||
}
|
||||
|
||||
// DevicePath returns the path for the nvidia-caps device with the specified
|
||||
// minor number
|
||||
func (m MigMinor) DevicePath() string {
|
||||
return fmt.Sprintf(nvcapsDevicePath+"/nvidia-cap%d", m)
|
||||
}
|
100
internal/nvcaps/nvcaps_test.go
Normal file
100
internal/nvcaps/nvcaps_test.go
Normal file
@ -0,0 +1,100 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package nvcaps
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestProcessMinorsFile(t *testing.T) {
|
||||
testCases := []struct {
|
||||
lines []string
|
||||
expected MigCaps
|
||||
}{
|
||||
{[]string{}, MigCaps{}},
|
||||
{[]string{"invalidLine"}, MigCaps{}},
|
||||
{[]string{"config 1"}, MigCaps{"config": 1}},
|
||||
{[]string{"gpu0/gi0/ci0/access 4"}, MigCaps{"gpu0/gi0/ci0/access": 4}},
|
||||
{[]string{"config 1", "invalidLine"}, MigCaps{"config": 1}},
|
||||
{[]string{"config 1", "gpu0/gi0/ci0/access 4"}, MigCaps{"config": 1, "gpu0/gi0/ci0/access": 4}},
|
||||
}
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("testcase %d", i), func(t *testing.T) {
|
||||
contents := strings.NewReader(strings.Join(tc.lines, "\n"))
|
||||
d := processMinorsFile(contents)
|
||||
require.Equal(t, tc.expected, d)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessMigMinorsLine(t *testing.T) {
|
||||
testCases := []struct {
|
||||
line string
|
||||
cap MigCap
|
||||
minor MigMinor
|
||||
err bool
|
||||
}{
|
||||
{"config 1", "config", 1, false},
|
||||
{"monitor 2", "monitor", 2, false},
|
||||
{"gpu0/gi0/access 3", "gpu0/gi0/access", 3, false},
|
||||
{"gpu0/gi0/ci0/access 4", "gpu0/gi0/ci0/access", 4, false},
|
||||
{"notconfig 99", "", 0, true},
|
||||
{"config notanint", "", 0, true},
|
||||
{"", "", 0, true},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("testcase %d", i), func(t *testing.T) {
|
||||
cap, minor, err := processMigMinorsLine(tc.line)
|
||||
|
||||
require.Equal(t, tc.cap, cap)
|
||||
require.Equal(t, tc.minor, minor)
|
||||
if tc.err {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigCapProcPaths(t *testing.T) {
|
||||
testCases := []struct {
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"config", "/proc/driver/nvidia/capabilities/mig/config"},
|
||||
{"monitor", "/proc/driver/nvidia/capabilities/mig/monitor"},
|
||||
{"gpu0/gi0/access", "/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"},
|
||||
{"gpu0/gi0/ci0/access", "/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"},
|
||||
}
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("testcase %d", i), func(t *testing.T) {
|
||||
m := MigCap(tc.input)
|
||||
require.Equal(t, tc.expected, m.ProcPath())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMigMinorDevicePath(t *testing.T) {
|
||||
m := MigMinor(0)
|
||||
require.Equal(t, "/dev/nvidia-caps/nvidia-cap0", m.DevicePath())
|
||||
}
|
Loading…
Reference in New Issue
Block a user