From a0065456d03cd62a8e01a64a7e33db17914441b6 Mon Sep 17 00:00:00 2001
From: Evan Lezar
Date: Wed, 28 Sep 2022 14:38:54 +0200
Subject: [PATCH] Add internal/nvcaps package

This change adds an internal nvcaps package. This package will be
migrated to go-nvlib.

Signed-off-by: Evan Lezar
---
 internal/nvcaps/nvcaps.go      | 166 +++++++++++++++++++++++++++++++++
 internal/nvcaps/nvcaps_test.go | 100 ++++++++++++++++++++
 2 files changed, 266 insertions(+)
 create mode 100644 internal/nvcaps/nvcaps.go
 create mode 100644 internal/nvcaps/nvcaps_test.go

diff --git a/internal/nvcaps/nvcaps.go b/internal/nvcaps/nvcaps.go
new file mode 100644
index 00000000..be490084
--- /dev/null
+++ b/internal/nvcaps/nvcaps.go
@@ -0,0 +1,166 @@
+/*
+# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/
+
+// Package nvcaps maps NVIDIA MIG capabilities to the minor numbers and device
+// nodes of the nvidia-caps driver.
+package nvcaps
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+const (
+	nvidiaProcDriverPath   = "/proc/driver/nvidia"
+	nvidiaCapabilitiesPath = nvidiaProcDriverPath + "/capabilities"
+
+	nvcapsProcDriverPath = "/proc/driver/nvidia-caps"
+	nvcapsMigMinorsPath  = nvcapsProcDriverPath + "/mig-minors"
+	nvcapsDevicePath     = "/dev/nvidia-caps"
+)
+
+// migAccessCapPattern matches the per-instance access caps listed in the MIG
+// minors file: "gpu<N>/gi<M>/access" and "gpu<N>/gi<M>/ci<K>/access".
+var migAccessCapPattern = regexp.MustCompile(`^gpu[0-9]+/gi[0-9]+(/ci[0-9]+)?/access$`)
+
+// MigMinor represents the minor number of a MIG device
+type MigMinor int
+
+// MigCap identifies a MIG capability using the name it has in the MIG minors
+// file, e.g. "config", "gpu0/gi0/access" or "gpu0/gi0/ci0/access".
+type MigCap string
+
+// MigCaps stores a map of MIG caps to MIG minors
+type MigCaps map[MigCap]MigMinor
+
+// NewGPUInstanceCap creates a MigCap for the specified MIG GPU instance.
+// A GPU instance is uniquely defined by the GPU minor number and GI instance ID.
+// The cap uses the same form as the keys of MigCaps, so it can be passed to
+// GetCapDevicePath and resolved with ProcPath.
+func NewGPUInstanceCap(gpu, gi int) MigCap {
+	return MigCap(fmt.Sprintf("gpu%d/gi%d/access", gpu, gi))
+}
+
+// NewComputeInstanceCap creates a MigCap for the specified MIG Compute instance.
+// A Compute instance is uniquely defined by the GPU minor number, GI instance ID, and CI instance ID.
+// The cap uses the same form as the keys of MigCaps, so it can be passed to
+// GetCapDevicePath and resolved with ProcPath.
+func NewComputeInstanceCap(gpu, gi, ci int) MigCap {
+	return MigCap(fmt.Sprintf("gpu%d/gi%d/ci%d/access", gpu, gi, ci))
+}
+
+// GetCapDevicePath returns the path to the cap device for the specified cap.
+// An error is returned if the cap is not present in the map.
+func (m MigCaps) GetCapDevicePath(cap MigCap) (string, error) {
+	minor, exists := m[cap]
+	if !exists {
+		return "", fmt.Errorf("invalid MIG capability path %v", cap)
+	}
+	return minor.DevicePath(), nil
+}
+
+// NewMigCaps creates a MigCaps structure based on the contents of the MIG minors file.
+// If the MIG minors file does not exist, then we are not on a MIG capable
+// machine and (nil, nil) is returned. Any other error opening the file is
+// returned wrapped.
+func NewMigCaps() (MigCaps, error) {
+	// The format of this file is discussed in:
+	//     https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#unique_1576522674
+	minorsFile, err := os.Open(nvcapsMigMinorsPath)
+	if os.IsNotExist(err) {
+		return nil, nil
+	}
+	if err != nil {
+		return nil, fmt.Errorf("error opening MIG minors file: %w", err)
+	}
+	defer minorsFile.Close()
+
+	return processMinorsFile(minorsFile), nil
+}
+
+// processMinorsFile reads the MIG minors file line by line and constructs a
+// mapping of MIG cap to device minor. Lines that cannot be parsed are logged
+// and skipped.
+func processMinorsFile(minorsFile io.Reader) MigCaps {
+	migCaps := make(MigCaps)
+	scanner := bufio.NewScanner(minorsFile)
+	for scanner.Scan() {
+		migCap, minor, err := processMigMinorsLine(scanner.Text())
+		if err != nil {
+			log.Printf("Skipping line in MIG minors file: %v", err)
+			continue
+		}
+		migCaps[migCap] = minor
+	}
+	// Scan also stops on a read error; report it instead of silently returning
+	// a map built from a truncated read.
+	if err := scanner.Err(); err != nil {
+		log.Printf("Error reading MIG minors file: %v", err)
+	}
+	return migCaps
+}
+
+// processMigMinorsLine parses a "<cap> <minor>" line of the MIG minors file.
+// An error is returned unless the line consists of exactly a valid cap
+// followed by a non-negative integer minor.
+func processMigMinorsLine(line string) (MigCap, MigMinor, error) {
+	// Fields tolerates tabs and repeated or trailing whitespace, unlike a
+	// split on a single space.
+	parts := strings.Fields(line)
+	if len(parts) != 2 {
+		return "", 0, fmt.Errorf("error processing line: %v", line)
+	}
+
+	migCap := MigCap(parts[0])
+	if !migCap.isValid() {
+		return "", 0, fmt.Errorf("invalid MIG minors line: '%v'", line)
+	}
+
+	minor, err := strconv.Atoi(parts[1])
+	if err != nil {
+		return "", 0, fmt.Errorf("error reading MIG minor from '%v': %w", line, err)
+	}
+	if minor < 0 {
+		return "", 0, fmt.Errorf("invalid negative MIG minor in '%v'", line)
+	}
+
+	return migCap, MigMinor(minor), nil
+}
+
+// isValid reports whether the cap is "config", "monitor", or a GI / CI access
+// cap. The whole string must match, so a wrong suffix or trailing text is
+// rejected.
+func (m MigCap) isValid() bool {
+	switch m {
+	case "config", "monitor":
+		return true
+	}
+	return migAccessCapPattern.MatchString(string(m))
+}
+
+// ProcPath returns the proc path associated with the MIG capability.
+// "config" and "monitor" are placed under "mig/"; a cap of the form
+// "gpuN/<rest>" is placed under "gpuN/mig/<rest>". A cap in neither form is
+// joined to the capabilities root unchanged.
+func (m MigCap) ProcPath() string {
+	id := string(m)
+	switch id {
+	case "config", "monitor":
+		return filepath.Join(nvidiaCapabilitiesPath, "mig", id)
+	}
+
+	parts := strings.SplitN(id, "/", 2)
+	if len(parts) != 2 {
+		// No "/" in the cap: there is no parts[1] to index.
+		return filepath.Join(nvidiaCapabilitiesPath, id)
+	}
+	return filepath.Join(nvidiaCapabilitiesPath, parts[0], "mig", parts[1])
+}
+
+// DevicePath returns the path for the nvidia-caps device with the specified
+// minor number
+func (m MigMinor) DevicePath() string {
+	return fmt.Sprintf(nvcapsDevicePath+"/nvidia-cap%d", m)
+}
diff --git a/internal/nvcaps/nvcaps_test.go b/internal/nvcaps/nvcaps_test.go
new file mode 100644
index 00000000..826eee2a
--- /dev/null
+++ b/internal/nvcaps/nvcaps_test.go
@@ -0,0 +1,100 @@
+/*
+# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/
+
+package nvcaps
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestProcessMinorsFile feeds newline-joined lines to processMinorsFile and
+// compares the resulting MigCaps with the expected map.
+func TestProcessMinorsFile(t *testing.T) {
+	type scenario struct {
+		lines    []string
+		expected MigCaps
+	}
+	scenarios := []scenario{
+		{lines: []string{}, expected: MigCaps{}},
+		{lines: []string{"invalidLine"}, expected: MigCaps{}},
+		{lines: []string{"config 1"}, expected: MigCaps{"config": 1}},
+		{lines: []string{"gpu0/gi0/ci0/access 4"}, expected: MigCaps{"gpu0/gi0/ci0/access": 4}},
+		{lines: []string{"config 1", "invalidLine"}, expected: MigCaps{"config": 1}},
+		{
+			lines:    []string{"config 1", "gpu0/gi0/ci0/access 4"},
+			expected: MigCaps{"config": 1, "gpu0/gi0/ci0/access": 4},
+		},
+	}
+	for idx, sc := range scenarios {
+		t.Run(fmt.Sprintf("testcase %d", idx), func(t *testing.T) {
+			input := strings.Join(sc.lines, "\n")
+			require.Equal(t, sc.expected, processMinorsFile(strings.NewReader(input)))
+		})
+	}
+}
+
+// TestProcessMigMinorsLine checks the cap, minor and error returned by
+// processMigMinorsLine for individual lines.
+func TestProcessMigMinorsLine(t *testing.T) {
+	type scenario struct {
+		line    string
+		cap     MigCap
+		minor   MigMinor
+		wantErr bool
+	}
+	scenarios := []scenario{
+		{line: "config 1", cap: "config", minor: 1},
+		{line: "monitor 2", cap: "monitor", minor: 2},
+		{line: "gpu0/gi0/access 3", cap: "gpu0/gi0/access", minor: 3},
+		{line: "gpu0/gi0/ci0/access 4", cap: "gpu0/gi0/ci0/access", minor: 4},
+		{line: "notconfig 99", wantErr: true},
+		{line: "config notanint", wantErr: true},
+		{line: "", wantErr: true},
+	}
+
+	for idx, sc := range scenarios {
+		t.Run(fmt.Sprintf("testcase %d", idx), func(t *testing.T) {
+			gotCap, gotMinor, err := processMigMinorsLine(sc.line)
+
+			require.Equal(t, sc.cap, gotCap)
+			require.Equal(t, sc.minor, gotMinor)
+			if !sc.wantErr {
+				require.NoError(t, err)
+				return
+			}
+			require.Error(t, err)
+		})
+	}
+}
+
+// TestMigCapProcPaths checks the proc path produced for each form of MigCap.
+func TestMigCapProcPaths(t *testing.T) {
+	scenarios := []struct {
+		input    string
+		expected string
+	}{
+		{input: "config", expected: "/proc/driver/nvidia/capabilities/mig/config"},
+		{input: "monitor", expected: "/proc/driver/nvidia/capabilities/mig/monitor"},
+		{input: "gpu0/gi0/access", expected: "/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"},
+		{input: "gpu0/gi0/ci0/access", expected: "/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"},
+	}
+	for idx, sc := range scenarios {
+		t.Run(fmt.Sprintf("testcase %d", idx), func(t *testing.T) {
+			require.Equal(t, sc.expected, MigCap(sc.input).ProcPath())
+		})
+	}
+}
+
+// TestMigMinorDevicePath checks the device path produced for minor 0.
+func TestMigMinorDevicePath(t *testing.T) {
+	require.Equal(t, "/dev/nvidia-caps/nvidia-cap0", MigMinor(0).DevicePath())
+}