mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Use CUDA image abstraction for runtime hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
8f0e1906c2
commit
50cf07e4cd
@ -7,9 +7,9 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
"golang.org/x/mod/semver"
|
"golang.org/x/mod/semver"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -104,45 +104,6 @@ type HookState struct {
|
|||||||
BundlePath string `json:"bundlePath"`
|
BundlePath string `json:"bundlePath"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseCudaVersion(cudaVersion string) (uint32, uint32) {
|
|
||||||
major, minor, err := parseMajorMinorVersion(cudaVersion)
|
|
||||||
if err != nil {
|
|
||||||
log.Panicln("invalid CUDA Version", cudaVersion, err)
|
|
||||||
}
|
|
||||||
return major, minor
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseMajorMinorVersion(version string) (uint32, uint32, error) {
|
|
||||||
if !semver.IsValid("v" + version) {
|
|
||||||
return 0, 0, fmt.Errorf("invalid version string")
|
|
||||||
}
|
|
||||||
|
|
||||||
majorMinor := strings.TrimPrefix(semver.MajorMinor("v"+version), "v")
|
|
||||||
parts := strings.Split(majorMinor, ".")
|
|
||||||
|
|
||||||
major, err := strconv.ParseUint(parts[0], 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return 0, 0, fmt.Errorf("invalid major version")
|
|
||||||
}
|
|
||||||
minor, err := strconv.ParseUint(parts[1], 10, 32)
|
|
||||||
if err != nil {
|
|
||||||
return 0, 0, fmt.Errorf("invalid minor version")
|
|
||||||
}
|
|
||||||
return uint32(major), uint32(minor), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// getEnvMap converts a list of "KEY=VALUE" strings into a map. Each entry is
// split at its first "=", so values may themselves contain "=". When a key
// occurs more than once the last occurrence wins. An entry without any "="
// is logged as an environment error and causes a panic.
func getEnvMap(e []string) (m map[string]string) {
	m = make(map[string]string)
	for _, entry := range e {
		sep := strings.IndexByte(entry, '=')
		if sep < 0 {
			log.Panicln("environment error")
		}
		m[entry[:sep]] = entry[sep+1:]
	}
	return m
}
|
|
||||||
|
|
||||||
func loadSpec(path string) (spec *Spec) {
|
func loadSpec(path string) (spec *Spec) {
|
||||||
f, err := os.Open(path)
|
f, err := os.Open(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -204,12 +165,6 @@ func isPrivileged(s *Spec) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func isLegacyCUDAImage(env map[string]string) bool {
|
|
||||||
legacyCudaVersion := env[envCUDAVersion]
|
|
||||||
cudaRequire := env[envNVRequireCUDA]
|
|
||||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
||||||
// Build a list of envvars to consider.
|
// Build a list of envvars to consider.
|
||||||
envVars := []string{envNVVisibleDevices}
|
envVars := []string{envNVVisibleDevices}
|
||||||
@ -348,27 +303,11 @@ func getDriverCapabilities(env map[string]string, supportedDriverCapabilities Dr
|
|||||||
return capabilities
|
return capabilities
|
||||||
}
|
}
|
||||||
|
|
||||||
func getRequirements(env map[string]string, legacyImage bool) []string {
|
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
legacyImage := image.IsLegacy()
|
||||||
var requirements []string
|
|
||||||
for name, value := range env {
|
|
||||||
if strings.HasPrefix(name, envNVRequirePrefix) {
|
|
||||||
requirements = append(requirements, value)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if legacyImage {
|
|
||||||
vmaj, vmin := parseCudaVersion(env[envCUDAVersion])
|
|
||||||
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
|
|
||||||
requirements = append(requirements, cudaRequire)
|
|
||||||
}
|
|
||||||
return requirements
|
|
||||||
}
|
|
||||||
|
|
||||||
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
|
|
||||||
legacyImage := isLegacyCUDAImage(env)
|
|
||||||
|
|
||||||
var devices string
|
var devices string
|
||||||
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
|
if d := getDevices(hookConfig, image, mounts, privileged, legacyImage); d != nil {
|
||||||
devices = *d
|
devices = *d
|
||||||
} else {
|
} else {
|
||||||
// 'nil' devices means this is not a GPU container.
|
// 'nil' devices means this is not a GPU container.
|
||||||
@ -376,7 +315,7 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou
|
|||||||
}
|
}
|
||||||
|
|
||||||
var migConfigDevices string
|
var migConfigDevices string
|
||||||
if d := getMigConfigDevices(env); d != nil {
|
if d := getMigConfigDevices(image); d != nil {
|
||||||
migConfigDevices = *d
|
migConfigDevices = *d
|
||||||
}
|
}
|
||||||
if !privileged && migConfigDevices != "" {
|
if !privileged && migConfigDevices != "" {
|
||||||
@ -384,19 +323,21 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou
|
|||||||
}
|
}
|
||||||
|
|
||||||
var migMonitorDevices string
|
var migMonitorDevices string
|
||||||
if d := getMigMonitorDevices(env); d != nil {
|
if d := getMigMonitorDevices(image); d != nil {
|
||||||
migMonitorDevices = *d
|
migMonitorDevices = *d
|
||||||
}
|
}
|
||||||
if !privileged && migMonitorDevices != "" {
|
if !privileged && migMonitorDevices != "" {
|
||||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||||
}
|
}
|
||||||
|
|
||||||
driverCapabilities := getDriverCapabilities(env, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
driverCapabilities := getDriverCapabilities(image, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
||||||
|
|
||||||
requirements := getRequirements(env, legacyImage)
|
requirements, err := image.GetRequirements()
|
||||||
|
if err != nil {
|
||||||
|
log.Panicln("failed to get requirements", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Don't fail on invalid values.
|
disableRequire := image.HasDisableRequire()
|
||||||
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
|
|
||||||
|
|
||||||
return &nvidiaConfig{
|
return &nvidiaConfig{
|
||||||
Devices: devices,
|
Devices: devices,
|
||||||
@ -422,13 +363,17 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
|||||||
|
|
||||||
s := loadSpec(path.Join(b, "config.json"))
|
s := loadSpec(path.Join(b, "config.json"))
|
||||||
|
|
||||||
env := getEnvMap(s.Process.Env)
|
image, err := image.NewCUDAImageFromEnv(s.Process.Env)
|
||||||
|
if err != nil {
|
||||||
|
log.Panicln(err)
|
||||||
|
}
|
||||||
|
|
||||||
privileged := isPrivileged(s)
|
privileged := isPrivileged(s)
|
||||||
envSwarmGPU = hook.SwarmResource
|
envSwarmGPU = hook.SwarmResource
|
||||||
return containerConfig{
|
return containerConfig{
|
||||||
Pid: h.Pid,
|
Pid: h.Pid,
|
||||||
Rootfs: s.Root.Path,
|
Rootfs: s.Root.Path,
|
||||||
Env: env,
|
Env: image,
|
||||||
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
|
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,51 +7,6 @@ import (
|
|||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestParseCudaVersionValid(t *testing.T) {
|
|
||||||
var tests = []struct {
|
|
||||||
version string
|
|
||||||
expected [2]uint32
|
|
||||||
}{
|
|
||||||
{"0", [2]uint32{0, 0}},
|
|
||||||
{"8", [2]uint32{8, 0}},
|
|
||||||
{"7.5", [2]uint32{7, 5}},
|
|
||||||
{"9.0.116", [2]uint32{9, 0}},
|
|
||||||
{"4294967295.4294967295.4294967295", [2]uint32{4294967295, 4294967295}},
|
|
||||||
}
|
|
||||||
for i, c := range tests {
|
|
||||||
vmaj, vmin := parseCudaVersion(c.version)
|
|
||||||
|
|
||||||
version := [2]uint32{vmaj, vmin}
|
|
||||||
|
|
||||||
require.Equal(t, c.expected, version, "%d: %v", i, c)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseCudaVersionInvalid(t *testing.T) {
|
|
||||||
var tests = []string{
|
|
||||||
"foo",
|
|
||||||
"foo.5.10",
|
|
||||||
"9.0.116.50",
|
|
||||||
"9.0.116foo",
|
|
||||||
"7.foo",
|
|
||||||
"9.0.bar",
|
|
||||||
"9.4294967296",
|
|
||||||
"9.0.116.",
|
|
||||||
"9..0",
|
|
||||||
"9.",
|
|
||||||
".5.10",
|
|
||||||
"-9",
|
|
||||||
"+9",
|
|
||||||
"-9.1.116",
|
|
||||||
"-9.-1.-116",
|
|
||||||
}
|
|
||||||
for _, c := range tests {
|
|
||||||
require.Panics(t, func() {
|
|
||||||
parseCudaVersion(c)
|
|
||||||
}, "parseCudaVersion(%v)", c)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIsPrivileged(t *testing.T) {
|
func TestIsPrivileged(t *testing.T) {
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
spec string
|
spec string
|
||||||
|
Loading…
Reference in New Issue
Block a user