mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 08:18:32 +00:00
Use CUDA image abstraction for runtime hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
8f0e1906c2
commit
50cf07e4cd
@ -7,9 +7,9 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"golang.org/x/mod/semver"
|
||||
)
|
||||
|
||||
@ -104,45 +104,6 @@ type HookState struct {
|
||||
BundlePath string `json:"bundlePath"`
|
||||
}
|
||||
|
||||
func parseCudaVersion(cudaVersion string) (uint32, uint32) {
|
||||
major, minor, err := parseMajorMinorVersion(cudaVersion)
|
||||
if err != nil {
|
||||
log.Panicln("invalid CUDA Version", cudaVersion, err)
|
||||
}
|
||||
return major, minor
|
||||
}
|
||||
|
||||
func parseMajorMinorVersion(version string) (uint32, uint32, error) {
|
||||
if !semver.IsValid("v" + version) {
|
||||
return 0, 0, fmt.Errorf("invalid version string")
|
||||
}
|
||||
|
||||
majorMinor := strings.TrimPrefix(semver.MajorMinor("v"+version), "v")
|
||||
parts := strings.Split(majorMinor, ".")
|
||||
|
||||
major, err := strconv.ParseUint(parts[0], 10, 32)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("invalid major version")
|
||||
}
|
||||
minor, err := strconv.ParseUint(parts[1], 10, 32)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("invalid minor version")
|
||||
}
|
||||
return uint32(major), uint32(minor), nil
|
||||
}
|
||||
|
||||
// getEnvMap converts a list of "KEY=VALUE" strings into a map from KEY to
// VALUE. Each entry is split at its first '=' only, so values may themselves
// contain '='. When a key occurs more than once, the last occurrence wins.
// An entry without any '=' is logged and causes a panic via log.Panicln.
func getEnvMap(e []string) (m map[string]string) {
	m = make(map[string]string)
	for _, entry := range e {
		sep := strings.IndexByte(entry, '=')
		if sep < 0 {
			log.Panicln("environment error")
		}
		key, value := entry[:sep], entry[sep+1:]
		m[key] = value
	}
	return m
}
|
||||
|
||||
func loadSpec(path string) (spec *Spec) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
@ -204,12 +165,6 @@ func isPrivileged(s *Spec) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func isLegacyCUDAImage(env map[string]string) bool {
|
||||
legacyCudaVersion := env[envCUDAVersion]
|
||||
cudaRequire := env[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
||||
// Build a list of envvars to consider.
|
||||
envVars := []string{envNVVisibleDevices}
|
||||
@ -348,27 +303,11 @@ func getDriverCapabilities(env map[string]string, supportedDriverCapabilities Dr
|
||||
return capabilities
|
||||
}
|
||||
|
||||
func getRequirements(env map[string]string, legacyImage bool) []string {
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range env {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
}
|
||||
if legacyImage {
|
||||
vmaj, vmin := parseCudaVersion(env[envCUDAVersion])
|
||||
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
|
||||
requirements = append(requirements, cudaRequire)
|
||||
}
|
||||
return requirements
|
||||
}
|
||||
|
||||
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||
legacyImage := isLegacyCUDAImage(env)
|
||||
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||
legacyImage := image.IsLegacy()
|
||||
|
||||
var devices string
|
||||
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
|
||||
if d := getDevices(hookConfig, image, mounts, privileged, legacyImage); d != nil {
|
||||
devices = *d
|
||||
} else {
|
||||
// 'nil' devices means this is not a GPU container.
|
||||
@ -376,7 +315,7 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou
|
||||
}
|
||||
|
||||
var migConfigDevices string
|
||||
if d := getMigConfigDevices(env); d != nil {
|
||||
if d := getMigConfigDevices(image); d != nil {
|
||||
migConfigDevices = *d
|
||||
}
|
||||
if !privileged && migConfigDevices != "" {
|
||||
@ -384,19 +323,21 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou
|
||||
}
|
||||
|
||||
var migMonitorDevices string
|
||||
if d := getMigMonitorDevices(env); d != nil {
|
||||
if d := getMigMonitorDevices(image); d != nil {
|
||||
migMonitorDevices = *d
|
||||
}
|
||||
if !privileged && migMonitorDevices != "" {
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
driverCapabilities := getDriverCapabilities(env, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
||||
driverCapabilities := getDriverCapabilities(image, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
||||
|
||||
requirements := getRequirements(env, legacyImage)
|
||||
requirements, err := image.GetRequirements()
|
||||
if err != nil {
|
||||
log.Panicln("failed to get requirements", err)
|
||||
}
|
||||
|
||||
// Don't fail on invalid values.
|
||||
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
|
||||
disableRequire := image.HasDisableRequire()
|
||||
|
||||
return &nvidiaConfig{
|
||||
Devices: devices,
|
||||
@ -422,13 +363,17 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
|
||||
s := loadSpec(path.Join(b, "config.json"))
|
||||
|
||||
env := getEnvMap(s.Process.Env)
|
||||
image, err := image.NewCUDAImageFromEnv(s.Process.Env)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
|
||||
privileged := isPrivileged(s)
|
||||
envSwarmGPU = hook.SwarmResource
|
||||
return containerConfig{
|
||||
Pid: h.Pid,
|
||||
Rootfs: s.Root.Path,
|
||||
Env: env,
|
||||
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
|
||||
Env: image,
|
||||
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
|
||||
}
|
||||
}
|
||||
|
@ -7,51 +7,6 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseCudaVersionValid(t *testing.T) {
|
||||
var tests = []struct {
|
||||
version string
|
||||
expected [2]uint32
|
||||
}{
|
||||
{"0", [2]uint32{0, 0}},
|
||||
{"8", [2]uint32{8, 0}},
|
||||
{"7.5", [2]uint32{7, 5}},
|
||||
{"9.0.116", [2]uint32{9, 0}},
|
||||
{"4294967295.4294967295.4294967295", [2]uint32{4294967295, 4294967295}},
|
||||
}
|
||||
for i, c := range tests {
|
||||
vmaj, vmin := parseCudaVersion(c.version)
|
||||
|
||||
version := [2]uint32{vmaj, vmin}
|
||||
|
||||
require.Equal(t, c.expected, version, "%d: %v", i, c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCudaVersionInvalid(t *testing.T) {
|
||||
var tests = []string{
|
||||
"foo",
|
||||
"foo.5.10",
|
||||
"9.0.116.50",
|
||||
"9.0.116foo",
|
||||
"7.foo",
|
||||
"9.0.bar",
|
||||
"9.4294967296",
|
||||
"9.0.116.",
|
||||
"9..0",
|
||||
"9.",
|
||||
".5.10",
|
||||
"-9",
|
||||
"+9",
|
||||
"-9.1.116",
|
||||
"-9.-1.-116",
|
||||
}
|
||||
for _, c := range tests {
|
||||
require.Panics(t, func() {
|
||||
parseCudaVersion(c)
|
||||
}, "parseCudaVersion(%v)", c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPrivileged(t *testing.T) {
|
||||
var tests = []struct {
|
||||
spec string
|
||||
|
Loading…
Reference in New Issue
Block a user