mirror of
				https://github.com/NVIDIA/nvidia-container-toolkit
				synced 2025-06-26 18:18:24 +00:00 
			
		
		
		
	Use CUDA image abstraction for runtime hook
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
		
							parent
							
								
									8f0e1906c2
								
							
						
					
					
						commit
						50cf07e4cd
					
				| @ -7,9 +7,9 @@ import ( | |||||||
| 	"os" | 	"os" | ||||||
| 	"path" | 	"path" | ||||||
| 	"path/filepath" | 	"path/filepath" | ||||||
| 	"strconv" |  | ||||||
| 	"strings" | 	"strings" | ||||||
| 
 | 
 | ||||||
|  | 	"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" | ||||||
| 	"golang.org/x/mod/semver" | 	"golang.org/x/mod/semver" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| @ -104,45 +104,6 @@ type HookState struct { | |||||||
| 	BundlePath string `json:"bundlePath"` | 	BundlePath string `json:"bundlePath"` | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func parseCudaVersion(cudaVersion string) (uint32, uint32) { |  | ||||||
| 	major, minor, err := parseMajorMinorVersion(cudaVersion) |  | ||||||
| 	if err != nil { |  | ||||||
| 		log.Panicln("invalid CUDA Version", cudaVersion, err) |  | ||||||
| 	} |  | ||||||
| 	return major, minor |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func parseMajorMinorVersion(version string) (uint32, uint32, error) { |  | ||||||
| 	if !semver.IsValid("v" + version) { |  | ||||||
| 		return 0, 0, fmt.Errorf("invalid version string") |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	majorMinor := strings.TrimPrefix(semver.MajorMinor("v"+version), "v") |  | ||||||
| 	parts := strings.Split(majorMinor, ".") |  | ||||||
| 
 |  | ||||||
| 	major, err := strconv.ParseUint(parts[0], 10, 32) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return 0, 0, fmt.Errorf("invalid major version") |  | ||||||
| 	} |  | ||||||
| 	minor, err := strconv.ParseUint(parts[1], 10, 32) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return 0, 0, fmt.Errorf("invalid minor version") |  | ||||||
| 	} |  | ||||||
| 	return uint32(major), uint32(minor), nil |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func getEnvMap(e []string) (m map[string]string) { |  | ||||||
| 	m = make(map[string]string) |  | ||||||
| 	for _, s := range e { |  | ||||||
| 		p := strings.SplitN(s, "=", 2) |  | ||||||
| 		if len(p) != 2 { |  | ||||||
| 			log.Panicln("environment error") |  | ||||||
| 		} |  | ||||||
| 		m[p[0]] = p[1] |  | ||||||
| 	} |  | ||||||
| 	return |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func loadSpec(path string) (spec *Spec) { | func loadSpec(path string) (spec *Spec) { | ||||||
| 	f, err := os.Open(path) | 	f, err := os.Open(path) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| @ -204,12 +165,6 @@ func isPrivileged(s *Spec) bool { | |||||||
| 	return false | 	return false | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func isLegacyCUDAImage(env map[string]string) bool { |  | ||||||
| 	legacyCudaVersion := env[envCUDAVersion] |  | ||||||
| 	cudaRequire := env[envNVRequireCUDA] |  | ||||||
| 	return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string { | func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string { | ||||||
| 	// Build a list of envvars to consider.
 | 	// Build a list of envvars to consider.
 | ||||||
| 	envVars := []string{envNVVisibleDevices} | 	envVars := []string{envNVVisibleDevices} | ||||||
| @ -348,27 +303,11 @@ func getDriverCapabilities(env map[string]string, supportedDriverCapabilities Dr | |||||||
| 	return capabilities | 	return capabilities | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func getRequirements(env map[string]string, legacyImage bool) []string { | func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig { | ||||||
| 	// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
 | 	legacyImage := image.IsLegacy() | ||||||
| 	var requirements []string |  | ||||||
| 	for name, value := range env { |  | ||||||
| 		if strings.HasPrefix(name, envNVRequirePrefix) { |  | ||||||
| 			requirements = append(requirements, value) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	if legacyImage { |  | ||||||
| 		vmaj, vmin := parseCudaVersion(env[envCUDAVersion]) |  | ||||||
| 		cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin) |  | ||||||
| 		requirements = append(requirements, cudaRequire) |  | ||||||
| 	} |  | ||||||
| 	return requirements |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig { |  | ||||||
| 	legacyImage := isLegacyCUDAImage(env) |  | ||||||
| 
 | 
 | ||||||
| 	var devices string | 	var devices string | ||||||
| 	if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil { | 	if d := getDevices(hookConfig, image, mounts, privileged, legacyImage); d != nil { | ||||||
| 		devices = *d | 		devices = *d | ||||||
| 	} else { | 	} else { | ||||||
| 		// 'nil' devices means this is not a GPU container.
 | 		// 'nil' devices means this is not a GPU container.
 | ||||||
| @ -376,7 +315,7 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou | |||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	var migConfigDevices string | 	var migConfigDevices string | ||||||
| 	if d := getMigConfigDevices(env); d != nil { | 	if d := getMigConfigDevices(image); d != nil { | ||||||
| 		migConfigDevices = *d | 		migConfigDevices = *d | ||||||
| 	} | 	} | ||||||
| 	if !privileged && migConfigDevices != "" { | 	if !privileged && migConfigDevices != "" { | ||||||
| @ -384,19 +323,21 @@ func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mou | |||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	var migMonitorDevices string | 	var migMonitorDevices string | ||||||
| 	if d := getMigMonitorDevices(env); d != nil { | 	if d := getMigMonitorDevices(image); d != nil { | ||||||
| 		migMonitorDevices = *d | 		migMonitorDevices = *d | ||||||
| 	} | 	} | ||||||
| 	if !privileged && migMonitorDevices != "" { | 	if !privileged && migMonitorDevices != "" { | ||||||
| 		log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container") | 		log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container") | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	driverCapabilities := getDriverCapabilities(env, hookConfig.SupportedDriverCapabilities, legacyImage).String() | 	driverCapabilities := getDriverCapabilities(image, hookConfig.SupportedDriverCapabilities, legacyImage).String() | ||||||
| 
 | 
 | ||||||
| 	requirements := getRequirements(env, legacyImage) | 	requirements, err := image.GetRequirements() | ||||||
|  | 	if err != nil { | ||||||
|  | 		log.Panicln("failed to get requirements", err) | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	// Don't fail on invalid values.
 | 	disableRequire := image.HasDisableRequire() | ||||||
| 	disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire]) |  | ||||||
| 
 | 
 | ||||||
| 	return &nvidiaConfig{ | 	return &nvidiaConfig{ | ||||||
| 		Devices:            devices, | 		Devices:            devices, | ||||||
| @ -422,13 +363,17 @@ func getContainerConfig(hook HookConfig) (config containerConfig) { | |||||||
| 
 | 
 | ||||||
| 	s := loadSpec(path.Join(b, "config.json")) | 	s := loadSpec(path.Join(b, "config.json")) | ||||||
| 
 | 
 | ||||||
| 	env := getEnvMap(s.Process.Env) | 	image, err := image.NewCUDAImageFromEnv(s.Process.Env) | ||||||
|  | 	if err != nil { | ||||||
|  | 		log.Panicln(err) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	privileged := isPrivileged(s) | 	privileged := isPrivileged(s) | ||||||
| 	envSwarmGPU = hook.SwarmResource | 	envSwarmGPU = hook.SwarmResource | ||||||
| 	return containerConfig{ | 	return containerConfig{ | ||||||
| 		Pid:    h.Pid, | 		Pid:    h.Pid, | ||||||
| 		Rootfs: s.Root.Path, | 		Rootfs: s.Root.Path, | ||||||
| 		Env:    env, | 		Env:    image, | ||||||
| 		Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged), | 		Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged), | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  | |||||||
| @ -7,51 +7,6 @@ import ( | |||||||
| 	"github.com/stretchr/testify/require" | 	"github.com/stretchr/testify/require" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| func TestParseCudaVersionValid(t *testing.T) { |  | ||||||
| 	var tests = []struct { |  | ||||||
| 		version  string |  | ||||||
| 		expected [2]uint32 |  | ||||||
| 	}{ |  | ||||||
| 		{"0", [2]uint32{0, 0}}, |  | ||||||
| 		{"8", [2]uint32{8, 0}}, |  | ||||||
| 		{"7.5", [2]uint32{7, 5}}, |  | ||||||
| 		{"9.0.116", [2]uint32{9, 0}}, |  | ||||||
| 		{"4294967295.4294967295.4294967295", [2]uint32{4294967295, 4294967295}}, |  | ||||||
| 	} |  | ||||||
| 	for i, c := range tests { |  | ||||||
| 		vmaj, vmin := parseCudaVersion(c.version) |  | ||||||
| 
 |  | ||||||
| 		version := [2]uint32{vmaj, vmin} |  | ||||||
| 
 |  | ||||||
| 		require.Equal(t, c.expected, version, "%d: %v", i, c) |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func TestParseCudaVersionInvalid(t *testing.T) { |  | ||||||
| 	var tests = []string{ |  | ||||||
| 		"foo", |  | ||||||
| 		"foo.5.10", |  | ||||||
| 		"9.0.116.50", |  | ||||||
| 		"9.0.116foo", |  | ||||||
| 		"7.foo", |  | ||||||
| 		"9.0.bar", |  | ||||||
| 		"9.4294967296", |  | ||||||
| 		"9.0.116.", |  | ||||||
| 		"9..0", |  | ||||||
| 		"9.", |  | ||||||
| 		".5.10", |  | ||||||
| 		"-9", |  | ||||||
| 		"+9", |  | ||||||
| 		"-9.1.116", |  | ||||||
| 		"-9.-1.-116", |  | ||||||
| 	} |  | ||||||
| 	for _, c := range tests { |  | ||||||
| 		require.Panics(t, func() { |  | ||||||
| 			parseCudaVersion(c) |  | ||||||
| 		}, "parseCudaVersion(%v)", c) |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func TestIsPrivileged(t *testing.T) { | func TestIsPrivileged(t *testing.T) { | ||||||
| 	var tests = []struct { | 	var tests = []struct { | ||||||
| 		spec     string | 		spec     string | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user