mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-23 00:38:26 +00:00
877832da69
This change extends the support for multiple envvars when specifying swarm resources to consider ALL of the specified environment variables instead of the first match. Signed-off-by: Evan Lezar <elezar@nvidia.com>
191 lines
5.7 KiB
Go
191 lines
5.7 KiB
Go
/**
|
|
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
**/
|
|
|
|
package image
|
|
|
|
import (
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
"golang.org/x/mod/semver"
|
|
)
|
|
|
|
const (
|
|
envCUDAVersion = "CUDA_VERSION"
|
|
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
|
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
|
envNVRequireJetpack = envNVRequirePrefix + "JETPACK"
|
|
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
|
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
|
)
|
|
|
|
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
|
// a map of environment variable to values that can be used to perform lookups
|
|
// such as requirements.
|
|
type CUDA map[string]string
|
|
|
|
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
|
// The process environment is read (if present) to construc the CUDA Image.
|
|
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
|
|
if spec == nil || spec.Process == nil {
|
|
return NewCUDAImageFromEnv(nil)
|
|
}
|
|
|
|
return NewCUDAImageFromEnv(spec.Process.Env)
|
|
}
|
|
|
|
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
|
// is a list of strings of the form ENVAR=VALUE.
|
|
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
|
|
c := make(CUDA)
|
|
|
|
for _, e := range env {
|
|
parts := strings.SplitN(e, "=", 2)
|
|
if len(parts) != 2 {
|
|
return nil, fmt.Errorf("invalid environment variable: %v", e)
|
|
}
|
|
c[parts[0]] = parts[1]
|
|
}
|
|
|
|
return c, nil
|
|
}
|
|
|
|
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
|
|
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
|
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
|
func (i CUDA) IsLegacy() bool {
|
|
legacyCudaVersion := i[envCUDAVersion]
|
|
cudaRequire := i[envNVRequireCUDA]
|
|
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
|
}
|
|
|
|
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
|
|
// variables.
|
|
func (i CUDA) GetRequirements() ([]string, error) {
|
|
// TODO: We need not process this if disable require is set, but this will be done
|
|
// in a single follow-up to ensure that the behavioural change is accurately captured.
|
|
// if i.HasDisableRequire() {
|
|
// return nil, nil
|
|
// }
|
|
|
|
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
|
var requirements []string
|
|
for name, value := range i {
|
|
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
|
requirements = append(requirements, value)
|
|
}
|
|
}
|
|
if i.IsLegacy() {
|
|
v, err := i.legacyVersion()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get version: %v", err)
|
|
}
|
|
cudaRequire := fmt.Sprintf("cuda>=%s", v)
|
|
requirements = append(requirements, cudaRequire)
|
|
}
|
|
return requirements, nil
|
|
}
|
|
|
|
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
|
// to a valid (true) boolean value this can be used to disable the requirement checks
|
|
func (i CUDA) HasDisableRequire() bool {
|
|
if disable, exists := i[envNVDisableRequire]; exists {
|
|
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
|
d, _ := strconv.ParseBool(disable)
|
|
return d
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
|
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
|
// We concantenate all the devices from the specified envvars.
|
|
var isSet bool
|
|
var devices []string
|
|
requested := make(map[string]bool)
|
|
for _, envVar := range envVars {
|
|
if devs, ok := i[envVar]; ok {
|
|
isSet = true
|
|
for _, d := range strings.Split(devs, ",") {
|
|
trimmed := strings.TrimSpace(d)
|
|
if len(trimmed) == 0 {
|
|
continue
|
|
}
|
|
devices = append(devices, trimmed)
|
|
requested[trimmed] = true
|
|
}
|
|
}
|
|
}
|
|
|
|
// Environment variable unset with legacy image: default to "all".
|
|
if !isSet && len(devices) == 0 && i.IsLegacy() {
|
|
return newVisibleDevices("all")
|
|
}
|
|
|
|
// Environment variable unset or empty or "void": return nil
|
|
if len(devices) == 0 || requested["void"] {
|
|
return newVisibleDevices("void")
|
|
}
|
|
|
|
return newVisibleDevices(devices...)
|
|
}
|
|
|
|
// GetDriverCapabilities returns the requested driver capabilities.
|
|
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
|
env := i[envNVDriverCapabilities]
|
|
|
|
capabilites := make(DriverCapabilities)
|
|
for _, c := range strings.Split(env, ",") {
|
|
capabilites[DriverCapability(c)] = true
|
|
}
|
|
|
|
return capabilites
|
|
}
|
|
|
|
func (i CUDA) legacyVersion() (string, error) {
|
|
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
|
|
if err != nil {
|
|
return "", fmt.Errorf("invalid CUDA version: %v", err)
|
|
}
|
|
|
|
return majorMinor, nil
|
|
}
|
|
|
|
func parseMajorMinorVersion(version string) (string, error) {
|
|
vVersion := "v" + strings.TrimPrefix(version, "v")
|
|
|
|
if !semver.IsValid(vVersion) {
|
|
return "", fmt.Errorf("invalid version string")
|
|
}
|
|
|
|
majorMinor := strings.TrimPrefix(semver.MajorMinor(vVersion), "v")
|
|
parts := strings.Split(majorMinor, ".")
|
|
|
|
var err error
|
|
_, err = strconv.ParseUint(parts[0], 10, 32)
|
|
if err != nil {
|
|
return "", fmt.Errorf("invalid major version")
|
|
}
|
|
_, err = strconv.ParseUint(parts[1], 10, 32)
|
|
if err != nil {
|
|
return "", fmt.Errorf("invalid minor version")
|
|
}
|
|
return majorMinor, nil
|
|
}
|