nvidia-container-toolkit/internal/config/image/cuda_image.go

/**
# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/

package image

import (
	"fmt"
	"sort"
	"strconv"
	"strings"

	"github.com/opencontainers/runtime-spec/specs-go"
	"golang.org/x/mod/semver"
)

// Names of the image environment variables consulted by the CUDA type:
//   - envCUDAVersion is read by IsLegacy and legacyVersion.
//   - envNVRequirePrefix is the name prefix GetRequirements matches against.
//   - envNVRequireCUDA is checked by IsLegacy; a non-empty value means the
//     image is not treated as legacy.
//   - envNVRequireJetpack is a name prefix that GetRequirements excludes.
//   - envNVDisableRequire is parsed as a boolean by HasDisableRequire.
//   - envNVDriverCapabilities holds the comma-separated list split by
//     GetDriverCapabilities.
const (
	envCUDAVersion          = "CUDA_VERSION"
	envNVRequirePrefix      = "NVIDIA_REQUIRE_"
	envNVRequireCUDA        = envNVRequirePrefix + "CUDA"
	envNVRequireJetpack     = envNVRequirePrefix + "JETPACK"
	envNVDisableRequire     = "NVIDIA_DISABLE_REQUIRE"
	envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)

// CUDA represents a CUDA image that can be used for GPU computing. It is a map
// from environment variable name to value (as populated by
// NewCUDAImageFromEnv), and its methods perform lookups on those variables,
// such as collecting requirements.
type CUDA map[string]string

// NewCUDAImageFromSpec builds a CUDA image from an OCI runtime spec by reading
// the process environment. A nil spec, or a spec without a process section, is
// treated as having no environment at all.
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
	var env []string
	if spec != nil && spec.Process != nil {
		env = spec.Process.Env
	}

	return NewCUDAImageFromEnv(env)
}

// NewCUDAImageFromEnv builds a CUDA image from a list of environment entries,
// each expected to have the form NAME=VALUE. The name is everything before the
// first "=" and the value is everything after it, so values may themselves
// contain "=". When a name occurs more than once the last entry wins. An entry
// without any "=" is rejected with an error and a nil image is returned.
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
	image := make(CUDA)

	for _, entry := range env {
		sep := strings.IndexByte(entry, '=')
		if sep < 0 {
			return nil, fmt.Errorf("invalid environment variable: %v", entry)
		}
		image[entry[:sep]] = entry[sep+1:]
	}

	return image, nil
}

// IsLegacy reports whether the image is a "legacy" CUDA image: one with a
// non-empty CUDA_VERSION environment variable and an unset or empty
// NVIDIA_REQUIRE_CUDA environment variable.
func (i CUDA) IsLegacy() bool {
	// An explicit CUDA requirement always marks the image as non-legacy.
	if i[envNVRequireCUDA] != "" {
		return false
	}
	return i[envCUDAVersion] != ""
}

// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
// variables, excluding those whose name starts with NVIDIA_REQUIRE_JETPACK.
//
// The values are returned ordered by environment variable name so that the
// result is stable across calls (Go map iteration order is unspecified). For a
// legacy image (see IsLegacy) a "cuda>=MAJOR.MINOR" requirement derived from
// CUDA_VERSION is appended last.
//
// A nil slice is returned when there are no requirements. An error wrapping the
// underlying cause is returned if the image is legacy and its CUDA_VERSION
// cannot be parsed.
func (i CUDA) GetRequirements() ([]string, error) {
	// TODO: We need not process this if disable require is set, but this will be done
	// in a single follow-up to ensure that the behavioural change is accurately captured.
	// if i.HasDisableRequire() {
	// 	return nil, nil
	// }

	// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli.
	// Collect the names first so that they can be sorted for a deterministic result.
	var names []string
	for name := range i {
		if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
			names = append(names, name)
		}
	}
	sort.Strings(names)

	var requirements []string
	for _, name := range names {
		requirements = append(requirements, i[name])
	}

	if i.IsLegacy() {
		v, err := i.legacyVersion()
		if err != nil {
			return nil, fmt.Errorf("failed to get version: %w", err)
		}
		requirements = append(requirements, fmt.Sprintf("cuda>=%s", v))
	}
	return requirements, nil
}

// HasDisableRequire reports whether requirement checks have been disabled
// through the NVIDIA_DISABLE_REQUIRE environment variable. It returns true only
// if the variable is present and strconv.ParseBool parses its value as true; an
// absent variable or an unparseable value yields false.
func (i CUDA) HasDisableRequire() bool {
	raw, found := i[envNVDisableRequire]
	if !found {
		return false
	}

	// ParseBool returns false alongside its error, so an invalid value is
	// deliberately treated the same as an explicit "false".
	disabled, _ := strconv.ParseBool(raw)
	return disabled
}

// DevicesFromEnvvars returns the devices requested by the image through
// environment variables. The given names are checked in order and the value of
// the first one present in the image is used:
//   - no variable present and the image is legacy: "all" is selected;
//   - no variable present, or the value is empty or "void": "void" is selected;
//   - otherwise the value is passed through unchanged.
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
	// Look up the first candidate variable that is set, even if set to "".
	selected, found := "", false
	for _, name := range envVars {
		if selected, found = i[name]; found {
			break
		}
	}

	switch {
	case !found && i.IsLegacy():
		// Legacy images default to all devices when nothing is requested.
		return newVisibleDevices("all")
	case !found, selected == "", selected == "void":
		return newVisibleDevices("void")
	default:
		return newVisibleDevices(selected)
	}
}

// GetDriverCapabilities returns the driver capabilities requested through the
// NVIDIA_DRIVER_CAPABILITIES environment variable. The value is split on ","
// and every resulting element is recorded as requested, without trimming or
// filtering. Because splitting an empty string yields a single empty element,
// an unset or empty variable produces a set containing only the empty
// capability.
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
	requested := strings.Split(i[envNVDriverCapabilities], ",")

	result := make(DriverCapabilities)
	for idx := range requested {
		result[DriverCapability(requested[idx])] = true
	}

	return result
}

// legacyVersion returns the MAJOR.MINOR form of the image's CUDA_VERSION
// environment variable, as produced by parseMajorMinorVersion. An error is
// returned if that value cannot be parsed.
func (i CUDA) legacyVersion() (string, error) {
	version, err := parseMajorMinorVersion(i[envCUDAVersion])
	if err == nil {
		return version, nil
	}

	return "", fmt.Errorf("invalid CUDA version: %v", err)
}

// parseMajorMinorVersion validates version as a semantic version (with or
// without a leading "v") and returns its MAJOR.MINOR part without the "v"
// prefix. An error is returned if the string is not a valid semantic version or
// if either component does not fit in an unsigned 32-bit integer.
func parseMajorMinorVersion(version string) (string, error) {
	// The semver package requires a leading "v"; add one if it is missing.
	canonical := "v" + strings.TrimPrefix(version, "v")
	if !semver.IsValid(canonical) {
		return "", fmt.Errorf("invalid version string")
	}

	majorMinor := strings.TrimPrefix(semver.MajorMinor(canonical), "v")
	components := strings.Split(majorMinor, ".")

	if _, err := strconv.ParseUint(components[0], 10, 32); err != nil {
		return "", fmt.Errorf("invalid major version")
	}
	if _, err := strconv.ParseUint(components[1], 10, 32); err != nil {
		return "", fmt.Errorf("invalid minor version")
	}

	return majorMinor, nil
}
Add CUDA image abstraction This change adds a CUDA image abstraction that encapsulates the queries performed on a container image (e.g. envvars) to check certain CUDA properties. Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-04-26 10:07:01 +00:00			`/**`
			`# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`**/`

			`package image`

			`import (`
			`"fmt"`
			`"strconv"`
			`"strings"`

			`"github.com/opencontainers/runtime-spec/specs-go"`
			`"golang.org/x/mod/semver"`
			`)`

			`const (`
Add utilities for driver capabilities to image packages Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-09-29 12:39:56 +00:00			`envCUDAVersion = "CUDA_VERSION"`
			`envNVRequirePrefix = "NVIDIA_REQUIRE_"`
			`envNVRequireCUDA = envNVRequirePrefix + "CUDA"`
			`envNVRequireJetpack = envNVRequirePrefix + "JETPACK"`
			`envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"`
			`envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"`
Add CUDA image abstraction This change adds a CUDA image abstraction that encapsulates the queries performed on a container image (e.g. envvars) to check certain CUDA properties. Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-04-26 10:07:01 +00:00			`)`

			`// CUDA represents a CUDA image that can be used for GPU computing. This wraps`
			`// a map of environment variable to values that can be used to perform lookups`
			`// such as requirements.`
			`type CUDA map[string]string`

			`// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.`
			`// The process environment is read (if present) to construc the CUDA Image.`
			`func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {`
			`if spec == nil \|\| spec.Process == nil {`
			`return NewCUDAImageFromEnv(nil)`
			`}`

			`return NewCUDAImageFromEnv(spec.Process.Env)`
			`}`

			`// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment`
			`// is a list of strings of the form ENVAR=VALUE.`
			`func NewCUDAImageFromEnv(env []string) (CUDA, error) {`
			`c := make(CUDA)`

			`for _, e := range env {`
			`parts := strings.SplitN(e, "=", 2)`
			`if len(parts) != 2 {`
			`return nil, fmt.Errorf("invalid environment variable: %v", e)`
			`}`
			`c[parts[0]] = parts[1]`
			`}`

			`return c, nil`
			`}`

			`// IsLegacy returns whether the associated CUDA image is a "legacy" image. An`
			`// image is considered legacy if it has a CUDA_VERSION environment variable defined`
			`// and no NVIDIA_REQUIRE_CUDA environment variable defined.`
			`func (i CUDA) IsLegacy() bool {`
			`legacyCudaVersion := i[envCUDAVersion]`
			`cudaRequire := i[envNVRequireCUDA]`
			`return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0`
			`}`

Fix form -> from in comment Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-05-06 11:22:34 +00:00			`// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment`
Add CUDA image abstraction This change adds a CUDA image abstraction that encapsulates the queries performed on a container image (e.g. envvars) to check certain CUDA properties. Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-04-26 10:07:01 +00:00			`// variables.`
			`func (i CUDA) GetRequirements() ([]string, error) {`
			`// TODO: We need not process this if disable require is set, but this will be done`
			`// in a single follow-up to ensure that the behavioural change is accurately captured.`
			`// if i.HasDisableRequire() {`
			`// return nil, nil`
			`// }`

			`// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli`
			`var requirements []string`
			`for name, value := range i {`
Ignore NVIDIA_REQUIRE_JETPACK* for image requirements Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-05-24 07:34:35 +00:00			`if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {`
Add CUDA image abstraction This change adds a CUDA image abstraction that encapsulates the queries performed on a container image (e.g. envvars) to check certain CUDA properties. Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-04-26 10:07:01 +00:00			`requirements = append(requirements, value)`
			`}`
			`}`
			`if i.IsLegacy() {`
			`v, err := i.legacyVersion()`
			`if err != nil {`
			`return nil, fmt.Errorf("failed to get version: %v", err)`
			`}`
			`cudaRequire := fmt.Sprintf("cuda>=%s", v)`
			`requirements = append(requirements, cudaRequire)`
			`}`
			`return requirements, nil`
			`}`

			`// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set`
			`// to a valid (true) boolean value this can be used to disable the requirement checks`
			`func (i CUDA) HasDisableRequire() bool {`
			`if disable, exists := i[envNVDisableRequire]; exists {`
			`// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)`
			`d, _ := strconv.ParseBool(disable)`
			`return d`
			`}`

			`return false`
			`}`

Add DevicesFromEnvvars function to CUDA image Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-06-29 15:33:05 +00:00			`// DevicesFromEnvvars returns the devices requested by the image through environment variables`
Add Devices abstraction to CUDA image This change adds a Devices abstraction to the CUDA image utilities. This allows for checking whether a devices is selected, for example. Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-10-26 10:37:23 +00:00			`func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {`
Add DevicesFromEnvvars function to CUDA image Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-06-29 15:33:05 +00:00			`// Grab a reference to devices from the first envvar`
			`// in the list that actually exists in the environment.`
			`var devices *string`
			`for _, envVar := range envVars {`
			`if devs, ok := i[envVar]; ok {`
			`devices = &devs`
			`break`
			`}`
			`}`

			`// Environment variable unset with legacy image: default to "all".`
			`if devices == nil && i.IsLegacy() {`
Add Devices abstraction to CUDA image This change adds a Devices abstraction to the CUDA image utilities. This allows for checking whether a devices is selected, for example. Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-10-26 10:37:23 +00:00			`return newVisibleDevices("all")`
Add DevicesFromEnvvars function to CUDA image Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-06-29 15:33:05 +00:00			`}`

			`// Environment variable unset or empty or "void": return nil`
			`if devices == nil \|\| len(devices) == 0 \|\| devices == "void" {`
Add Devices abstraction to CUDA image This change adds a Devices abstraction to the CUDA image utilities. This allows for checking whether a devices is selected, for example. Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-10-26 10:37:23 +00:00			`return newVisibleDevices("void")`
Add DevicesFromEnvvars function to CUDA image Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-06-29 15:33:05 +00:00			`}`

			`// Environment variable set to "none": reset to "".`
Add Devices abstraction to CUDA image This change adds a Devices abstraction to the CUDA image utilities. This allows for checking whether a devices is selected, for example. Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-10-26 10:37:23 +00:00			`return newVisibleDevices(*devices)`
Add DevicesFromEnvvars function to CUDA image Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-06-29 15:33:05 +00:00			`}`

Add utilities for driver capabilities to image packages Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-09-29 12:39:56 +00:00			`// GetDriverCapabilities returns the requested driver capabilities.`
			`func (i CUDA) GetDriverCapabilities() DriverCapabilities {`
			`env := i[envNVDriverCapabilities]`

			`capabilites := make(DriverCapabilities)`
			`for _, c := range strings.Split(env, ",") {`
			`capabilites[DriverCapability(c)] = true`
			`}`

			`return capabilites`
			`}`

Add CUDA image abstraction This change adds a CUDA image abstraction that encapsulates the queries performed on a container image (e.g. envvars) to check certain CUDA properties. Signed-off-by: Evan Lezar <elezar@nvidia.com> 2022-04-26 10:07:01 +00:00			`func (i CUDA) legacyVersion() (string, error) {`
			`majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])`
			`if err != nil {`
			`return "", fmt.Errorf("invalid CUDA version: %v", err)`
			`}`

			`return majorMinor, nil`
			`}`

			`func parseMajorMinorVersion(version string) (string, error) {`
			`vVersion := "v" + strings.TrimPrefix(version, "v")`

			`if !semver.IsValid(vVersion) {`
			`return "", fmt.Errorf("invalid version string")`
			`}`

			`majorMinor := strings.TrimPrefix(semver.MajorMinor(vVersion), "v")`
			`parts := strings.Split(majorMinor, ".")`

			`var err error`
			`_, err = strconv.ParseUint(parts[0], 10, 32)`
			`if err != nil {`
			`return "", fmt.Errorf("invalid major version")`
			`}`
			`_, err = strconv.ParseUint(parts[1], 10, 32)`
			`if err != nil {`
			`return "", fmt.Errorf("invalid minor version")`
			`}`
			`return majorMinor, nil`
			`}`