Support CDI devices as mounts

This change allows CDI devices to be requested as mounts in the
container. This enables their use in environments such as kind
where environment variables or annotations cannot be used.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2023-10-10 13:48:38 +02:00
parent 1b1aae9c4a
commit 833254fa59
13 changed files with 278 additions and 84 deletions

View File

@@ -19,10 +19,13 @@ package image
import (
"fmt"
"strings"
"github.com/opencontainers/runtime-spec/specs-go"
)
type builder struct {
env []string
env map[string]string
mounts []specs.Mount
disableRequire bool
}
@@ -30,7 +33,12 @@ type builder struct {
func New(opt ...Option) (CUDA, error) {
b := &builder{}
for _, o := range opt {
o(b)
if err := o(b); err != nil {
return CUDA{}, err
}
}
if b.env == nil {
b.env = make(map[string]string)
}
return b.build()
@@ -38,36 +46,57 @@ func New(opt ...Option) (CUDA, error) {
// build creates a CUDA image from the builder.
func (b builder) build() (CUDA, error) {
c := make(CUDA)
for _, e := range b.env {
parts := strings.SplitN(e, "=", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("invalid environment variable: %v", e)
}
c[parts[0]] = parts[1]
}
if b.disableRequire {
c[envNVDisableRequire] = "true"
b.env[envNVDisableRequire] = "true"
}
c := CUDA{
env: b.env,
mounts: b.mounts,
}
return c, nil
}
// Option is a functional option for creating a CUDA image.
type Option func(*builder)
type Option func(*builder) error
// WithDisableRequire sets the disable require option.
func WithDisableRequire(disableRequire bool) Option {
return func(b *builder) {
return func(b *builder) error {
b.disableRequire = disableRequire
return nil
}
}
// WithEnv sets the environment variables to use when creating the CUDA image.
// Note that this also overwrites the values set with WithEnvMap.
func WithEnv(env []string) Option {
return func(b *builder) {
b.env = env
return func(b *builder) error {
envmap := make(map[string]string)
for _, e := range env {
parts := strings.SplitN(e, "=", 2)
if len(parts) != 2 {
return fmt.Errorf("invalid environment variable: %v", e)
}
envmap[parts[0]] = parts[1]
}
return WithEnvMap(envmap)(b)
}
}
// WithEnvMap sets the environment variable map to use when creating the CUDA image.
// Note that this also overwrites the values set with WithEnv.
func WithEnvMap(env map[string]string) Option {
return func(b *builder) error {
b.env = env
return nil
}
}
// WithMounts sets the mounts associated with the CUDA image.
func WithMounts(mounts []specs.Mount) Option {
return func(b *builder) error {
b.mounts = mounts
return nil
}
}

View File

@@ -18,9 +18,11 @@ package image
import (
"fmt"
"path/filepath"
"strconv"
"strings"
"github.com/container-orchestrated-devices/container-device-interface/pkg/parser"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
)
@@ -37,7 +39,10 @@ const (
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
// a map of environment variable to values that can be used to perform lookups
// such as requirements.
type CUDA map[string]string
type CUDA struct {
env map[string]string
mounts []specs.Mount
}
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
// The process environment is read (if present) to construc the CUDA Image.
@@ -47,7 +52,10 @@ func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
env = spec.Process.Env
}
return New(WithEnv(env))
return New(
WithEnv(env),
WithMounts(spec.Mounts),
)
}
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
@@ -56,12 +64,24 @@ func NewCUDAImageFromEnv(env []string) (CUDA, error) {
return New(WithEnv(env))
}
// Getenv returns the value of the specified environment variable.
// If the environment variable is not specified, an empty string is returned.
func (i CUDA) Getenv(key string) string {
return i.env[key]
}
// HasEnvvar checks whether the specified envvar is defined in the image.
func (i CUDA) HasEnvvar(key string) bool {
_, exists := i.env[key]
return exists
}
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
// image is considered legacy if it has a CUDA_VERSION environment variable defined
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
func (i CUDA) IsLegacy() bool {
legacyCudaVersion := i[envCUDAVersion]
cudaRequire := i[envNVRequireCUDA]
legacyCudaVersion := i.env[envCUDAVersion]
cudaRequire := i.env[envNVRequireCUDA]
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
}
@@ -74,7 +94,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
var requirements []string
for name, value := range i {
for name, value := range i.env {
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
requirements = append(requirements, value)
}
@@ -93,7 +113,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
// to a valid (true) boolean value this can be used to disable the requirement checks
func (i CUDA) HasDisableRequire() bool {
if disable, exists := i[envNVDisableRequire]; exists {
if disable, exists := i.env[envNVDisableRequire]; exists {
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
d, _ := strconv.ParseBool(disable)
return d
@@ -104,12 +124,12 @@ func (i CUDA) HasDisableRequire() bool {
// DevicesFromEnvvars returns the devices requested by the image through environment variables
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
// We concantenate all the devices from the specified envvars.
// We concantenate all the devices from the specified env.
var isSet bool
var devices []string
requested := make(map[string]bool)
for _, envVar := range envVars {
if devs, ok := i[envVar]; ok {
if devs, ok := i.env[envVar]; ok {
isSet = true
for _, d := range strings.Split(devs, ",") {
trimmed := strings.TrimSpace(d)
@@ -137,7 +157,7 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
// GetDriverCapabilities returns the requested driver capabilities.
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
env := i[envNVDriverCapabilities]
env := i.env[envNVDriverCapabilities]
capabilities := make(DriverCapabilities)
for _, c := range strings.Split(env, ",") {
@@ -148,7 +168,7 @@ func (i CUDA) GetDriverCapabilities() DriverCapabilities {
}
func (i CUDA) legacyVersion() (string, error) {
cudaVersion := i[envCUDAVersion]
cudaVersion := i.env[envCUDAVersion]
majorMinor, err := parseMajorMinorVersion(cudaVersion)
if err != nil {
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
@@ -178,3 +198,79 @@ func parseMajorMinorVersion(version string) (string, error) {
}
return majorMinor, nil
}
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
var hasCDIdevice bool
for _, device := range i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
if !parser.IsQualifiedName(device) {
return false
}
hasCDIdevice = true
}
for _, device := range i.DevicesFromMounts() {
if !strings.HasPrefix(device, "cdi/") {
return false
}
hasCDIdevice = true
}
return hasCDIdevice
}
const (
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
)
// DevicesFromMounts returns a list of device specified as mounts.
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
func (i CUDA) DevicesFromMounts() []string {
root := filepath.Clean(deviceListAsVolumeMountsRoot)
seen := make(map[string]bool)
var devices []string
for _, m := range i.mounts {
source := filepath.Clean(m.Source)
// Only consider mounts who's host volume is /dev/null
if source != "/dev/null" {
continue
}
destination := filepath.Clean(m.Destination)
if seen[destination] {
continue
}
seen[destination] = true
// Only consider container mount points that begin with 'root'
if !strings.HasPrefix(destination, root) {
continue
}
// Grab the full path beyond 'root' and add it to the list of devices
device := strings.Trim(strings.TrimPrefix(destination, root), "/")
if len(device) == 0 {
continue
}
devices = append(devices, device)
}
return devices
}
// CDIDevicesFromMounts returns a list of CDI devices specified as mounts on the image.
func (i CUDA) CDIDevicesFromMounts() []string {
var devices []string
for _, mountDevice := range i.DevicesFromMounts() {
if !strings.HasPrefix(mountDevice, "cdi/") {
continue
}
parts := strings.SplitN(strings.TrimPrefix(mountDevice, "cdi/"), "/", 3)
if len(parts) != 3 {
continue
}
vendor := parts[0]
class := parts[1]
device := parts[2]
devices = append(devices, fmt.Sprintf("%s/%s=%s", vendor, class, device))
}
return devices
}

View File

@@ -126,7 +126,6 @@ func TestGetRequirements(t *testing.T) {
requirements, err := image.GetRequirements()
require.NoError(t, err)
require.ElementsMatch(t, tc.requirements, requirements)
})
}