mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Support CDI devices as mounts
This change allows CDI devices to be requested as mounts in the container. This enables their use in environments such as kind where environment variables or annotations cannot be used. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
@@ -19,10 +19,13 @@ package image
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
type builder struct {
|
||||
env []string
|
||||
env map[string]string
|
||||
mounts []specs.Mount
|
||||
disableRequire bool
|
||||
}
|
||||
|
||||
@@ -30,7 +33,12 @@ type builder struct {
|
||||
func New(opt ...Option) (CUDA, error) {
|
||||
b := &builder{}
|
||||
for _, o := range opt {
|
||||
o(b)
|
||||
if err := o(b); err != nil {
|
||||
return CUDA{}, err
|
||||
}
|
||||
}
|
||||
if b.env == nil {
|
||||
b.env = make(map[string]string)
|
||||
}
|
||||
|
||||
return b.build()
|
||||
@@ -38,36 +46,57 @@ func New(opt ...Option) (CUDA, error) {
|
||||
|
||||
// build creates a CUDA image from the builder.
|
||||
func (b builder) build() (CUDA, error) {
|
||||
c := make(CUDA)
|
||||
|
||||
for _, e := range b.env {
|
||||
parts := strings.SplitN(e, "=", 2)
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("invalid environment variable: %v", e)
|
||||
}
|
||||
c[parts[0]] = parts[1]
|
||||
}
|
||||
|
||||
if b.disableRequire {
|
||||
c[envNVDisableRequire] = "true"
|
||||
b.env[envNVDisableRequire] = "true"
|
||||
}
|
||||
|
||||
c := CUDA{
|
||||
env: b.env,
|
||||
mounts: b.mounts,
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Option is a functional option for creating a CUDA image.
|
||||
type Option func(*builder)
|
||||
type Option func(*builder) error
|
||||
|
||||
// WithDisableRequire sets the disable require option.
|
||||
func WithDisableRequire(disableRequire bool) Option {
|
||||
return func(b *builder) {
|
||||
return func(b *builder) error {
|
||||
b.disableRequire = disableRequire
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithEnv sets the environment variables to use when creating the CUDA image.
|
||||
// Note that this also overwrites the values set with WithEnvMap.
|
||||
func WithEnv(env []string) Option {
|
||||
return func(b *builder) {
|
||||
b.env = env
|
||||
return func(b *builder) error {
|
||||
envmap := make(map[string]string)
|
||||
for _, e := range env {
|
||||
parts := strings.SplitN(e, "=", 2)
|
||||
if len(parts) != 2 {
|
||||
return fmt.Errorf("invalid environment variable: %v", e)
|
||||
}
|
||||
envmap[parts[0]] = parts[1]
|
||||
}
|
||||
return WithEnvMap(envmap)(b)
|
||||
}
|
||||
}
|
||||
|
||||
// WithEnvMap sets the environment variable map to use when creating the CUDA image.
|
||||
// Note that this also overwrites the values set with WithEnv.
|
||||
func WithEnvMap(env map[string]string) Option {
|
||||
return func(b *builder) error {
|
||||
b.env = env
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithMounts sets the mounts associated with the CUDA image.
|
||||
func WithMounts(mounts []specs.Mount) Option {
|
||||
return func(b *builder) error {
|
||||
b.mounts = mounts
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,9 +18,11 @@ package image
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/parser"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/mod/semver"
|
||||
)
|
||||
@@ -37,7 +39,10 @@ const (
|
||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||
// a map of environment variable to values that can be used to perform lookups
|
||||
// such as requirements.
|
||||
type CUDA map[string]string
|
||||
type CUDA struct {
|
||||
env map[string]string
|
||||
mounts []specs.Mount
|
||||
}
|
||||
|
||||
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
||||
// The process environment is read (if present) to construct the CUDA Image.
|
||||
@@ -47,7 +52,10 @@ func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
|
||||
env = spec.Process.Env
|
||||
}
|
||||
|
||||
return New(WithEnv(env))
|
||||
return New(
|
||||
WithEnv(env),
|
||||
WithMounts(spec.Mounts),
|
||||
)
|
||||
}
|
||||
|
||||
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
||||
@@ -56,12 +64,24 @@ func NewCUDAImageFromEnv(env []string) (CUDA, error) {
|
||||
return New(WithEnv(env))
|
||||
}
|
||||
|
||||
// Getenv returns the value of the specified environment variable.
|
||||
// If the environment variable is not specified, an empty string is returned.
|
||||
func (i CUDA) Getenv(key string) string {
|
||||
return i.env[key]
|
||||
}
|
||||
|
||||
// HasEnvvar checks whether the specified envvar is defined in the image.
|
||||
func (i CUDA) HasEnvvar(key string) bool {
|
||||
_, exists := i.env[key]
|
||||
return exists
|
||||
}
|
||||
|
||||
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
|
||||
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
||||
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
||||
func (i CUDA) IsLegacy() bool {
|
||||
legacyCudaVersion := i[envCUDAVersion]
|
||||
cudaRequire := i[envNVRequireCUDA]
|
||||
legacyCudaVersion := i.env[envCUDAVersion]
|
||||
cudaRequire := i.env[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
@@ -74,7 +94,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
||||
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range i {
|
||||
for name, value := range i.env {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
@@ -93,7 +113,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
||||
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
||||
// to a valid (true) boolean value this can be used to disable the requirement checks
|
||||
func (i CUDA) HasDisableRequire() bool {
|
||||
if disable, exists := i[envNVDisableRequire]; exists {
|
||||
if disable, exists := i.env[envNVDisableRequire]; exists {
|
||||
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
||||
d, _ := strconv.ParseBool(disable)
|
||||
return d
|
||||
@@ -104,12 +124,12 @@ func (i CUDA) HasDisableRequire() bool {
|
||||
|
||||
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
// We concantenate all the devices from the specified envvars.
|
||||
// We concatenate all the devices from the specified environment variables.
|
||||
var isSet bool
|
||||
var devices []string
|
||||
requested := make(map[string]bool)
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := i[envVar]; ok {
|
||||
if devs, ok := i.env[envVar]; ok {
|
||||
isSet = true
|
||||
for _, d := range strings.Split(devs, ",") {
|
||||
trimmed := strings.TrimSpace(d)
|
||||
@@ -137,7 +157,7 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
|
||||
// GetDriverCapabilities returns the requested driver capabilities.
|
||||
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||
env := i[envNVDriverCapabilities]
|
||||
env := i.env[envNVDriverCapabilities]
|
||||
|
||||
capabilities := make(DriverCapabilities)
|
||||
for _, c := range strings.Split(env, ",") {
|
||||
@@ -148,7 +168,7 @@ func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||
}
|
||||
|
||||
func (i CUDA) legacyVersion() (string, error) {
|
||||
cudaVersion := i[envCUDAVersion]
|
||||
cudaVersion := i.env[envCUDAVersion]
|
||||
majorMinor, err := parseMajorMinorVersion(cudaVersion)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
|
||||
@@ -178,3 +198,79 @@ func parseMajorMinorVersion(version string) (string, error) {
|
||||
}
|
||||
return majorMinor, nil
|
||||
}
|
||||
|
||||
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
|
||||
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
||||
var hasCDIdevice bool
|
||||
for _, device := range i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
|
||||
if !parser.IsQualifiedName(device) {
|
||||
return false
|
||||
}
|
||||
hasCDIdevice = true
|
||||
}
|
||||
|
||||
for _, device := range i.DevicesFromMounts() {
|
||||
if !strings.HasPrefix(device, "cdi/") {
|
||||
return false
|
||||
}
|
||||
hasCDIdevice = true
|
||||
}
|
||||
return hasCDIdevice
|
||||
}
|
||||
|
||||
const (
|
||||
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
||||
)
|
||||
|
||||
// DevicesFromMounts returns a list of device specified as mounts.
|
||||
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
|
||||
func (i CUDA) DevicesFromMounts() []string {
|
||||
root := filepath.Clean(deviceListAsVolumeMountsRoot)
|
||||
seen := make(map[string]bool)
|
||||
var devices []string
|
||||
for _, m := range i.mounts {
|
||||
source := filepath.Clean(m.Source)
|
||||
// Only consider mounts who's host volume is /dev/null
|
||||
if source != "/dev/null" {
|
||||
continue
|
||||
}
|
||||
|
||||
destination := filepath.Clean(m.Destination)
|
||||
if seen[destination] {
|
||||
continue
|
||||
}
|
||||
seen[destination] = true
|
||||
|
||||
// Only consider container mount points that begin with 'root'
|
||||
if !strings.HasPrefix(destination, root) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Grab the full path beyond 'root' and add it to the list of devices
|
||||
device := strings.Trim(strings.TrimPrefix(destination, root), "/")
|
||||
if len(device) == 0 {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, device)
|
||||
}
|
||||
return devices
|
||||
}
|
||||
|
||||
// CDIDevicesFromMounts returns a list of CDI devices specified as mounts on the image.
|
||||
func (i CUDA) CDIDevicesFromMounts() []string {
|
||||
var devices []string
|
||||
for _, mountDevice := range i.DevicesFromMounts() {
|
||||
if !strings.HasPrefix(mountDevice, "cdi/") {
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(strings.TrimPrefix(mountDevice, "cdi/"), "/", 3)
|
||||
if len(parts) != 3 {
|
||||
continue
|
||||
}
|
||||
vendor := parts[0]
|
||||
class := parts[1]
|
||||
device := parts[2]
|
||||
devices = append(devices, fmt.Sprintf("%s/%s=%s", vendor, class, device))
|
||||
}
|
||||
return devices
|
||||
}
|
||||
|
||||
@@ -126,7 +126,6 @@ func TestGetRequirements(t *testing.T) {
|
||||
requirements, err := image.GetRequirements()
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, tc.requirements, requirements)
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user