mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-05 13:05:03 +00:00
Support CDI devices as mounts
This change allows CDI devices to be requested as mounts in the container. This enables their use in environments such as kind where environment variables or annotations cannot be used. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
1b1aae9c4a
commit
833254fa59
@ -174,7 +174,7 @@ func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *stri
|
|||||||
// if specified.
|
// if specified.
|
||||||
var hasSwarmEnvvar bool
|
var hasSwarmEnvvar bool
|
||||||
for _, envvar := range swarmResourceEnvvars {
|
for _, envvar := range swarmResourceEnvvars {
|
||||||
if _, exists := image[envvar]; exists {
|
if image.HasEnvvar(envvar) {
|
||||||
hasSwarmEnvvar = true
|
hasSwarmEnvvar = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@ -257,28 +257,31 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMigConfigDevices(env map[string]string) *string {
|
func getMigConfigDevices(image image.CUDA) *string {
|
||||||
if devices, ok := env[envNVMigConfigDevices]; ok {
|
return getMigDevices(image, envNVMigConfigDevices)
|
||||||
return &devices
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMigMonitorDevices(env map[string]string) *string {
|
func getMigMonitorDevices(image image.CUDA) *string {
|
||||||
if devices, ok := env[envNVMigMonitorDevices]; ok {
|
return getMigDevices(image, envNVMigMonitorDevices)
|
||||||
return &devices
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities {
|
func getMigDevices(image image.CUDA, envvar string) *string {
|
||||||
|
if !image.HasEnvvar(envvar) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
devices := image.Getenv(envvar)
|
||||||
|
return &devices
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
|
||||||
// We use the default driver capabilities by default. This is filtered to only include the
|
// We use the default driver capabilities by default. This is filtered to only include the
|
||||||
// supported capabilities
|
// supported capabilities
|
||||||
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
|
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
|
||||||
|
|
||||||
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
||||||
|
|
||||||
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
|
capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities)
|
||||||
|
capsEnv := cudaImage.Getenv(envNVDriverCapabilities)
|
||||||
|
|
||||||
if !capsEnvSpecified && legacyImage {
|
if !capsEnvSpecified && legacyImage {
|
||||||
// Environment variable unset with legacy image: set all capabilities.
|
// Environment variable unset with legacy image: set all capabilities.
|
||||||
|
@ -465,6 +465,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
}
|
}
|
||||||
for _, tc := range tests {
|
for _, tc := range tests {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
image, _ := image.New(
|
||||||
|
image.WithEnvMap(tc.env),
|
||||||
|
)
|
||||||
// Wrap the call to getNvidiaConfig() in a closure.
|
// Wrap the call to getNvidiaConfig() in a closure.
|
||||||
var config *nvidiaConfig
|
var config *nvidiaConfig
|
||||||
getConfig := func() {
|
getConfig := func() {
|
||||||
@ -473,7 +476,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
defaultConfig, _ := getDefaultHookConfig()
|
defaultConfig, _ := getDefaultHookConfig()
|
||||||
hookConfig = &defaultConfig
|
hookConfig = &defaultConfig
|
||||||
}
|
}
|
||||||
config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged)
|
config = getNvidiaConfig(hookConfig, image, nil, tc.privileged)
|
||||||
}
|
}
|
||||||
|
|
||||||
// For any tests that are expected to panic, make sure they do.
|
// For any tests that are expected to panic, make sure they do.
|
||||||
@ -678,13 +681,17 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
|||||||
// Wrap the call to getDevices() in a closure.
|
// Wrap the call to getDevices() in a closure.
|
||||||
var devices *string
|
var devices *string
|
||||||
getDevices := func() {
|
getDevices := func() {
|
||||||
env := map[string]string{
|
image, _ := image.New(
|
||||||
|
image.WithEnvMap(
|
||||||
|
map[string]string{
|
||||||
envNVVisibleDevices: tc.envvarDevices,
|
envNVVisibleDevices: tc.envvarDevices,
|
||||||
}
|
},
|
||||||
|
),
|
||||||
|
)
|
||||||
hookConfig, _ := getDefaultHookConfig()
|
hookConfig, _ := getDefaultHookConfig()
|
||||||
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||||
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
|
devices = getDevices(&hookConfig, image, tc.mountDevices, tc.privileged)
|
||||||
}
|
}
|
||||||
|
|
||||||
// For all other tests, just grab the devices and check the results
|
// For all other tests, just grab the devices and check the results
|
||||||
@ -905,7 +912,10 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
|
|
||||||
for i, tc := range tests {
|
for i, tc := range tests {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars)
|
image, _ := image.New(
|
||||||
|
image.WithEnvMap(tc.env),
|
||||||
|
)
|
||||||
|
devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
|
||||||
if tc.expectedDevices == nil {
|
if tc.expectedDevices == nil {
|
||||||
require.Nil(t, devices, "%d: %v", i, tc)
|
require.Nil(t, devices, "%d: %v", i, tc)
|
||||||
return
|
return
|
||||||
@ -1021,8 +1031,11 @@ func TestGetDriverCapabilities(t *testing.T) {
|
|||||||
SupportedDriverCapabilities: tc.supportedCapabilities,
|
SupportedDriverCapabilities: tc.supportedCapabilities,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
image, _ := image.New(
|
||||||
|
image.WithEnvMap(tc.env),
|
||||||
|
)
|
||||||
getDriverCapabilities := func() {
|
getDriverCapabilities := func() {
|
||||||
capabilities = c.getDriverCapabilities(tc.env, tc.legacyImage).String()
|
capabilities = c.getDriverCapabilities(image, tc.legacyImage).String()
|
||||||
}
|
}
|
||||||
|
|
||||||
if tc.expectedPanic {
|
if tc.expectedPanic {
|
||||||
|
@ -19,10 +19,13 @@ package image
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type builder struct {
|
type builder struct {
|
||||||
env []string
|
env map[string]string
|
||||||
|
mounts []specs.Mount
|
||||||
disableRequire bool
|
disableRequire bool
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -30,7 +33,12 @@ type builder struct {
|
|||||||
func New(opt ...Option) (CUDA, error) {
|
func New(opt ...Option) (CUDA, error) {
|
||||||
b := &builder{}
|
b := &builder{}
|
||||||
for _, o := range opt {
|
for _, o := range opt {
|
||||||
o(b)
|
if err := o(b); err != nil {
|
||||||
|
return CUDA{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if b.env == nil {
|
||||||
|
b.env = make(map[string]string)
|
||||||
}
|
}
|
||||||
|
|
||||||
return b.build()
|
return b.build()
|
||||||
@ -38,36 +46,57 @@ func New(opt ...Option) (CUDA, error) {
|
|||||||
|
|
||||||
// build creates a CUDA image from the builder.
|
// build creates a CUDA image from the builder.
|
||||||
func (b builder) build() (CUDA, error) {
|
func (b builder) build() (CUDA, error) {
|
||||||
c := make(CUDA)
|
|
||||||
|
|
||||||
for _, e := range b.env {
|
|
||||||
parts := strings.SplitN(e, "=", 2)
|
|
||||||
if len(parts) != 2 {
|
|
||||||
return nil, fmt.Errorf("invalid environment variable: %v", e)
|
|
||||||
}
|
|
||||||
c[parts[0]] = parts[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
if b.disableRequire {
|
if b.disableRequire {
|
||||||
c[envNVDisableRequire] = "true"
|
b.env[envNVDisableRequire] = "true"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
c := CUDA{
|
||||||
|
env: b.env,
|
||||||
|
mounts: b.mounts,
|
||||||
|
}
|
||||||
return c, nil
|
return c, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Option is a functional option for creating a CUDA image.
|
// Option is a functional option for creating a CUDA image.
|
||||||
type Option func(*builder)
|
type Option func(*builder) error
|
||||||
|
|
||||||
// WithDisableRequire sets the disable require option.
|
// WithDisableRequire sets the disable require option.
|
||||||
func WithDisableRequire(disableRequire bool) Option {
|
func WithDisableRequire(disableRequire bool) Option {
|
||||||
return func(b *builder) {
|
return func(b *builder) error {
|
||||||
b.disableRequire = disableRequire
|
b.disableRequire = disableRequire
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithEnv sets the environment variables to use when creating the CUDA image.
|
// WithEnv sets the environment variables to use when creating the CUDA image.
|
||||||
|
// Note that this also overwrites the values set with WithEnvMap.
|
||||||
func WithEnv(env []string) Option {
|
func WithEnv(env []string) Option {
|
||||||
return func(b *builder) {
|
return func(b *builder) error {
|
||||||
b.env = env
|
envmap := make(map[string]string)
|
||||||
|
for _, e := range env {
|
||||||
|
parts := strings.SplitN(e, "=", 2)
|
||||||
|
if len(parts) != 2 {
|
||||||
|
return fmt.Errorf("invalid environment variable: %v", e)
|
||||||
|
}
|
||||||
|
envmap[parts[0]] = parts[1]
|
||||||
|
}
|
||||||
|
return WithEnvMap(envmap)(b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithEnvMap sets the environment variable map to use when creating the CUDA image.
|
||||||
|
// Note that this also overwrites the values set with WithEnv.
|
||||||
|
func WithEnvMap(env map[string]string) Option {
|
||||||
|
return func(b *builder) error {
|
||||||
|
b.env = env
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithMounts sets the mounts associated with the CUDA image.
|
||||||
|
func WithMounts(mounts []specs.Mount) Option {
|
||||||
|
return func(b *builder) error {
|
||||||
|
b.mounts = mounts
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,9 +18,11 @@ package image
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/container-orchestrated-devices/container-device-interface/pkg/parser"
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"golang.org/x/mod/semver"
|
"golang.org/x/mod/semver"
|
||||||
)
|
)
|
||||||
@ -37,7 +39,10 @@ const (
|
|||||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||||
// a map of environment variable to values that can be used to perform lookups
|
// a map of environment variable to values that can be used to perform lookups
|
||||||
// such as requirements.
|
// such as requirements.
|
||||||
type CUDA map[string]string
|
type CUDA struct {
|
||||||
|
env map[string]string
|
||||||
|
mounts []specs.Mount
|
||||||
|
}
|
||||||
|
|
||||||
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
||||||
// The process environment is read (if present) to construct the CUDA Image.
|
// The process environment is read (if present) to construct the CUDA Image.
|
||||||
@ -47,7 +52,10 @@ func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
|
|||||||
env = spec.Process.Env
|
env = spec.Process.Env
|
||||||
}
|
}
|
||||||
|
|
||||||
return New(WithEnv(env))
|
return New(
|
||||||
|
WithEnv(env),
|
||||||
|
WithMounts(spec.Mounts),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
||||||
@ -56,12 +64,24 @@ func NewCUDAImageFromEnv(env []string) (CUDA, error) {
|
|||||||
return New(WithEnv(env))
|
return New(WithEnv(env))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Getenv returns the value of the specified environment variable.
|
||||||
|
// If the environment variable is not specified, an empty string is returned.
|
||||||
|
func (i CUDA) Getenv(key string) string {
|
||||||
|
return i.env[key]
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasEnvvar checks whether the specified envvar is defined in the image.
|
||||||
|
func (i CUDA) HasEnvvar(key string) bool {
|
||||||
|
_, exists := i.env[key]
|
||||||
|
return exists
|
||||||
|
}
|
||||||
|
|
||||||
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
|
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
|
||||||
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
||||||
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
||||||
func (i CUDA) IsLegacy() bool {
|
func (i CUDA) IsLegacy() bool {
|
||||||
legacyCudaVersion := i[envCUDAVersion]
|
legacyCudaVersion := i.env[envCUDAVersion]
|
||||||
cudaRequire := i[envNVRequireCUDA]
|
cudaRequire := i.env[envNVRequireCUDA]
|
||||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -74,7 +94,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
|||||||
|
|
||||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||||
var requirements []string
|
var requirements []string
|
||||||
for name, value := range i {
|
for name, value := range i.env {
|
||||||
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
||||||
requirements = append(requirements, value)
|
requirements = append(requirements, value)
|
||||||
}
|
}
|
||||||
@ -93,7 +113,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
|||||||
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
||||||
// to a valid (true) boolean value this can be used to disable the requirement checks
|
// to a valid (true) boolean value this can be used to disable the requirement checks
|
||||||
func (i CUDA) HasDisableRequire() bool {
|
func (i CUDA) HasDisableRequire() bool {
|
||||||
if disable, exists := i[envNVDisableRequire]; exists {
|
if disable, exists := i.env[envNVDisableRequire]; exists {
|
||||||
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
||||||
d, _ := strconv.ParseBool(disable)
|
d, _ := strconv.ParseBool(disable)
|
||||||
return d
|
return d
|
||||||
@ -104,12 +124,12 @@ func (i CUDA) HasDisableRequire() bool {
|
|||||||
|
|
||||||
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||||
// We concatenate all the devices from the specified envvars.
|
// We concatenate all the devices from the specified env.
|
||||||
var isSet bool
|
var isSet bool
|
||||||
var devices []string
|
var devices []string
|
||||||
requested := make(map[string]bool)
|
requested := make(map[string]bool)
|
||||||
for _, envVar := range envVars {
|
for _, envVar := range envVars {
|
||||||
if devs, ok := i[envVar]; ok {
|
if devs, ok := i.env[envVar]; ok {
|
||||||
isSet = true
|
isSet = true
|
||||||
for _, d := range strings.Split(devs, ",") {
|
for _, d := range strings.Split(devs, ",") {
|
||||||
trimmed := strings.TrimSpace(d)
|
trimmed := strings.TrimSpace(d)
|
||||||
@ -137,7 +157,7 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
|||||||
|
|
||||||
// GetDriverCapabilities returns the requested driver capabilities.
|
// GetDriverCapabilities returns the requested driver capabilities.
|
||||||
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||||
env := i[envNVDriverCapabilities]
|
env := i.env[envNVDriverCapabilities]
|
||||||
|
|
||||||
capabilities := make(DriverCapabilities)
|
capabilities := make(DriverCapabilities)
|
||||||
for _, c := range strings.Split(env, ",") {
|
for _, c := range strings.Split(env, ",") {
|
||||||
@ -148,7 +168,7 @@ func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (i CUDA) legacyVersion() (string, error) {
|
func (i CUDA) legacyVersion() (string, error) {
|
||||||
cudaVersion := i[envCUDAVersion]
|
cudaVersion := i.env[envCUDAVersion]
|
||||||
majorMinor, err := parseMajorMinorVersion(cudaVersion)
|
majorMinor, err := parseMajorMinorVersion(cudaVersion)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
|
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
|
||||||
@ -178,3 +198,79 @@ func parseMajorMinorVersion(version string) (string, error) {
|
|||||||
}
|
}
|
||||||
return majorMinor, nil
|
return majorMinor, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
|
||||||
|
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
||||||
|
var hasCDIdevice bool
|
||||||
|
for _, device := range i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
|
||||||
|
if !parser.IsQualifiedName(device) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
hasCDIdevice = true
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, device := range i.DevicesFromMounts() {
|
||||||
|
if !strings.HasPrefix(device, "cdi/") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
hasCDIdevice = true
|
||||||
|
}
|
||||||
|
return hasCDIdevice
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DevicesFromMounts returns a list of device specified as mounts.
|
||||||
|
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
|
||||||
|
func (i CUDA) DevicesFromMounts() []string {
|
||||||
|
root := filepath.Clean(deviceListAsVolumeMountsRoot)
|
||||||
|
seen := make(map[string]bool)
|
||||||
|
var devices []string
|
||||||
|
for _, m := range i.mounts {
|
||||||
|
source := filepath.Clean(m.Source)
|
||||||
|
// Only consider mounts who's host volume is /dev/null
|
||||||
|
if source != "/dev/null" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
destination := filepath.Clean(m.Destination)
|
||||||
|
if seen[destination] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[destination] = true
|
||||||
|
|
||||||
|
// Only consider container mount points that begin with 'root'
|
||||||
|
if !strings.HasPrefix(destination, root) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Grab the full path beyond 'root' and add it to the list of devices
|
||||||
|
device := strings.Trim(strings.TrimPrefix(destination, root), "/")
|
||||||
|
if len(device) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
devices = append(devices, device)
|
||||||
|
}
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
|
||||||
|
// CDIDevicesFromMounts returns a list of CDI devices specified as mounts on the image.
|
||||||
|
func (i CUDA) CDIDevicesFromMounts() []string {
|
||||||
|
var devices []string
|
||||||
|
for _, mountDevice := range i.DevicesFromMounts() {
|
||||||
|
if !strings.HasPrefix(mountDevice, "cdi/") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parts := strings.SplitN(strings.TrimPrefix(mountDevice, "cdi/"), "/", 3)
|
||||||
|
if len(parts) != 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vendor := parts[0]
|
||||||
|
class := parts[1]
|
||||||
|
device := parts[2]
|
||||||
|
devices = append(devices, fmt.Sprintf("%s/%s=%s", vendor, class, device))
|
||||||
|
}
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
@ -126,7 +126,6 @@ func TestGetRequirements(t *testing.T) {
|
|||||||
requirements, err := image.GetRequirements()
|
requirements, err := image.GetRequirements()
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.ElementsMatch(t, tc.requirements, requirements)
|
require.ElementsMatch(t, tc.requirements, requirements)
|
||||||
|
|
||||||
})
|
})
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -19,7 +19,6 @@ package info
|
|||||||
import (
|
import (
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/parser"
|
|
||||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info"
|
||||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||||
@ -69,7 +68,7 @@ func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
|
|||||||
r.logger.Infof("Auto-detected mode as '%v'", rmode)
|
r.logger.Infof("Auto-detected mode as '%v'", rmode)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if onlyFullyQualifiedCDIDevices(image) {
|
if image.OnlyFullyQualifiedCDIDevices() {
|
||||||
return "cdi"
|
return "cdi"
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,14 +87,3 @@ func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
|
|||||||
|
|
||||||
return "legacy"
|
return "legacy"
|
||||||
}
|
}
|
||||||
|
|
||||||
func onlyFullyQualifiedCDIDevices(image image.CUDA) bool {
|
|
||||||
var hasCDIdevice bool
|
|
||||||
for _, device := range image.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
|
|
||||||
if !cdi.IsQualifiedName(device) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
hasCDIdevice = true
|
|
||||||
}
|
|
||||||
return hasCDIdevice
|
|
||||||
}
|
|
||||||
|
@ -20,6 +20,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
@ -32,7 +33,8 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
mode string
|
mode string
|
||||||
expectedMode string
|
expectedMode string
|
||||||
info map[string]bool
|
info map[string]bool
|
||||||
image image.CUDA
|
envmap map[string]string
|
||||||
|
mounts []string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
description: "non-auto resolves to input",
|
description: "non-auto resolves to input",
|
||||||
@ -119,7 +121,7 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
description: "cdi devices resolves to cdi",
|
description: "cdi devices resolves to cdi",
|
||||||
mode: "auto",
|
mode: "auto",
|
||||||
expectedMode: "cdi",
|
expectedMode: "cdi",
|
||||||
image: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=all",
|
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=all",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -127,14 +129,14 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
description: "multiple cdi devices resolves to cdi",
|
description: "multiple cdi devices resolves to cdi",
|
||||||
mode: "auto",
|
mode: "auto",
|
||||||
expectedMode: "cdi",
|
expectedMode: "cdi",
|
||||||
image: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,nvidia.com/gpu=1",
|
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,nvidia.com/gpu=1",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "at least one non-cdi device resolves to legacy",
|
description: "at least one non-cdi device resolves to legacy",
|
||||||
mode: "auto",
|
mode: "auto",
|
||||||
image: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
|
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
|
||||||
},
|
},
|
||||||
info: map[string]bool{
|
info: map[string]bool{
|
||||||
@ -147,7 +149,7 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "at least one non-cdi device resolves to csv",
|
description: "at least one non-cdi device resolves to csv",
|
||||||
mode: "auto",
|
mode: "auto",
|
||||||
image: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
|
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
|
||||||
},
|
},
|
||||||
info: map[string]bool{
|
info: map[string]bool{
|
||||||
@ -157,6 +159,44 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
},
|
},
|
||||||
expectedMode: "csv",
|
expectedMode: "csv",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
description: "cdi mount devices resolves to CDI",
|
||||||
|
mode: "auto",
|
||||||
|
mounts: []string{
|
||||||
|
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
||||||
|
},
|
||||||
|
expectedMode: "cdi",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "cdi mount and non-CDI devices resolves to legacy",
|
||||||
|
mode: "auto",
|
||||||
|
mounts: []string{
|
||||||
|
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
||||||
|
"/var/run/nvidia-container-devices/all",
|
||||||
|
},
|
||||||
|
info: map[string]bool{
|
||||||
|
"nvml": true,
|
||||||
|
"tegra": false,
|
||||||
|
"nvgpu": false,
|
||||||
|
},
|
||||||
|
expectedMode: "legacy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "cdi mount and non-CDI envvar resolves to legacy",
|
||||||
|
mode: "auto",
|
||||||
|
envmap: map[string]string{
|
||||||
|
"NVIDIA_VISIBLE_DEVICES": "0",
|
||||||
|
},
|
||||||
|
mounts: []string{
|
||||||
|
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
||||||
|
},
|
||||||
|
info: map[string]bool{
|
||||||
|
"nvml": true,
|
||||||
|
"tegra": false,
|
||||||
|
"nvgpu": false,
|
||||||
|
},
|
||||||
|
expectedMode: "legacy",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
@ -177,7 +217,20 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
logger: logger,
|
logger: logger,
|
||||||
info: info,
|
info: info,
|
||||||
}
|
}
|
||||||
mode := r.resolveMode(tc.mode, tc.image)
|
|
||||||
|
var mounts []specs.Mount
|
||||||
|
for _, d := range tc.mounts {
|
||||||
|
mount := specs.Mount{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: d,
|
||||||
|
}
|
||||||
|
mounts = append(mounts, mount)
|
||||||
|
}
|
||||||
|
image, _ := image.New(
|
||||||
|
image.WithEnvMap(tc.envmap),
|
||||||
|
image.WithMounts(mounts),
|
||||||
|
)
|
||||||
|
mode := r.resolveMode(tc.mode, image)
|
||||||
require.EqualValues(t, tc.expectedMode, mode)
|
require.EqualValues(t, tc.expectedMode, mode)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -67,6 +67,13 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
if cfg.AcceptDeviceListAsVolumeMounts {
|
||||||
|
mountDevices := container.CDIDevicesFromMounts()
|
||||||
|
if len(mountDevices) > 0 {
|
||||||
|
return mountDevices, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar)
|
envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar)
|
||||||
|
|
||||||
var devices []string
|
var devices []string
|
||||||
|
@ -55,7 +55,7 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, image image.CUD
|
|||||||
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if nvidiaRequireJetpack := image[nvidiaRequireJetpackEnvvar]; nvidiaRequireJetpack != "csv-mounts=all" {
|
if image.Getenv(nvidiaRequireJetpackEnvvar) != "csv-mounts=all" {
|
||||||
csvFiles = csv.BaseFilesOnly(csvFiles)
|
csvFiles = csv.BaseFilesOnly(csvFiles)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,30 +32,33 @@ func TestNewCSVModifier(t *testing.T) {
|
|||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
description string
|
description string
|
||||||
cfg *config.Config
|
cfg *config.Config
|
||||||
image image.CUDA
|
envmap map[string]string
|
||||||
expectedError error
|
expectedError error
|
||||||
expectedNil bool
|
expectedNil bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
description: "visible devices not set returns nil",
|
description: "visible devices not set returns nil",
|
||||||
image: image.CUDA{},
|
envmap: map[string]string{},
|
||||||
expectedNil: true,
|
expectedNil: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "visible devices empty returns nil",
|
description: "visible devices empty returns nil",
|
||||||
image: image.CUDA{"NVIDIA_VISIBLE_DEVICES": ""},
|
envmap: map[string]string{"NVIDIA_VISIBLE_DEVICES": ""},
|
||||||
expectedNil: true,
|
expectedNil: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "visible devices 'void' returns nil",
|
description: "visible devices 'void' returns nil",
|
||||||
image: image.CUDA{"NVIDIA_VISIBLE_DEVICES": "void"},
|
envmap: map[string]string{"NVIDIA_VISIBLE_DEVICES": "void"},
|
||||||
expectedNil: true,
|
expectedNil: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
m, err := NewCSVModifier(logger, tc.cfg, tc.image)
|
image, _ := image.New(
|
||||||
|
image.WithEnvMap(tc.envmap),
|
||||||
|
)
|
||||||
|
m, err := NewCSVModifier(logger, tc.cfg, image)
|
||||||
if tc.expectedError != nil {
|
if tc.expectedError != nil {
|
||||||
require.Error(t, err)
|
require.Error(t, err)
|
||||||
} else {
|
} else {
|
||||||
|
@ -38,7 +38,7 @@ func NewGDSModifier(logger logger.Interface, cfg *config.Config, image image.CUD
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if gds := image[nvidiaGDSEnvvar]; gds != "enabled" {
|
if image.Getenv(nvidiaGDSEnvvar) != "enabled" {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ import (
|
|||||||
func TestGraphicsModifier(t *testing.T) {
|
func TestGraphicsModifier(t *testing.T) {
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
description string
|
description string
|
||||||
cudaImage image.CUDA
|
envmap map[string]string
|
||||||
expectedRequired bool
|
expectedRequired bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
@ -34,20 +34,20 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with no capabilities does not create modifier",
|
description: "devices with no capabilities does not create modifier",
|
||||||
cudaImage: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with no non-graphics does not create modifier",
|
description: "devices with no non-graphics does not create modifier",
|
||||||
cudaImage: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "compute",
|
"NVIDIA_DRIVER_CAPABILITIES": "compute",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with all capabilities creates modifier",
|
description: "devices with all capabilities creates modifier",
|
||||||
cudaImage: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "all",
|
"NVIDIA_DRIVER_CAPABILITIES": "all",
|
||||||
},
|
},
|
||||||
@ -55,7 +55,7 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with graphics capability creates modifier",
|
description: "devices with graphics capability creates modifier",
|
||||||
cudaImage: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "graphics",
|
"NVIDIA_DRIVER_CAPABILITIES": "graphics",
|
||||||
},
|
},
|
||||||
@ -63,7 +63,7 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with compute,graphics capability creates modifier",
|
description: "devices with compute,graphics capability creates modifier",
|
||||||
cudaImage: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "compute,graphics",
|
"NVIDIA_DRIVER_CAPABILITIES": "compute,graphics",
|
||||||
},
|
},
|
||||||
@ -71,7 +71,7 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with display capability creates modifier",
|
description: "devices with display capability creates modifier",
|
||||||
cudaImage: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "display",
|
"NVIDIA_DRIVER_CAPABILITIES": "display",
|
||||||
},
|
},
|
||||||
@ -79,7 +79,7 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with display,graphics capability creates modifier",
|
description: "devices with display,graphics capability creates modifier",
|
||||||
cudaImage: image.CUDA{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "display,graphics",
|
"NVIDIA_DRIVER_CAPABILITIES": "display,graphics",
|
||||||
},
|
},
|
||||||
@ -89,7 +89,10 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
required, _ := requiresGraphicsModifier(tc.cudaImage)
|
image, _ := image.New(
|
||||||
|
image.WithEnvMap(tc.envmap),
|
||||||
|
)
|
||||||
|
required, _ := requiresGraphicsModifier(image)
|
||||||
require.EqualValues(t, tc.expectedRequired, required)
|
require.EqualValues(t, tc.expectedRequired, required)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -38,7 +38,7 @@ func NewMOFEDModifier(logger logger.Interface, cfg *config.Config, image image.C
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if mofed := image[nvidiaMOFEDEnvvar]; mofed != "enabled" {
|
if image.Getenv(nvidiaMOFEDEnvvar) != "enabled" {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user