Support CDI devices as mounts

This change allows CDI devices to be requested as mounts in the
container. This enables their use in environments such as kind
where environment variables or annotations cannot be used.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2023-10-10 13:48:38 +02:00
parent 1b1aae9c4a
commit 833254fa59
13 changed files with 278 additions and 84 deletions

View File

@@ -174,7 +174,7 @@ func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *stri
// if specified.
var hasSwarmEnvvar bool
for _, envvar := range swarmResourceEnvvars {
if _, exists := image[envvar]; exists {
if image.HasEnvvar(envvar) {
hasSwarmEnvvar = true
break
}
@@ -257,28 +257,31 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
return nil
}
func getMigConfigDevices(env map[string]string) *string {
if devices, ok := env[envNVMigConfigDevices]; ok {
return &devices
}
return nil
func getMigConfigDevices(image image.CUDA) *string {
return getMigDevices(image, envNVMigConfigDevices)
}
func getMigMonitorDevices(env map[string]string) *string {
if devices, ok := env[envNVMigMonitorDevices]; ok {
return &devices
}
return nil
func getMigMonitorDevices(image image.CUDA) *string {
return getMigDevices(image, envNVMigMonitorDevices)
}
func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities {
func getMigDevices(image image.CUDA, envvar string) *string {
if !image.HasEnvvar(envvar) {
return nil
}
devices := image.Getenv(envvar)
return &devices
}
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
// We use the default driver capabilities by default. This is filtered to only include the
// supported capabilities
supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities)
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
capsEnv, capsEnvSpecified := env[envNVDriverCapabilities]
capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities)
capsEnv := cudaImage.Getenv(envNVDriverCapabilities)
if !capsEnvSpecified && legacyImage {
// Environment variable unset with legacy image: set all capabilities.

View File

@@ -465,6 +465,9 @@ func TestGetNvidiaConfig(t *testing.T) {
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
image, _ := image.New(
image.WithEnvMap(tc.env),
)
// Wrap the call to getNvidiaConfig() in a closure.
var config *nvidiaConfig
getConfig := func() {
@@ -473,7 +476,7 @@ func TestGetNvidiaConfig(t *testing.T) {
defaultConfig, _ := getDefaultHookConfig()
hookConfig = &defaultConfig
}
config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged)
config = getNvidiaConfig(hookConfig, image, nil, tc.privileged)
}
// For any tests that are expected to panic, make sure they do.
@@ -678,13 +681,17 @@ func TestDeviceListSourcePriority(t *testing.T) {
// Wrap the call to getDevices() in a closure.
var devices *string
getDevices := func() {
env := map[string]string{
envNVVisibleDevices: tc.envvarDevices,
}
image, _ := image.New(
image.WithEnvMap(
map[string]string{
envNVVisibleDevices: tc.envvarDevices,
},
),
)
hookConfig, _ := getDefaultHookConfig()
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
devices = getDevices(&hookConfig, image, tc.mountDevices, tc.privileged)
}
// For all other tests, just grab the devices and check the results
@@ -905,7 +912,10 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
for i, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars)
image, _ := image.New(
image.WithEnvMap(tc.env),
)
devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
if tc.expectedDevices == nil {
require.Nil(t, devices, "%d: %v", i, tc)
return
@@ -1021,8 +1031,11 @@ func TestGetDriverCapabilities(t *testing.T) {
SupportedDriverCapabilities: tc.supportedCapabilities,
}
image, _ := image.New(
image.WithEnvMap(tc.env),
)
getDriverCapabilities := func() {
capabilities = c.getDriverCapabilities(tc.env, tc.legacyImage).String()
capabilities = c.getDriverCapabilities(image, tc.legacyImage).String()
}
if tc.expectedPanic {