mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 00:08:11 +00:00
Allow IMEX channels to be requested as volume mounts
This change allows IMEX channels to be requested using the volume mount mechanism. A mount from /dev/null to /var/run/nvidia-container-devices/imex/{{ .ChannelID }} is equivalent to including {{ .ChannelID }} in the NVIDIA_IMEX_CHANNELS environment variable. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
92df542f2f
commit
2e6712d2bc
@ -21,7 +21,7 @@ type nvidiaConfig struct {
|
|||||||
Devices []string
|
Devices []string
|
||||||
MigConfigDevices string
|
MigConfigDevices string
|
||||||
MigMonitorDevices string
|
MigMonitorDevices string
|
||||||
ImexChannels string
|
ImexChannels []string
|
||||||
DriverCapabilities string
|
DriverCapabilities string
|
||||||
// Requirements defines the requirements DSL for the container to run.
|
// Requirements defines the requirements DSL for the container to run.
|
||||||
// This is empty if no specific requirements are needed, or if requirements are
|
// This is empty if no specific requirements are needed, or if requirements are
|
||||||
@ -197,12 +197,24 @@ func getMigDevices(image image.CUDA, envvar string) *string {
|
|||||||
return &devices
|
return &devices
|
||||||
}
|
}
|
||||||
|
|
||||||
func getImexChannels(i image.CUDA) *string {
|
func getImexChannels(hookConfig *HookConfig, image image.CUDA, privileged bool) []string {
|
||||||
if !i.HasEnvvar(image.EnvVarNvidiaImexChannels) {
|
// If enabled, try and get the device list from volume mounts first
|
||||||
|
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||||
|
devices := image.ImexChannelsFromMounts()
|
||||||
|
if len(devices) > 0 {
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
}
|
||||||
|
devices := image.ImexChannelsFromEnvVar()
|
||||||
|
if len(devices) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
chans := i.Getenv(image.EnvVarNvidiaImexChannels)
|
|
||||||
return &chans
|
if privileged || hookConfig.AcceptEnvvarUnprivileged {
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
|
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
|
||||||
@ -257,10 +269,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool)
|
|||||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||||
}
|
}
|
||||||
|
|
||||||
var imexChannels string
|
imexChannels := getImexChannels(hookConfig, image, privileged)
|
||||||
if c := getImexChannels(image); c != nil {
|
|
||||||
imexChannels = *c
|
|
||||||
}
|
|
||||||
|
|
||||||
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
|
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
|
||||||
|
|
||||||
|
@ -129,8 +129,8 @@ func doPrestart() {
|
|||||||
if len(nvidia.MigMonitorDevices) > 0 {
|
if len(nvidia.MigMonitorDevices) > 0 {
|
||||||
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
|
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
|
||||||
}
|
}
|
||||||
if len(nvidia.ImexChannels) > 0 {
|
if imexString := strings.Join(nvidia.ImexChannels, ","); len(imexString) > 0 {
|
||||||
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
|
args = append(args, fmt.Sprintf("--imex-channel=%s", imexString))
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
|
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
|
||||||
|
@ -30,7 +30,8 @@ import (
|
|||||||
const (
|
const (
|
||||||
DeviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
DeviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
||||||
|
|
||||||
volumeMountDevicePrefixCDI = "cdi/"
|
volumeMountDevicePrefixCDI = "cdi/"
|
||||||
|
volumeMountDevicePrefixImex = "imex/"
|
||||||
)
|
)
|
||||||
|
|
||||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||||
@ -225,7 +226,10 @@ func (i CUDA) VisibleDevicesFromEnvVar() []string {
|
|||||||
func (i CUDA) VisibleDevicesFromMounts() []string {
|
func (i CUDA) VisibleDevicesFromMounts() []string {
|
||||||
var devices []string
|
var devices []string
|
||||||
for _, device := range i.DevicesFromMounts() {
|
for _, device := range i.DevicesFromMounts() {
|
||||||
if strings.HasPrefix(device, volumeMountDevicePrefixCDI) {
|
switch {
|
||||||
|
case strings.HasPrefix(device, volumeMountDevicePrefixCDI):
|
||||||
|
continue
|
||||||
|
case strings.HasPrefix(device, volumeMountDevicePrefixImex):
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
devices = append(devices, device)
|
devices = append(devices, device)
|
||||||
@ -286,6 +290,19 @@ func (i CUDA) CDIDevicesFromMounts() []string {
|
|||||||
return devices
|
return devices
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i CUDA) IsEnabled(envvar string) bool {
|
// ImexChannelsFromEnvVar returns the list of IMEX channels requested for the image.
|
||||||
return i.Getenv(envvar) == "enabled"
|
func (i CUDA) ImexChannelsFromEnvVar() []string {
|
||||||
|
return i.DevicesFromEnvvars(EnvVarNvidiaImexChannels).List()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ImexChannelsFromMounts returns the list of IMEX channels requested for the image.
|
||||||
|
func (i CUDA) ImexChannelsFromMounts() []string {
|
||||||
|
var channels []string
|
||||||
|
for _, mountDevice := range i.DevicesFromMounts() {
|
||||||
|
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixImex) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
channels = append(channels, strings.TrimPrefix(mountDevice, volumeMountDevicePrefixImex))
|
||||||
|
}
|
||||||
|
return channels
|
||||||
}
|
}
|
||||||
|
@ -189,6 +189,11 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
|||||||
mounts: makeTestMounts("GPU0", "cdi/nvidia.com/gpu=all", "GPU1"),
|
mounts: makeTestMounts("GPU0", "cdi/nvidia.com/gpu=all", "GPU1"),
|
||||||
expectedDevices: []string{"GPU0", "GPU1"},
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
description: "imex devices are ignored",
|
||||||
|
mounts: makeTestMounts("GPU0", "imex/0", "GPU1"),
|
||||||
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, tc := range tests {
|
for _, tc := range tests {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
Loading…
Reference in New Issue
Block a user