Add support for an NVIDIA_IMEX_CHANNELS envvar

Signed-off-by: Kevin Klues <kklues@nvidia.com>
This commit is contained in:
Kevin Klues 2024-01-17 22:38:10 +00:00
parent 0409824106
commit 296d4560b0
2 changed files with 19 additions and 0 deletions

View File

@ -23,6 +23,7 @@ const (
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES" envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES" envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES" envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES" envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
) )
@ -38,6 +39,7 @@ type nvidiaConfig struct {
Devices string Devices string
MigConfigDevices string MigConfigDevices string
MigMonitorDevices string MigMonitorDevices string
ImexChannels string
DriverCapabilities string DriverCapabilities string
// Requirements defines the requirements DSL for the container to run. // Requirements defines the requirements DSL for the container to run.
// This is empty if no specific requirements are needed, or if requirements are // This is empty if no specific requirements are needed, or if requirements are
@ -274,6 +276,14 @@ func getMigDevices(image image.CUDA, envvar string) *string {
return &devices return &devices
} }
// getImexChannels looks up the NVIDIA_IMEX_CHANNELS environment variable on
// the supplied CUDA image.
//
// It returns a pointer to the value reported by image.Getenv when
// image.HasEnvvar says the variable is present, and nil otherwise. Returning a
// pointer lets callers tell an absent variable (nil) apart from one that is
// present but empty.
func getImexChannels(image image.CUDA) *string {
	if image.HasEnvvar(envNVImexChannels) {
		value := image.Getenv(envNVImexChannels)
		return &value
	}
	return nil
}
func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities { func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities {
// We use the default driver capabilities by default. This is filtered to only include the // We use the default driver capabilities by default. This is filtered to only include the
// supported capabilities // supported capabilities
@ -328,6 +338,11 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container") log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
} }
var imexChannels string
if c := getImexChannels(image); c != nil {
imexChannels = *c
}
driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String() driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String()
requirements, err := image.GetRequirements() requirements, err := image.GetRequirements()
@ -339,6 +354,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
Devices: devices, Devices: devices,
MigConfigDevices: migConfigDevices, MigConfigDevices: migConfigDevices,
MigMonitorDevices: migMonitorDevices, MigMonitorDevices: migMonitorDevices,
ImexChannels: imexChannels,
DriverCapabilities: driverCapabilities, DriverCapabilities: driverCapabilities,
Requirements: requirements, Requirements: requirements,
} }

View File

@ -126,6 +126,9 @@ func doPrestart() {
if len(nvidia.MigMonitorDevices) > 0 { if len(nvidia.MigMonitorDevices) > 0 {
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices)) args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
} }
if len(nvidia.ImexChannels) > 0 {
args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels))
}
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") { for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
if len(cap) == 0 { if len(cap) == 0 {