From 2fa37973e08943cd7cdd60bd4a64e7bdfb90234b Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Wed, 17 Jan 2024 22:38:10 +0000 Subject: [PATCH 1/4] Add support for an NVIDIA_IMEX_CHANNELS envvar Signed-off-by: Kevin Klues --- .../container_config.go | 18 +++++++++++++++++- cmd/nvidia-container-runtime-hook/main.go | 3 +++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/cmd/nvidia-container-runtime-hook/container_config.go b/cmd/nvidia-container-runtime-hook/container_config.go index 42732bfd..d2c8bdda 100644 --- a/cmd/nvidia-container-runtime-hook/container_config.go +++ b/cmd/nvidia-container-runtime-hook/container_config.go @@ -9,9 +9,10 @@ import ( "path/filepath" "strings" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/mod/semver" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" ) const ( @@ -22,6 +23,7 @@ const ( envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES" envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES" envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES" + envNVImexChannels = "NVIDIA_IMEX_CHANNELS" envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES" ) @@ -37,6 +39,7 @@ type nvidiaConfig struct { Devices string MigConfigDevices string MigMonitorDevices string + ImexChannels string DriverCapabilities string // Requirements defines the requirements DSL for the container to run. // This is empty if no specific requirements are needed, or if requirements are @@ -271,6 +274,13 @@ func getMigMonitorDevices(env map[string]string) *string { return nil } +func getImexChannels(env map[string]string) *string { + if chans, ok := env[envNVImexChannels]; ok { + return &chans + } + return nil +} + func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities { // We use the default driver capabilities by default. 
This is filtered to only include the // supported capabilities @@ -324,6 +334,11 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container") } + var imexChannels string + if c := getImexChannels(image); c != nil { + imexChannels = *c + } + driverCapabilities := hookConfig.getDriverCapabilities(image, legacyImage).String() requirements, err := image.GetRequirements() @@ -335,6 +350,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p Devices: devices, MigConfigDevices: migConfigDevices, MigMonitorDevices: migMonitorDevices, + ImexChannels: imexChannels, DriverCapabilities: driverCapabilities, Requirements: requirements, } diff --git a/cmd/nvidia-container-runtime-hook/main.go b/cmd/nvidia-container-runtime-hook/main.go index 30aad846..c004d84e 100644 --- a/cmd/nvidia-container-runtime-hook/main.go +++ b/cmd/nvidia-container-runtime-hook/main.go @@ -126,6 +126,9 @@ func doPrestart() { if len(nvidia.MigMonitorDevices) > 0 { args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices)) } + if len(nvidia.ImexChannels) > 0 { + args = append(args, fmt.Sprintf("--imex-channel=%s", nvidia.ImexChannels)) + } for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") { if len(cap) == 0 { From 888ad62c988bf002fff046b0ea3e8d994aaa91a8 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 27 Feb 2024 13:26:30 +0200 Subject: [PATCH 2/4] Update changelog Signed-off-by: Evan Lezar --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5180dde4..8a171232 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # NVIDIA Container Toolkit Changelog * Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`. +* Add support for selecting IMEX channels using the NVIDIA_IMEX_CHANNELS environment variable. 
## v1.14.5 * Fix `nvidia-ctk runtime configure --cdi.enabled` for Docker. This was incorrectly setting `experimental = true` instead From 1d046b4a9b8e4e38c501b80ecb4a1ecb075c1b70 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 27 Feb 2024 13:34:23 +0200 Subject: [PATCH 3/4] Bump version to v1.14.6 Signed-off-by: Evan Lezar --- CHANGELOG.md | 1 + versions.mk | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a171232..e8851742 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # NVIDIA Container Toolkit Changelog +## v1.14.6 * Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`. * Add support for selecting IMEX channels using the NVIDIA_IMEX_CHANNELS environment variable. diff --git a/versions.mk b/versions.mk index cc9f39af..c63f8b36 100644 --- a/versions.mk +++ b/versions.mk @@ -13,7 +13,7 @@ # limitations under the License. LIB_NAME := nvidia-container-toolkit -LIB_VERSION := 1.14.5 +LIB_VERSION := 1.14.6 LIB_TAG := # The package version is the combination of the library version and tag. From 8281e7d3419d0db4d37048aa9c3601c7426bbd4e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 27 Feb 2024 14:35:41 +0200 Subject: [PATCH 4/4] Update libnvidia-container to d2eb0afe Signed-off-by: Evan Lezar --- third_party/libnvidia-container | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/libnvidia-container b/third_party/libnvidia-container index 8971b929..d2eb0afe 160000 --- a/third_party/libnvidia-container +++ b/third_party/libnvidia-container @@ -1 +1 @@ -Subproject commit 8971b92998844ea82beed7549a2dd3e3a5899310 +Subproject commit d2eb0afe86f0b643e33624ee64f065dd60e952d4