From 2a92d6acb7829caea783c030cdd36eca71be89d2 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 7 Jun 2021 14:02:10 +0200 Subject: [PATCH] Fix bug where docker swarm device selection is overriden by NVIDIA_VISIBLE_DEVICES This change fixes a bug where the value of NVIDIA_VISIBLE_DEVICES would be used to select devices even if the `swarm-resource` config option is specified. Note that this does not change the value of NVIDIA_VISIBLE_DEVICES in the container. Signed-off-by: Evan Lezar --- pkg/container_config.go | 1 + pkg/container_test.go | 190 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+) diff --git a/pkg/container_config.go b/pkg/container_config.go index 25861d52..dae4cc7b 100644 --- a/pkg/container_config.go +++ b/pkg/container_config.go @@ -216,6 +216,7 @@ func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string { for _, envVar := range envVars { if devs, ok := env[envVar]; ok { devices = &devs + break } } diff --git a/pkg/container_test.go b/pkg/container_test.go index b3c258f7..3cda6280 100644 --- a/pkg/container_test.go +++ b/pkg/container_test.go @@ -632,3 +632,193 @@ func TestDeviceListSourcePriority(t *testing.T) { }) } } + +func TestGetDevicesFromEnvvar(t *testing.T) { + all := "all" + empty := "" + envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS" + gpuID := "GPU-12345" + anotherGPUID := "GPU-67890" + + var tests = []struct { + description string + envSwarmGPU *string + env map[string]string + legacyImage bool + expectedDevices *string + }{ + { + description: "empty env returns nil for non-legacy image", + }, + { + description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", + env: map[string]string{ + envNVVisibleDevices: "", + }, + }, + { + description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", + env: map[string]string{ + envNVVisibleDevices: "void", + }, + }, + { + description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image", + env: map[string]string{ + envNVVisibleDevices: "none", + }, + expectedDevices: &empty, + }, + { + description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image", + env: map[string]string{ + envNVVisibleDevices: gpuID, + }, + expectedDevices: &gpuID, + }, + { + description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image", + env: map[string]string{ + envNVVisibleDevices: gpuID, + }, + legacyImage: true, + expectedDevices: &gpuID, + }, + { + description: "empty env returns all for legacy image", + legacyImage: true, + expectedDevices: &all, + }, + // Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when + // not enabled + { + description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", + env: map[string]string{ + envDockerResourceGPUs: anotherGPUID, + }, + }, + { + description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", + env: map[string]string{ + envNVVisibleDevices: "", + envDockerResourceGPUs: anotherGPUID, + }, + }, + { + description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", + env: map[string]string{ + envNVVisibleDevices: "void", + envDockerResourceGPUs: anotherGPUID, + }, + }, + { + description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image", + env: map[string]string{ + envNVVisibleDevices: "none", + envDockerResourceGPUs: anotherGPUID, + }, + expectedDevices: &empty, + }, + { + description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image", + env: map[string]string{ + envNVVisibleDevices: gpuID, + envDockerResourceGPUs: anotherGPUID, + }, + expectedDevices: &gpuID, + }, + { + description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image", + env: map[string]string{ + envNVVisibleDevices: gpuID, + envDockerResourceGPUs: anotherGPUID, + }, + legacyImage: true, + expectedDevices: &gpuID, + }, + { + description: "empty env returns all for legacy image", + env: map[string]string{ + envDockerResourceGPUs: anotherGPUID, + }, + legacyImage: true, + expectedDevices: &all, + }, + // Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when + // enabled + { + description: "empty env returns nil for non-legacy image", + envSwarmGPU: &envDockerResourceGPUs, + }, + { + description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image", + envSwarmGPU: &envDockerResourceGPUs, + env: map[string]string{ + envDockerResourceGPUs: "", + }, + }, + { + description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image", + envSwarmGPU: &envDockerResourceGPUs, + env: map[string]string{ + envDockerResourceGPUs: "void", + }, + }, + { + description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image", + envSwarmGPU: &envDockerResourceGPUs, + env: map[string]string{ + envDockerResourceGPUs: "none", + }, + expectedDevices: &empty, + }, + { + description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image", + envSwarmGPU: &envDockerResourceGPUs, + env: map[string]string{ + envDockerResourceGPUs: gpuID, + }, + expectedDevices: &gpuID, + }, + { + description: "DOCKER_RESOURCE_GPUS set returns value for legacy image", + envSwarmGPU: &envDockerResourceGPUs, + env: map[string]string{ + envDockerResourceGPUs: gpuID, + }, + legacyImage: true, + expectedDevices: &gpuID, + }, + { + description: "DOCKER_RESOURCE_GPUS is selected if present", + envSwarmGPU: &envDockerResourceGPUs, + env: map[string]string{ + envDockerResourceGPUs: anotherGPUID, + }, + expectedDevices: &anotherGPUID, + }, + { + description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present", + envSwarmGPU: &envDockerResourceGPUs, + env: map[string]string{ + envNVVisibleDevices: gpuID, + envDockerResourceGPUs: anotherGPUID, + }, + expectedDevices: &anotherGPUID, + }, + } + + for i, tc := range tests { + t.Run(tc.description, func(t *testing.T) { + envSwarmGPU = tc.envSwarmGPU + devices := getDevicesFromEnvvar(tc.env, tc.legacyImage) + if tc.expectedDevices == nil { + require.Nil(t, devices, "%d: %v", i, tc) + return + } + + require.NotNil(t, devices, "%d: %v", i, tc) + require.Equal(t, *tc.expectedDevices, *devices, "%d: %v", i, tc) + }) + } +}