From 7c0038579742735839d9638d3b28086bd2705199 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Fri, 7 Aug 2020 14:24:32 +0000 Subject: [PATCH] Refactor accepting device lists from volume mounts as a boolean Also hard code the "root" path where these volume mounts will be looked for rather than making it configurable. Signed-off-by: Kevin Klues --- config/config.toml.amzn | 2 +- config/config.toml.centos | 2 +- config/config.toml.debian | 2 +- config/config.toml.opensuse-leap | 2 +- config/config.toml.ubuntu | 2 +- pkg/container_config.go | 20 +++++++++++++------- pkg/container_test.go | 31 +++++++++++++++---------------- pkg/hook_config.go | 20 ++++++++------------ 8 files changed, 41 insertions(+), 40 deletions(-) diff --git a/config/config.toml.amzn b/config/config.toml.amzn index 4de3d1c9..24e8e7f0 100644 --- a/config/config.toml.amzn +++ b/config/config.toml.amzn @@ -1,7 +1,7 @@ disable-require = false #swarm-resource = "DOCKER_RESOURCE_GPU" #accept-nvidia-visible-devices-envvar-when-unprivileged = true -#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices" +#accept-nvidia-visible-devices-as-volume-mounts = false [nvidia-container-cli] #root = "/run/nvidia/driver" diff --git a/config/config.toml.centos b/config/config.toml.centos index 4de3d1c9..24e8e7f0 100644 --- a/config/config.toml.centos +++ b/config/config.toml.centos @@ -1,7 +1,7 @@ disable-require = false #swarm-resource = "DOCKER_RESOURCE_GPU" #accept-nvidia-visible-devices-envvar-when-unprivileged = true -#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices" +#accept-nvidia-visible-devices-as-volume-mounts = false [nvidia-container-cli] #root = "/run/nvidia/driver" diff --git a/config/config.toml.debian b/config/config.toml.debian index 4de3d1c9..24e8e7f0 100644 --- a/config/config.toml.debian +++ b/config/config.toml.debian @@ -1,7 +1,7 @@ disable-require = false #swarm-resource = "DOCKER_RESOURCE_GPU" #accept-nvidia-visible-devices-envvar-when-unprivileged = true -#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices" +#accept-nvidia-visible-devices-as-volume-mounts = false [nvidia-container-cli] #root = "/run/nvidia/driver" diff --git a/config/config.toml.opensuse-leap b/config/config.toml.opensuse-leap index fe28e163..d0380c8a 100644 --- a/config/config.toml.opensuse-leap +++ b/config/config.toml.opensuse-leap @@ -1,7 +1,7 @@ disable-require = false #swarm-resource = "DOCKER_RESOURCE_GPU" #accept-nvidia-visible-devices-envvar-when-unprivileged = true -#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices" +#accept-nvidia-visible-devices-as-volume-mounts = false [nvidia-container-cli] #root = "/run/nvidia/driver" diff --git a/config/config.toml.ubuntu b/config/config.toml.ubuntu index 061c0759..873324ae 100644 --- a/config/config.toml.ubuntu +++ b/config/config.toml.ubuntu @@ -1,7 +1,7 @@ disable-require = false #swarm-resource = "DOCKER_RESOURCE_GPU" #accept-nvidia-visible-devices-envvar-when-unprivileged = true -#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices" +#accept-nvidia-visible-devices-as-volume-mounts = false [nvidia-container-cli] #root = "/run/nvidia/driver" diff --git a/pkg/container_config.go b/pkg/container_config.go index 49ab5bb3..04171b33 100644 --- a/pkg/container_config.go +++ b/pkg/container_config.go @@ -35,6 +35,10 @@ const ( capSysAdmin = "CAP_SYS_ADMIN" ) +const ( + deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices" +) + type nvidiaConfig struct { Devices string MigConfigDevices string @@ -236,10 +240,10 @@ func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string { return devices } -func getDevicesFromMounts(root string, mounts []Mount) *string { +func getDevicesFromMounts(mounts []Mount) *string { var devices []string for _, m := range mounts { - root := filepath.Clean(root) + root := filepath.Clean(deviceListAsVolumeMountsRoot) source := filepath.Clean(m.Source) destination := filepath.Clean(m.Destination) @@ -274,14 +278,16 @@ func getDevicesFromMounts(root string, mounts []Mount) *string { } func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string { - // Try and get the device list from mount volumes first - devices := getDevicesFromMounts(*hookConfig.DeviceListVolumeMount, mounts) - if devices != nil { - return devices + // If enabled, try and get the device list from volume mounts first + if hookConfig.AcceptDeviceListAsVolumeMounts { + devices := getDevicesFromMounts(mounts) + if devices != nil { + return devices + } } // Fallback to reading from the environment variable if privileges are correct - devices = getDevicesFromEnvvar(env, legacyImage) + devices := getDevicesFromEnvvar(env, legacyImage) if devices == nil { return nil } diff --git a/pkg/container_test.go b/pkg/container_test.go index 98c40d2c..e531454b 100644 --- a/pkg/container_test.go +++ b/pkg/container_test.go @@ -454,30 +454,26 @@ func TestGetNvidiaConfig(t *testing.T) { func TestGetDevicesFromMounts(t *testing.T) { var tests = []struct { description string - root string mounts []Mount expectedDevices *string }{ { description: "No mounts", - root: defaultDeviceListVolumeMount, mounts: nil, expectedDevices: nil, }, { description: "Host path is not /dev/null", - root: defaultDeviceListVolumeMount, mounts: []Mount{ { Source: "/not/dev/null", - Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU0"), + Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"), }, }, expectedDevices: nil, }, { description: "Container path is not prefixed by 'root'", - root: defaultDeviceListVolumeMount, mounts: []Mount{ { Source: "/dev/null", @@ -488,41 +484,38 @@ func TestGetDevicesFromMounts(t *testing.T) { }, { description: "Container path is only 'root'", - root: defaultDeviceListVolumeMount, mounts: []Mount{ { Source: "/dev/null", - Destination: defaultDeviceListVolumeMount, + Destination: deviceListAsVolumeMountsRoot, }, }, expectedDevices: nil, }, { description: "Discover 2 devices", - root: defaultDeviceListVolumeMount, mounts: []Mount{ { Source: "/dev/null", - Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU0"), + Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"), }, { Source: "/dev/null", - Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU1"), + Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"), }, }, expectedDevices: &[]string{"GPU0,GPU1"}[0], }, { description: "Discover 2 devices with slashes in the name", - root: defaultDeviceListVolumeMount, mounts: []Mount{ { Source: "/dev/null", - Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU0-MIG0/0/1"), + Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0-MIG0/0/1"), }, { Source: "/dev/null", - Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU1-MIG0/0/1"), + Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1-MIG0/0/1"), }, }, expectedDevices: &[]string{"GPU0-MIG0/0/1,GPU1-MIG0/0/1"}[0], @@ -530,7 +523,7 @@ func TestGetDevicesFromMounts(t *testing.T) { } for _, tc := range tests { t.Run(tc.description, func(t *testing.T) { - devices := getDevicesFromMounts(tc.root, tc.mounts) + devices := getDevicesFromMounts(tc.mounts) if !reflect.DeepEqual(devices, tc.expectedDevices) { t.Errorf("Unexpected devices (got: %v, wanted: %v)", *devices, *tc.expectedDevices) } @@ -545,6 +538,7 @@ func TestDeviceListSourcePriority(t *testing.T) { envvarDevices string privileged bool acceptUnprivileged bool + acceptMounts bool expectedDevices *string expectedPanic bool }{ @@ -553,16 +547,17 @@ func TestDeviceListSourcePriority(t *testing.T) { mountDevices: []Mount{ { Source: "/dev/null", - Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU0"), + Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"), }, { Source: "/dev/null", - Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU1"), + Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"), }, }, envvarDevices: "GPU2,GPU3", privileged: false, acceptUnprivileged: false, + acceptMounts: true, expectedDevices: &[]string{"GPU0,GPU1"}[0], }, { @@ -571,6 +566,7 @@ func TestDeviceListSourcePriority(t *testing.T) { envvarDevices: "GPU0,GPU1", privileged: false, acceptUnprivileged: false, + acceptMounts: true, expectedPanic: true, }, { @@ -579,6 +575,7 @@ func TestDeviceListSourcePriority(t *testing.T) { envvarDevices: "GPU0,GPU1", privileged: true, acceptUnprivileged: false, + acceptMounts: true, expectedDevices: &[]string{"GPU0,GPU1"}[0], }, { @@ -587,6 +584,7 @@ func TestDeviceListSourcePriority(t *testing.T) { envvarDevices: "GPU0,GPU1", privileged: false, acceptUnprivileged: true, + acceptMounts: true, expectedDevices: &[]string{"GPU0,GPU1"}[0], }, } @@ -600,6 +598,7 @@ func TestDeviceListSourcePriority(t *testing.T) { } hookConfig := getDefaultHookConfig() hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged + hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged, false) } diff --git a/pkg/hook_config.go b/pkg/hook_config.go index 09b78e14..0f809c74 100644 --- a/pkg/hook_config.go +++ b/pkg/hook_config.go @@ -13,10 +13,6 @@ const ( driverPath = "/run/nvidia/driver" ) -const ( - defaultDeviceListVolumeMount = "/var/run/nvidia-container-devices" -) - var defaultPaths = [...]string{ path.Join(driverPath, configPath), configPath, @@ -38,20 +34,20 @@ type CLIConfig struct { // HookConfig : options for the nvidia-container-toolkit. type HookConfig struct { - DisableRequire bool `toml:"disable-require"` - SwarmResource *string `toml:"swarm-resource"` - AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"` - DeviceListVolumeMount *string `toml:"look-for-nvidia-visible-devices-as-volume-mounts-under"` + DisableRequire bool `toml:"disable-require"` + SwarmResource *string `toml:"swarm-resource"` + AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"` + AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"` NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"` } func getDefaultHookConfig() (config HookConfig) { return HookConfig{ - DisableRequire: false, - SwarmResource: nil, - AcceptEnvvarUnprivileged: true, - DeviceListVolumeMount: &[]string{defaultDeviceListVolumeMount}[0], + DisableRequire: false, + SwarmResource: nil, + AcceptEnvvarUnprivileged: true, + AcceptDeviceListAsVolumeMounts: false, NvidiaContainerCLI: CLIConfig{ Root: nil, Path: nil,