diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index abe4f11d..681880b7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -52,7 +52,7 @@ fmt: <<: *tests-setup stage: tests script: - - res=$(gofmt -l *.go) + - res=$(gofmt -l pkg/*.go) - echo "$res" - test -z "$res" diff --git a/pkg/container_config.go b/pkg/container_config.go index 172a9a46..15bbc0f4 100644 --- a/pkg/container_config.go +++ b/pkg/container_config.go @@ -192,49 +192,95 @@ func isPrivileged(s *Spec) bool { return false } -func getDevices(env map[string]string) *string { - gpuVars := []string{envNVVisibleDevices} +func isLegacyCUDAImage(env map[string]string) bool { + legacyCudaVersion := env[envCUDAVersion] + cudaRequire := env[envNVRequireCUDA] + return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 +} + +func getDevices(env map[string]string, legacyImage bool) *string { + // Build a list of envvars to consider. + envVars := []string{envNVVisibleDevices} if envSwarmGPU != nil { - // The Swarm resource has higher precedence. - gpuVars = append([]string{*envSwarmGPU}, gpuVars...) + // The Swarm envvar has higher precedence. + envVars = append([]string{*envSwarmGPU}, envVars...) } - for _, gpuVar := range gpuVars { - if devices, ok := env[gpuVar]; ok { - return &devices + // Grab a reference to devices from the first envvar + // in the list that actually exists in the environment. + var devices *string + for _, envVar := range envVars { + if devs, ok := env[envVar]; ok { + devices = &devs } } - return nil + + // Environment variable unset with legacy image: default to "all". + if devices == nil && legacyImage { + all := "all" + return &all + } + + // Environment variable unset or empty or "void": return nil + if devices == nil || len(*devices) == 0 || *devices == "void" { + return nil + } + + // Environment variable set to "none": reset to "". + if *devices == "none" { + empty := "" + return &empty + } + + // Any other value. + return devices } func getMigConfigDevices(env map[string]string) *string { - gpuVars := []string{envNVMigConfigDevices} - for _, gpuVar := range gpuVars { - if devices, ok := env[gpuVar]; ok { - return &devices - } + if devices, ok := env[envNVMigConfigDevices]; ok { + return &devices } return nil } func getMigMonitorDevices(env map[string]string) *string { - gpuVars := []string{envNVMigMonitorDevices} - for _, gpuVar := range gpuVars { - if devices, ok := env[gpuVar]; ok { - return &devices - } + if devices, ok := env[envNVMigMonitorDevices]; ok { + return &devices } return nil } -func getDriverCapabilities(env map[string]string) *string { - if capabilities, ok := env[envNVDriverCapabilities]; ok { - return &capabilities +func getDriverCapabilities(env map[string]string, legacyImage bool) *string { + // Grab a reference to the capabilities from the envvar + // if it actually exists in the environment. + var capabilities *string + if caps, ok := env[envNVDriverCapabilities]; ok { + capabilities = &caps } - return nil + + // Environment variable unset with legacy image: set all capabilities. + if capabilities == nil && legacyImage { + allCaps := allDriverCapabilities + return &allCaps + } + + // Environment variable unset or set but empty: set default capabilities. + if capabilities == nil || len(*capabilities) == 0 { + defaultCaps := defaultDriverCapabilities + return &defaultCaps + } + + // Environment variable set to "all": set all capabilities. + if *capabilities == "all" { + allCaps := allDriverCapabilities + return &allCaps + } + + // Any other value + return capabilities } -func getRequirements(env map[string]string) []string { +func getRequirements(env map[string]string, legacyImage bool) []string { // All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli var requirements []string for name, value := range env { @@ -242,98 +288,25 @@ func getRequirements(env map[string]string) []string { requirements = append(requirements, value) } } + if legacyImage { + vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion]) + cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin) + requirements = append(requirements, cudaRequire) + } return requirements } -// Mimic the new CUDA images if no capabilities or devices are specified. -func getNvidiaConfigLegacy(env map[string]string, privileged bool) *nvidiaConfig { - var devices string - if d := getDevices(env); d == nil { - // Environment variable unset: default to "all". - devices = "all" - } else if len(*d) == 0 || *d == "void" { - // Environment variable empty or "void": not a GPU container. - return nil - } else { - // Environment variable non-empty and not "void". - devices = *d - } - if devices == "none" { - devices = "" - } - - var migConfigDevices string - if d := getMigConfigDevices(env); d != nil { - migConfigDevices = *d - } - if !privileged && migConfigDevices != "" { - log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container") - } - - var migMonitorDevices string - if d := getMigMonitorDevices(env); d != nil { - migMonitorDevices = *d - } - if !privileged && migMonitorDevices != "" { - log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container") - } - - var driverCapabilities string - if c := getDriverCapabilities(env); c == nil { - // Environment variable unset: default to "all". - driverCapabilities = allDriverCapabilities - } else if len(*c) == 0 { - // Environment variable empty: use default capability. - driverCapabilities = defaultDriverCapabilities - } else { - // Environment variable non-empty. - driverCapabilities = *c - } - if driverCapabilities == "all" { - driverCapabilities = allDriverCapabilities - } - - requirements := getRequirements(env) - - vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion]) - cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin) - requirements = append(requirements, cudaRequire) - - // Don't fail on invalid values. - disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire]) - - return &nvidiaConfig{ - Devices: devices, - MigConfigDevices: migConfigDevices, - MigMonitorDevices: migMonitorDevices, - DriverCapabilities: driverCapabilities, - Requirements: requirements, - DisableRequire: disableRequire, - } -} - func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig { - legacyCudaVersion := env[envCUDAVersion] - cudaRequire := env[envNVRequireCUDA] - if len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 { - // Legacy CUDA image detected. - return getNvidiaConfigLegacy(env, privileged) - } + legacyImage := isLegacyCUDAImage(env) var devices string - d := getDevices(env) - if d == nil || len(*d) == 0 || *d == "void" { - // Environment variable unset or empty or "void": not a GPU container. + if d := getDevices(env, legacyImage); d != nil { + devices = *d + } else { + // 'nil' devices means this is not a GPU container. return nil } - // Environment variable non-empty and not "void". - devices = *d - - if devices == "none" { - devices = "" - } - var migConfigDevices string if d := getMigConfigDevices(env); d != nil { migConfigDevices = *d @@ -351,18 +324,11 @@ func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig { } var driverCapabilities string - if c := getDriverCapabilities(env); c == nil || len(*c) == 0 { - // Environment variable unset or set but empty: use default capability. - driverCapabilities = defaultDriverCapabilities - } else { - // Environment variable set and non-empty. + if c := getDriverCapabilities(env, legacyImage); c != nil { driverCapabilities = *c } - if driverCapabilities == "all" { - driverCapabilities = allDriverCapabilities - } - requirements := getRequirements(env) + requirements := getRequirements(env, legacyImage) // Don't fail on invalid values. disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire]) diff --git a/pkg/container_test.go b/pkg/container_test.go new file mode 100644 index 00000000..365e9c03 --- /dev/null +++ b/pkg/container_test.go @@ -0,0 +1,477 @@ +package main + +import ( + "reflect" + "testing" +) + +func TestGetNvidiaConfig(t *testing.T) { + var tests = []struct { + description string + env map[string]string + privileged bool + expectedConfig *nvidiaConfig + expectedPanic bool + }{ + { + description: "No environment, unprivileged", + env: map[string]string{}, + privileged: false, + expectedConfig: nil, + }, + { + description: "No environment, privileged", + env: map[string]string{}, + privileged: true, + expectedConfig: nil, + }, + { + description: "Legacy image, no devices, no capabilities, no requirements", + env: map[string]string{ + envCUDAVersion: "9.0", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "all", + DriverCapabilities: allDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Legacy image, devices 'all', no capabilities, no requirements", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVVisibleDevices: "all", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "all", + DriverCapabilities: allDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Legacy image, devices 'empty', no capabilities, no requirements", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVVisibleDevices: "", + }, + privileged: false, + expectedConfig: nil, + }, + { + description: "Legacy image, devices 'void', no capabilities, no requirements", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVVisibleDevices: "", + }, + privileged: false, + expectedConfig: nil, + }, + { + description: "Legacy image, devices 'none', no capabilities, no requirements", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVVisibleDevices: "none", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "", + DriverCapabilities: allDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Legacy image, devices set, no capabilities, no requirements", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVVisibleDevices: "gpu0,gpu1", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: allDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Legacy image, devices set, capabilities 'empty', no requirements", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVVisibleDevices: "gpu0,gpu1", + envNVDriverCapabilities: "", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: defaultDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Legacy image, devices set, capabilities 'all', no requirements", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVVisibleDevices: "gpu0,gpu1", + envNVDriverCapabilities: "all", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: allDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Legacy image, devices set, capabilities set, no requirements", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVVisibleDevices: "gpu0,gpu1", + envNVDriverCapabilities: "cap0,cap1", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: "cap0,cap1", + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Legacy image, devices set, capabilities set, requirements set", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVVisibleDevices: "gpu0,gpu1", + envNVDriverCapabilities: "cap0,cap1", + envNVRequirePrefix + "REQ0": "req0=true", + envNVRequirePrefix + "REQ1": "req1=false", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: "cap0,cap1", + Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"}, + DisableRequire: false, + }, + }, + { + description: "Legacy image, devices set, capabilities set, requirements set, disable requirements", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVVisibleDevices: "gpu0,gpu1", + envNVDriverCapabilities: "cap0,cap1", + envNVRequirePrefix + "REQ0": "req0=true", + envNVRequirePrefix + "REQ1": "req1=false", + envNVDisableRequire: "true", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: "cap0,cap1", + Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"}, + DisableRequire: true, + }, + }, + { + description: "Modern image, no devices, no capabilities, no requirements, no envCUDAVersion", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + }, + privileged: false, + expectedConfig: nil, + }, + { + description: "Modern image, no devices, no capabilities, no requirement, envCUDAVersion set", + env: map[string]string{ + envCUDAVersion: "9.0", + envNVRequireCUDA: "cuda>=9.0", + }, + privileged: false, + expectedConfig: nil, + }, + { + description: "Modern image, devices 'all', no capabilities, no requirements", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "all", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "all", + DriverCapabilities: defaultDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Modern image, devices 'empty', no capabilities, no requirements", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "", + }, + privileged: false, + expectedConfig: nil, + }, + { + description: "Modern image, devices 'void', no capabilities, no requirements", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "", + }, + privileged: false, + expectedConfig: nil, + }, + { + description: "Modern image, devices 'none', no capabilities, no requirements", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "none", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "", + DriverCapabilities: defaultDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Modern image, devices set, no capabilities, no requirements", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "gpu0,gpu1", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: defaultDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Modern image, devices set, capabilities 'empty', no requirements", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "gpu0,gpu1", + envNVDriverCapabilities: "", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: defaultDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Modern image, devices set, capabilities 'all', no requirements", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "gpu0,gpu1", + envNVDriverCapabilities: "all", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: allDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Modern image, devices set, capabilities set, no requirements", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "gpu0,gpu1", + envNVDriverCapabilities: "cap0,cap1", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: "cap0,cap1", + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Modern image, devices set, capabilities set, requirements set", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "gpu0,gpu1", + envNVDriverCapabilities: "cap0,cap1", + envNVRequirePrefix + "REQ0": "req0=true", + envNVRequirePrefix + "REQ1": "req1=false", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: "cap0,cap1", + Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"}, + DisableRequire: false, + }, + }, + { + description: "Modern image, devices set, capabilities set, requirements set, disable requirements", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "gpu0,gpu1", + envNVDriverCapabilities: "cap0,cap1", + envNVRequirePrefix + "REQ0": "req0=true", + envNVRequirePrefix + "REQ1": "req1=false", + envNVDisableRequire: "true", + }, + privileged: false, + expectedConfig: &nvidiaConfig{ + Devices: "gpu0,gpu1", + DriverCapabilities: "cap0,cap1", + Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"}, + DisableRequire: true, + }, + }, + { + description: "No cuda envs, devices 'all'", + env: map[string]string{ + envNVVisibleDevices: "all", + }, + privileged: false, + + expectedConfig: &nvidiaConfig{ + Devices: "all", + DriverCapabilities: defaultDriverCapabilities, + Requirements: []string{}, + DisableRequire: false, + }, + }, + { + description: "Modern image, devices 'all', migConfig set, privileged", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "all", + envNVMigConfigDevices: "mig0,mig1", + }, + privileged: true, + expectedConfig: &nvidiaConfig{ + Devices: "all", + MigConfigDevices: "mig0,mig1", + DriverCapabilities: defaultDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Modern image, devices 'all', migConfig set, unprivileged", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "all", + envNVMigConfigDevices: "mig0,mig1", + }, + privileged: false, + expectedPanic: true, + }, + { + description: "Modern image, devices 'all', migMonitor set, privileged", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "all", + envNVMigMonitorDevices: "mig0,mig1", + }, + privileged: true, + expectedConfig: &nvidiaConfig{ + Devices: "all", + MigMonitorDevices: "mig0,mig1", + DriverCapabilities: defaultDriverCapabilities, + Requirements: []string{"cuda>=9.0"}, + DisableRequire: false, + }, + }, + { + description: "Modern image, devices 'all', migMonitor set, unprivileged", + env: map[string]string{ + envNVRequireCUDA: "cuda>=9.0", + envNVVisibleDevices: "all", + envNVMigMonitorDevices: "mig0,mig1", + }, + privileged: false, + expectedPanic: true, + }, + } + for _, tc := range tests { + t.Run(tc.description, func(t *testing.T) { + // Wrap the call to getNvidiaConfig() in a closure. + var config *nvidiaConfig + getConfig := func() { + config = getNvidiaConfig(tc.env, tc.privileged) + } + + // For any tests that are expected to panic, make sure they do. + if tc.expectedPanic { + mustPanic(t, getConfig) + return + } + + // For all other tests, just grab the config + getConfig() + + // And start comparing the test results to the expected results. + if config == nil && tc.expectedConfig == nil { + return + } + if config != nil && tc.expectedConfig != nil { + if !reflect.DeepEqual(config.Devices, tc.expectedConfig.Devices) { + t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig) + } + if !reflect.DeepEqual(config.MigConfigDevices, tc.expectedConfig.MigConfigDevices) { + t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig) + } + if !reflect.DeepEqual(config.MigMonitorDevices, tc.expectedConfig.MigMonitorDevices) { + t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig) + } + if !reflect.DeepEqual(config.DriverCapabilities, tc.expectedConfig.DriverCapabilities) { + t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig) + } + if !elementsMatch(config.Requirements, tc.expectedConfig.Requirements) { + t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig) + } + if !reflect.DeepEqual(config.DisableRequire, tc.expectedConfig.DisableRequire) { + t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig) + } + return + } + t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig) + }) + } +} + +func elementsMatch(slice0, slice1 []string) bool { + map0 := make(map[string]int) + map1 := make(map[string]int) + + for _, e := range slice0 { + map0[e]++ + } + + for _, e := range slice1 { + map1[e]++ + } + + for k0, v0 := range map0 { + if map1[k0] != v0 { + return false + } + } + + for k1, v1 := range map1 { + if map0[k1] != v1 { + return false + } + } + + return true +} diff --git a/pkg/hook_test.go b/pkg/hook_test.go index f22a07a3..1788cdae 100644 --- a/pkg/hook_test.go +++ b/pkg/hook_test.go @@ -1,8 +1,8 @@ package main import ( - "testing" "encoding/json" + "testing" ) func TestParseCudaVersionValid(t *testing.T) {