diff --git a/cmd/nvidia-container-runtime-hook/container_config.go b/cmd/nvidia-container-runtime-hook/container_config.go index b509cf41..e982c695 100644 --- a/cmd/nvidia-container-runtime-hook/container_config.go +++ b/cmd/nvidia-container-runtime-hook/container_config.go @@ -6,7 +6,6 @@ import ( "log" "os" "path" - "path/filepath" "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/mod/semver" @@ -14,26 +13,10 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" ) -const ( - envCUDAVersion = "CUDA_VERSION" - envNVRequirePrefix = "NVIDIA_REQUIRE_" - envNVRequireCUDA = envNVRequirePrefix + "CUDA" - envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE" - envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES" - envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES" - envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES" - envNVImexChannels = "NVIDIA_IMEX_CHANNELS" - envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES" -) - const ( capSysAdmin = "CAP_SYS_ADMIN" ) -const ( - deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices" -) - type nvidiaConfig struct { Devices []string MigConfigDevices string @@ -76,23 +59,14 @@ type LinuxCapabilities struct { Ambient []string `json:"ambient,omitempty" platform:"linux"` } -// Mount from OCI runtime spec -// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103 -type Mount struct { - Destination string `json:"destination"` - Type string `json:"type,omitempty" platform:"linux,solaris"` - Source string `json:"source,omitempty"` - Options []string `json:"options,omitempty"` -} - // Spec from OCI runtime spec // We use pointers to structs, similarly to the latest version of runtime-spec: // https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28 type Spec struct { - Version *string `json:"ociVersion"` - Process *Process `json:"process,omitempty"` - Root *Root `json:"root,omitempty"` - Mounts []Mount `json:"mounts,omitempty"` + Version *string `json:"ociVersion"` + Process *Process `json:"process,omitempty"` + Root *Root `json:"root,omitempty"` + Mounts []specs.Mount `json:"mounts,omitempty"` } // HookState holds state information about the hook @@ -171,58 +145,22 @@ func isPrivileged(s *Spec) bool { return image.IsPrivileged(&fullSpec) } -func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) []string { +func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string { // We check if the image has at least one of the Swarm resource envvars defined and use this // if specified. for _, envvar := range swarmResourceEnvvars { - if image.HasEnvvar(envvar) { - return image.DevicesFromEnvvars(swarmResourceEnvvars...).List() + if containerImage.HasEnvvar(envvar) { + return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List() } } - return image.DevicesFromEnvvars(envNVVisibleDevices).List() + return containerImage.VisibleDevicesFromEnvVar() } -func getDevicesFromMounts(mounts []Mount) []string { - var devices []string - for _, m := range mounts { - root := filepath.Clean(deviceListAsVolumeMountsRoot) - source := filepath.Clean(m.Source) - destination := filepath.Clean(m.Destination) - - // Only consider mounts who's host volume is /dev/null - if source != "/dev/null" { - continue - } - // Only consider container mount points that begin with 'root' - if len(destination) < len(root) { - continue - } - if destination[:len(root)] != root { - continue - } - // Grab the full path beyond 'root' and add it to the list of devices - device := destination[len(root):] - if len(device) > 0 && device[0] == '/' { - device = device[1:] - } - if len(device) == 0 { - continue - } - devices = append(devices, device) - } - - if devices == nil { - return nil - } - - return devices -} - -func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) []string { +func getDevices(hookConfig *HookConfig, image image.CUDA, privileged bool) []string { // If enabled, try and get the device list from volume mounts first if hookConfig.AcceptDeviceListAsVolumeMounts { - devices := getDevicesFromMounts(mounts) + devices := image.VisibleDevicesFromMounts() if len(devices) > 0 { return devices } @@ -243,12 +181,12 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil return nil } -func getMigConfigDevices(image image.CUDA) *string { - return getMigDevices(image, envNVMigConfigDevices) +func getMigConfigDevices(i image.CUDA) *string { + return getMigDevices(i, image.EnvVarNvidiaMigConfigDevices) } -func getMigMonitorDevices(image image.CUDA) *string { - return getMigDevices(image, envNVMigMonitorDevices) +func getMigMonitorDevices(i image.CUDA) *string { + return getMigDevices(i, image.EnvVarNvidiaMigMonitorDevices) } func getMigDevices(image image.CUDA, envvar string) *string { @@ -259,11 +197,11 @@ func getMigDevices(image image.CUDA, envvar string) *string { return &devices } -func getImexChannels(image image.CUDA) *string { - if !image.HasEnvvar(envNVImexChannels) { +func getImexChannels(i image.CUDA) *string { + if !i.HasEnvvar(image.EnvVarNvidiaImexChannels) { return nil } - chans := image.Getenv(envNVImexChannels) + chans := i.Getenv(image.EnvVarNvidiaImexChannels) return &chans } @@ -274,8 +212,8 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities) - capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities) - capsEnv := cudaImage.Getenv(envNVDriverCapabilities) + capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities) + capsEnv := cudaImage.Getenv(image.EnvVarNvidiaDriverCapabilities) if !capsEnvSpecified && legacyImage { // Environment variable unset with legacy image: set all capabilities. @@ -294,10 +232,10 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo return capabilities } -func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig { +func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool) *nvidiaConfig { legacyImage := image.IsLegacy() - devices := getDevices(hookConfig, image, mounts, privileged) + devices := getDevices(hookConfig, image, privileged) if len(devices) == 0 { // empty devices means this is not a GPU container. return nil @@ -357,6 +295,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) { image, err := image.New( image.WithEnv(s.Process.Env), + image.WithMounts(s.Mounts), image.WithDisableRequire(hook.DisableRequire), ) if err != nil { @@ -368,6 +307,6 @@ func getContainerConfig(hook HookConfig) (config containerConfig) { Pid: h.Pid, Rootfs: s.Root.Path, Image: image, - Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged), + Nvidia: getNvidiaConfig(&hook, image, privileged), } } diff --git a/cmd/nvidia-container-runtime-hook/container_config_test.go b/cmd/nvidia-container-runtime-hook/container_config_test.go index 8f852ff8..cad9ca6e 100644 --- a/cmd/nvidia-container-runtime-hook/container_config_test.go +++ b/cmd/nvidia-container-runtime-hook/container_config_test.go @@ -4,6 +4,7 @@ import ( "path/filepath" "testing" + "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/require" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" @@ -33,7 +34,7 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, no devices, no capabilities, no requirements", env: map[string]string{ - envCUDAVersion: "9.0", + image.EnvVarCudaVersion: "9.0", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -45,8 +46,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, devices 'all', no capabilities, no requirements", env: map[string]string{ - envCUDAVersion: "9.0", - envNVVisibleDevices: "all", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaVisibleDevices: "all", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -58,8 +59,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, devices 'empty', no capabilities, no requirements", env: map[string]string{ - envCUDAVersion: "9.0", - envNVVisibleDevices: "", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaVisibleDevices: "", }, privileged: false, expectedConfig: nil, @@ -67,8 +68,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, devices 'void', no capabilities, no requirements", env: map[string]string{ - envCUDAVersion: "9.0", - envNVVisibleDevices: "void", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaVisibleDevices: "void", }, privileged: false, expectedConfig: nil, @@ -76,8 +77,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, devices 'none', no capabilities, no requirements", env: map[string]string{ - envCUDAVersion: "9.0", - envNVVisibleDevices: "none", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaVisibleDevices: "none", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -89,8 +90,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, devices set, no capabilities, no requirements", env: map[string]string{ - envCUDAVersion: "9.0", - envNVVisibleDevices: "gpu0,gpu1", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -102,9 +103,9 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, devices set, capabilities 'empty', no requirements", env: map[string]string{ - envCUDAVersion: "9.0", - envNVVisibleDevices: "gpu0,gpu1", - envNVDriverCapabilities: "", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaDriverCapabilities: "", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -116,9 +117,9 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, devices set, capabilities 'all', no requirements", env: map[string]string{ - envCUDAVersion: "9.0", - envNVVisibleDevices: "gpu0,gpu1", - envNVDriverCapabilities: "all", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaDriverCapabilities: "all", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -130,9 +131,9 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, devices set, capabilities set, no requirements", env: map[string]string{ - envCUDAVersion: "9.0", - envNVVisibleDevices: "gpu0,gpu1", - envNVDriverCapabilities: "video,display", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaDriverCapabilities: "video,display", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -144,11 +145,11 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, devices set, capabilities set, requirements set", env: map[string]string{ - envCUDAVersion: "9.0", - envNVVisibleDevices: "gpu0,gpu1", - envNVDriverCapabilities: "video,display", - envNVRequirePrefix + "REQ0": "req0=true", - envNVRequirePrefix + "REQ1": "req1=false", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaDriverCapabilities: "video,display", + image.NvidiaRequirePrefix + "REQ0": "req0=true", + image.NvidiaRequirePrefix + "REQ1": "req1=false", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -160,12 +161,12 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Legacy image, devices set, capabilities set, requirements set, disable requirements", env: map[string]string{ - envCUDAVersion: "9.0", - envNVVisibleDevices: "gpu0,gpu1", - envNVDriverCapabilities: "video,display", - envNVRequirePrefix + "REQ0": "req0=true", - envNVRequirePrefix + "REQ1": "req1=false", - envNVDisableRequire: "true", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaDriverCapabilities: "video,display", + image.NvidiaRequirePrefix + "REQ0": "req0=true", + image.NvidiaRequirePrefix + "REQ1": "req1=false", + image.EnvVarNvidiaDisableRequire: "true", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -175,18 +176,18 @@ func TestGetNvidiaConfig(t *testing.T) { }, }, { - description: "Modern image, no devices, no capabilities, no requirements, no envCUDAVersion", + description: "Modern image, no devices, no capabilities, no requirements, no image.EnvVarCudaVersion", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", }, privileged: false, expectedConfig: nil, }, { - description: "Modern image, no devices, no capabilities, no requirement, envCUDAVersion set", + description: "Modern image, no devices, no capabilities, no requirement, image.EnvVarCudaVersion set", env: map[string]string{ - envCUDAVersion: "9.0", - envNVRequireCUDA: "cuda>=9.0", + image.EnvVarCudaVersion: "9.0", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", }, privileged: false, expectedConfig: nil, @@ -194,8 +195,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices 'all', no capabilities, no requirements", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "all", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "all", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -207,8 +208,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices 'empty', no capabilities, no requirements", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "", }, privileged: false, expectedConfig: nil, @@ -216,8 +217,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices 'void', no capabilities, no requirements", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "void", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "void", }, privileged: false, expectedConfig: nil, @@ -225,8 +226,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices 'none', no capabilities, no requirements", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "none", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "none", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -238,8 +239,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices set, no capabilities, no requirements", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -251,9 +252,9 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices set, capabilities 'empty', no requirements", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "gpu0,gpu1", - envNVDriverCapabilities: "", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaDriverCapabilities: "", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -265,9 +266,9 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices set, capabilities 'all', no requirements", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "gpu0,gpu1", - envNVDriverCapabilities: "all", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaDriverCapabilities: "all", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -279,9 +280,9 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices set, capabilities set, no requirements", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "gpu0,gpu1", - envNVDriverCapabilities: "video,display", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaDriverCapabilities: "video,display", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -293,11 +294,11 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices set, capabilities set, requirements set", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "gpu0,gpu1", - envNVDriverCapabilities: "video,display", - envNVRequirePrefix + "REQ0": "req0=true", - envNVRequirePrefix + "REQ1": "req1=false", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaDriverCapabilities: "video,display", + image.NvidiaRequirePrefix + "REQ0": "req0=true", + image.NvidiaRequirePrefix + "REQ1": "req1=false", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -309,12 +310,12 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices set, capabilities set, requirements set, disable requirements", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "gpu0,gpu1", - envNVDriverCapabilities: "video,display", - envNVRequirePrefix + "REQ0": "req0=true", - envNVRequirePrefix + "REQ1": "req1=false", - envNVDisableRequire: "true", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1", + image.EnvVarNvidiaDriverCapabilities: "video,display", + image.NvidiaRequirePrefix + "REQ0": "req0=true", + image.NvidiaRequirePrefix + "REQ1": "req1=false", + image.EnvVarNvidiaDisableRequire: "true", }, privileged: false, expectedConfig: &nvidiaConfig{ @@ -326,7 +327,7 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "No cuda envs, devices 'all'", env: map[string]string{ - envNVVisibleDevices: "all", + image.EnvVarNvidiaVisibleDevices: "all", }, privileged: false, @@ -339,9 +340,9 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices 'all', migConfig set, privileged", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "all", - envNVMigConfigDevices: "mig0,mig1", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "all", + image.EnvVarNvidiaMigConfigDevices: "mig0,mig1", }, privileged: true, expectedConfig: &nvidiaConfig{ @@ -354,9 +355,9 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices 'all', migConfig set, unprivileged", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "all", - envNVMigConfigDevices: "mig0,mig1", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "all", + image.EnvVarNvidiaMigConfigDevices: "mig0,mig1", }, privileged: false, expectedPanic: true, @@ -364,9 +365,9 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices 'all', migMonitor set, privileged", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "all", - envNVMigMonitorDevices: "mig0,mig1", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "all", + image.EnvVarNvidiaMigMonitorDevices: "mig0,mig1", }, privileged: true, expectedConfig: &nvidiaConfig{ @@ -379,9 +380,9 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Modern image, devices 'all', migMonitor set, unprivileged", env: map[string]string{ - envNVRequireCUDA: "cuda>=9.0", - envNVVisibleDevices: "all", - envNVMigMonitorDevices: "mig0,mig1", + image.EnvVarNvidiaRequireCuda: "cuda>=9.0", + image.EnvVarNvidiaVisibleDevices: "all", + image.EnvVarNvidiaMigMonitorDevices: "mig0,mig1", }, privileged: false, expectedPanic: true, @@ -389,8 +390,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Hook config set as driver-capabilities-all", env: map[string]string{ - envNVVisibleDevices: "all", - envNVDriverCapabilities: "all", + image.EnvVarNvidiaVisibleDevices: "all", + image.EnvVarNvidiaDriverCapabilities: "all", }, privileged: true, hookConfig: &HookConfig{ @@ -404,8 +405,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Hook config set, envvar sets driver-capabilities", env: map[string]string{ - envNVVisibleDevices: "all", - envNVDriverCapabilities: "video,display", + image.EnvVarNvidiaVisibleDevices: "all", + image.EnvVarNvidiaDriverCapabilities: "video,display", }, privileged: true, hookConfig: &HookConfig{ @@ -419,7 +420,7 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Hook config set, envvar unset sets default driver-capabilities", env: map[string]string{ - envNVVisibleDevices: "all", + image.EnvVarNvidiaVisibleDevices: "all", }, privileged: true, hookConfig: &HookConfig{ @@ -433,8 +434,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Hook config set, swarmResource overrides device selection", env: map[string]string{ - envNVVisibleDevices: "all", - "DOCKER_SWARM_RESOURCE": "GPU1,GPU2", + image.EnvVarNvidiaVisibleDevices: "all", + "DOCKER_SWARM_RESOURCE": "GPU1,GPU2", }, privileged: true, hookConfig: &HookConfig{ @@ -449,8 +450,8 @@ func TestGetNvidiaConfig(t *testing.T) { { description: "Hook config set, comma separated swarmResource is split and overrides device selection", env: map[string]string{ - envNVVisibleDevices: "all", - "DOCKER_SWARM_RESOURCE": "GPU1,GPU2", + image.EnvVarNvidiaVisibleDevices: "all", + "DOCKER_SWARM_RESOURCE": "GPU1,GPU2", }, privileged: true, hookConfig: &HookConfig{ @@ -476,7 +477,7 @@ func TestGetNvidiaConfig(t *testing.T) { defaultConfig, _ := getDefaultHookConfig() hookConfig = &defaultConfig } - config = getNvidiaConfig(hookConfig, image, nil, tc.privileged) + config = getNvidiaConfig(hookConfig, image, tc.privileged) } // For any tests that are expected to panic, make sure they do. @@ -506,88 +507,10 @@ func TestGetNvidiaConfig(t *testing.T) { } } -func TestGetDevicesFromMounts(t *testing.T) { - var tests = []struct { - description string - mounts []Mount - expectedDevices []string - }{ - { - description: "No mounts", - mounts: nil, - expectedDevices: nil, - }, - { - description: "Host path is not /dev/null", - mounts: []Mount{ - { - Source: "/not/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"), - }, - }, - expectedDevices: nil, - }, - { - description: "Container path is not prefixed by 'root'", - mounts: []Mount{ - { - Source: "/dev/null", - Destination: filepath.Join("/other/prefix", "GPU0"), - }, - }, - expectedDevices: nil, - }, - { - description: "Container path is only 'root'", - mounts: []Mount{ - { - Source: "/dev/null", - Destination: deviceListAsVolumeMountsRoot, - }, - }, - expectedDevices: nil, - }, - { - description: "Discover 2 devices", - mounts: []Mount{ - { - Source: "/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"), - }, - { - Source: "/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"), - }, - }, - expectedDevices: []string{"GPU0", "GPU1"}, - }, - { - description: "Discover 2 devices with slashes in the name", - mounts: []Mount{ - { - Source: "/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0-MIG0/0/1"), - }, - { - Source: "/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1-MIG0/0/1"), - }, - }, - expectedDevices: []string{"GPU0-MIG0/0/1", "GPU1-MIG0/0/1"}, - }, - } - for _, tc := range tests { - t.Run(tc.description, func(t *testing.T) { - devices := getDevicesFromMounts(tc.mounts) - require.Equal(t, tc.expectedDevices, devices) - }) - } -} - func TestDeviceListSourcePriority(t *testing.T) { var tests = []struct { description string - mountDevices []Mount + mountDevices []specs.Mount envvarDevices string privileged bool acceptUnprivileged bool @@ -596,14 +519,14 @@ func TestDeviceListSourcePriority(t *testing.T) { }{ { description: "Mount devices, unprivileged, no accept unprivileged", - mountDevices: []Mount{ + mountDevices: []specs.Mount{ { Source: "/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"), + Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"), }, { Source: "/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"), + Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"), }, }, envvarDevices: "GPU2,GPU3", @@ -641,14 +564,14 @@ func TestDeviceListSourcePriority(t *testing.T) { }, { description: "Mount devices, unprivileged, accept unprivileged, no accept mounts", - mountDevices: []Mount{ + mountDevices: []specs.Mount{ { Source: "/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"), + Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"), }, { Source: "/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"), + Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"), }, }, envvarDevices: "GPU2,GPU3", @@ -659,14 +582,14 @@ func TestDeviceListSourcePriority(t *testing.T) { }, { description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts", - mountDevices: []Mount{ + mountDevices: []specs.Mount{ { Source: "/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"), + Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"), }, { Source: "/dev/null", - Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"), + Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"), }, }, envvarDevices: "GPU2,GPU3", @@ -684,14 +607,15 @@ func TestDeviceListSourcePriority(t *testing.T) { image, _ := image.New( image.WithEnvMap( map[string]string{ - envNVVisibleDevices: tc.envvarDevices, + image.EnvVarNvidiaVisibleDevices: tc.envvarDevices, }, ), + image.WithMounts(tc.mountDevices), ) hookConfig, _ := getDefaultHookConfig() hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts - devices = getDevices(&hookConfig, image, tc.mountDevices, tc.privileged) + devices = getDevices(&hookConfig, image, tc.privileged) } // For all other tests, just grab the devices and check the results @@ -720,41 +644,41 @@ func TestGetDevicesFromEnvvar(t *testing.T) { { description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", env: map[string]string{ - envNVVisibleDevices: "", + image.EnvVarNvidiaVisibleDevices: "", }, }, { description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", env: map[string]string{ - envNVVisibleDevices: "void", + image.EnvVarNvidiaVisibleDevices: "void", }, }, { description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image", env: map[string]string{ - envNVVisibleDevices: "none", + image.EnvVarNvidiaVisibleDevices: "none", }, expectedDevices: []string{""}, }, { description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image", env: map[string]string{ - envNVVisibleDevices: gpuID, + image.EnvVarNvidiaVisibleDevices: gpuID, }, expectedDevices: []string{gpuID}, }, { description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image", env: map[string]string{ - envNVVisibleDevices: gpuID, - envCUDAVersion: "legacy", + image.EnvVarNvidiaVisibleDevices: gpuID, + image.EnvVarCudaVersion: "legacy", }, expectedDevices: []string{gpuID}, }, { description: "empty env returns all for legacy image", env: map[string]string{ - envCUDAVersion: "legacy", + image.EnvVarCudaVersion: "legacy", }, expectedDevices: []string{"all"}, }, @@ -769,47 +693,47 @@ func TestGetDevicesFromEnvvar(t *testing.T) { { description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", env: map[string]string{ - envNVVisibleDevices: "", - envDockerResourceGPUs: anotherGPUID, + image.EnvVarNvidiaVisibleDevices: "", + envDockerResourceGPUs: anotherGPUID, }, }, { description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", env: map[string]string{ - envNVVisibleDevices: "void", - envDockerResourceGPUs: anotherGPUID, + image.EnvVarNvidiaVisibleDevices: "void", + envDockerResourceGPUs: anotherGPUID, }, }, { description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image", env: map[string]string{ - envNVVisibleDevices: "none", - envDockerResourceGPUs: anotherGPUID, + image.EnvVarNvidiaVisibleDevices: "none", + envDockerResourceGPUs: anotherGPUID, }, expectedDevices: []string{""}, }, { description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image", env: map[string]string{ - envNVVisibleDevices: gpuID, - envDockerResourceGPUs: anotherGPUID, + image.EnvVarNvidiaVisibleDevices: gpuID, + envDockerResourceGPUs: anotherGPUID, }, expectedDevices: []string{gpuID}, }, { description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image", env: map[string]string{ - envNVVisibleDevices: gpuID, - envDockerResourceGPUs: anotherGPUID, - envCUDAVersion: "legacy", + image.EnvVarNvidiaVisibleDevices: gpuID, + envDockerResourceGPUs: anotherGPUID, + image.EnvVarCudaVersion: "legacy", }, expectedDevices: []string{gpuID}, }, { description: "empty env returns all for legacy image", env: map[string]string{ - envDockerResourceGPUs: anotherGPUID, - envCUDAVersion: "legacy", + envDockerResourceGPUs: anotherGPUID, + image.EnvVarCudaVersion: "legacy", }, expectedDevices: []string{"all"}, }, @@ -853,8 +777,8 @@ func TestGetDevicesFromEnvvar(t *testing.T) { description: "DOCKER_RESOURCE_GPUS set returns value for legacy image", swarmResourceEnvvars: []string{envDockerResourceGPUs}, env: map[string]string{ - envDockerResourceGPUs: gpuID, - envCUDAVersion: "legacy", + envDockerResourceGPUs: gpuID, + image.EnvVarCudaVersion: "legacy", }, expectedDevices: []string{gpuID}, }, @@ -870,8 +794,8 @@ func TestGetDevicesFromEnvvar(t *testing.T) { description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present", swarmResourceEnvvars: []string{envDockerResourceGPUs}, env: map[string]string{ - envNVVisibleDevices: gpuID, - envDockerResourceGPUs: anotherGPUID, + image.EnvVarNvidiaVisibleDevices: gpuID, + envDockerResourceGPUs: anotherGPUID, }, expectedDevices: []string{anotherGPUID}, }, @@ -879,7 +803,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) { description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present", swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"}, env: map[string]string{ - envNVVisibleDevices: gpuID, + image.EnvVarNvidiaVisibleDevices: gpuID, "DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID, }, expectedDevices: []string{anotherGPUID}, @@ -888,7 +812,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) { description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present", swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"}, env: map[string]string{ - envNVVisibleDevices: gpuID, + image.EnvVarNvidiaVisibleDevices: gpuID, "DOCKER_RESOURCE_GPUS": thirdGPUID, "DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID, }, @@ -898,7 +822,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) { description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present", swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"}, env: map[string]string{ - envNVVisibleDevices: gpuID, + image.EnvVarNvidiaVisibleDevices: gpuID, "DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID, }, expectedDevices: []string{anotherGPUID}, @@ -931,7 +855,7 @@ func TestGetDriverCapabilities(t *testing.T) { { description: "Env is set for legacy image", env: map[string]string{ - envNVDriverCapabilities: "display,video", + image.EnvVarNvidiaDriverCapabilities: "display,video", }, legacyImage: true, supportedCapabilities: supportedCapabilities, @@ -940,7 +864,7 @@ func TestGetDriverCapabilities(t *testing.T) { { description: "Env is all for legacy image", env: map[string]string{ - envNVDriverCapabilities: "all", + image.EnvVarNvidiaDriverCapabilities: "all", }, legacyImage: true, supportedCapabilities: supportedCapabilities, @@ -949,7 +873,7 @@ func TestGetDriverCapabilities(t *testing.T) { { description: "Env is empty for legacy image", env: map[string]string{ - envNVDriverCapabilities: "", + image.EnvVarNvidiaDriverCapabilities: "", }, legacyImage: true, supportedCapabilities: supportedCapabilities, @@ -965,7 +889,7 @@ func TestGetDriverCapabilities(t *testing.T) { { description: "Env is set for modern image", env: map[string]string{ - envNVDriverCapabilities: "display,video", + image.EnvVarNvidiaDriverCapabilities: "display,video", }, legacyImage: false, supportedCapabilities: supportedCapabilities, @@ -981,7 +905,7 @@ func TestGetDriverCapabilities(t *testing.T) { { description: "Env is all for modern image", env: map[string]string{ - envNVDriverCapabilities: "all", + image.EnvVarNvidiaDriverCapabilities: "all", }, legacyImage: false, supportedCapabilities: supportedCapabilities, @@ -990,7 +914,7 @@ func TestGetDriverCapabilities(t *testing.T) { { description: "Env is empty for modern image", env: map[string]string{ - envNVDriverCapabilities: "", + image.EnvVarNvidiaDriverCapabilities: "", }, legacyImage: false, supportedCapabilities: supportedCapabilities, @@ -999,7 +923,7 @@ func TestGetDriverCapabilities(t *testing.T) { { description: "Invalid capabilities panic", env: map[string]string{ - envNVDriverCapabilities: "compute,utility", + image.EnvVarNvidiaDriverCapabilities: "compute,utility", }, supportedCapabilities: "not-compute,not-utility", expectedPanic: true, diff --git a/internal/config/image/builder.go b/internal/config/image/builder.go index da9025f0..332d017a 100644 --- a/internal/config/image/builder.go +++ b/internal/config/image/builder.go @@ -47,7 +47,7 @@ func New(opt ...Option) (CUDA, error) { // build creates a CUDA image from the builder. func (b builder) build() (CUDA, error) { if b.disableRequire { - b.env[envNVDisableRequire] = "true" + b.env[EnvVarNvidiaDisableRequire] = "true" } c := CUDA{ diff --git a/internal/config/image/cuda_image.go b/internal/config/image/cuda_image.go index 75e622b7..f6c0979b 100644 --- a/internal/config/image/cuda_image.go +++ b/internal/config/image/cuda_image.go @@ -28,12 +28,9 @@ import ( ) const ( - envCUDAVersion = "CUDA_VERSION" - envNVRequirePrefix = "NVIDIA_REQUIRE_" - envNVRequireCUDA = envNVRequirePrefix + "CUDA" - envNVRequireJetpack = envNVRequirePrefix + "JETPACK" - envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE" - envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES" + DeviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices" + + volumeMountDevicePrefixCDI = "cdi/" ) // CUDA represents a CUDA image that can be used for GPU computing. This wraps @@ -80,8 +77,8 @@ func (i CUDA) HasEnvvar(key string) bool { // image is considered legacy if it has a CUDA_VERSION environment variable defined // and no NVIDIA_REQUIRE_CUDA environment variable defined. func (i CUDA) IsLegacy() bool { - legacyCudaVersion := i.env[envCUDAVersion] - cudaRequire := i.env[envNVRequireCUDA] + legacyCudaVersion := i.env[EnvVarCudaVersion] + cudaRequire := i.env[EnvVarNvidiaRequireCuda] return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 } @@ -95,7 +92,7 @@ func (i CUDA) GetRequirements() ([]string, error) { // All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli var requirements []string for name, value := range i.env { - if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) { + if strings.HasPrefix(name, NvidiaRequirePrefix) && !strings.HasPrefix(name, EnvVarNvidiaRequireJetpack) { requirements = append(requirements, value) } } @@ -113,7 +110,7 @@ func (i CUDA) GetRequirements() ([]string, error) { // HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set // to a valid (true) boolean value this can be used to disable the requirement checks func (i CUDA) HasDisableRequire() bool { - if disable, exists := i.env[envNVDisableRequire]; exists { + if disable, exists := i.env[EnvVarNvidiaDisableRequire]; exists { // i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable) d, _ := strconv.ParseBool(disable) return d @@ -157,7 +154,7 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices { // GetDriverCapabilities returns the requested driver capabilities. func (i CUDA) GetDriverCapabilities() DriverCapabilities { - env := i.env[envNVDriverCapabilities] + env := i.env[EnvVarNvidiaDriverCapabilities] capabilities := make(DriverCapabilities) for _, c := range strings.Split(env, ",") { @@ -168,7 +165,7 @@ func (i CUDA) GetDriverCapabilities() DriverCapabilities { } func (i CUDA) legacyVersion() (string, error) { - cudaVersion := i.env[envCUDAVersion] + cudaVersion := i.env[EnvVarCudaVersion] majorMinor, err := parseMajorMinorVersion(cudaVersion) if err != nil { return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err) @@ -202,7 +199,7 @@ func parseMajorMinorVersion(version string) (string, error) { // OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool { var hasCDIdevice bool - for _, device := range i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() { + for _, device := range i.VisibleDevicesFromEnvVar() { if !parser.IsQualifiedName(device) { return false } @@ -218,14 +215,28 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool { return hasCDIdevice } -const ( - deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices" -) +// VisibleDevicesFromEnvVar returns the set of visible devices requested through +// the NVIDIA_VISIBLE_DEVICES environment variable. +func (i CUDA) VisibleDevicesFromEnvVar() []string { + return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List() +} + +// VisibleDevicesFromMounts returns the set of visible devices requested as mounts. +func (i CUDA) VisibleDevicesFromMounts() []string { + var devices []string + for _, device := range i.DevicesFromMounts() { + if strings.HasPrefix(device, volumeMountDevicePrefixCDI) { + continue + } + devices = append(devices, device) + } + return devices +} // DevicesFromMounts returns a list of device specified as mounts. // TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime func (i CUDA) DevicesFromMounts() []string { - root := filepath.Clean(deviceListAsVolumeMountsRoot) + root := filepath.Clean(DeviceListAsVolumeMountsRoot) seen := make(map[string]bool) var devices []string for _, m := range i.mounts { @@ -260,10 +271,10 @@ func (i CUDA) DevicesFromMounts() []string { func (i CUDA) CDIDevicesFromMounts() []string { var devices []string for _, mountDevice := range i.DevicesFromMounts() { - if !strings.HasPrefix(mountDevice, "cdi/") { + if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixCDI) { continue } - parts := strings.SplitN(strings.TrimPrefix(mountDevice, "cdi/"), "/", 3) + parts := strings.SplitN(strings.TrimPrefix(mountDevice, volumeMountDevicePrefixCDI), "/", 3) if len(parts) != 3 { continue } diff --git a/internal/config/image/cuda_image_test.go b/internal/config/image/cuda_image_test.go index 406ad443..9d9d5044 100644 --- a/internal/config/image/cuda_image_test.go +++ b/internal/config/image/cuda_image_test.go @@ -17,8 +17,10 @@ package image import ( + "path/filepath" "testing" + "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/require" ) @@ -130,3 +132,80 @@ func TestGetRequirements(t *testing.T) { } } + +func TestGetVisibleDevicesFromMounts(t *testing.T) { + var tests = []struct { + description string + mounts []specs.Mount + expectedDevices []string + }{ + { + description: "No mounts", + mounts: nil, + expectedDevices: nil, + }, + { + description: "Host path is not /dev/null", + mounts: []specs.Mount{ + { + Source: "/not/dev/null", + Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"), + }, + }, + expectedDevices: nil, + }, + { + description: "Container path is not prefixed by 'root'", + mounts: []specs.Mount{ + { + Source: "/dev/null", + Destination: filepath.Join("/other/prefix", "GPU0"), + }, + }, + expectedDevices: nil, + }, + { + description: "Container path is only 'root'", + mounts: []specs.Mount{ + { + Source: "/dev/null", + Destination: DeviceListAsVolumeMountsRoot, + }, + }, + expectedDevices: nil, + }, + { + description: "Discover 2 devices", + mounts: makeTestMounts("GPU0", "GPU1"), + expectedDevices: []string{"GPU0", "GPU1"}, + }, + { + description: "Discover 2 devices with slashes in the name", + mounts: makeTestMounts("GPU0-MIG0/0/1", "GPU1-MIG0/0/1"), + expectedDevices: []string{"GPU0-MIG0/0/1", "GPU1-MIG0/0/1"}, + }, + { + description: "cdi devices are ignored", + mounts: makeTestMounts("GPU0", "cdi/nvidia.com/gpu=all", "GPU1"), + expectedDevices: []string{"GPU0", "GPU1"}, + }, + } + for _, tc := range tests { + t.Run(tc.description, func(t *testing.T) { + image, _ := New(WithMounts(tc.mounts)) + require.Equal(t, tc.expectedDevices, image.VisibleDevicesFromMounts()) + }) + } +} + +func makeTestMounts(paths ...string) []specs.Mount { + var mounts []specs.Mount + for _, path := range paths { + mount := specs.Mount{ + Source: "/dev/null", + Destination: filepath.Join(DeviceListAsVolumeMountsRoot, path), + } + mounts = append(mounts, mount) + } + return mounts +} diff --git a/internal/config/image/envvars.go b/internal/config/image/envvars.go new file mode 100644 index 00000000..0789f22f --- /dev/null +++ b/internal/config/image/envvars.go @@ -0,0 +1,31 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package image + +const ( + EnvVarCudaVersion = "CUDA_VERSION" + EnvVarNvidiaDisableRequire = "NVIDIA_DISABLE_REQUIRE" + EnvVarNvidiaDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES" + EnvVarNvidiaImexChannels = "NVIDIA_IMEX_CHANNELS" + EnvVarNvidiaMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES" + EnvVarNvidiaMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES" + EnvVarNvidiaRequireCuda = NvidiaRequirePrefix + "CUDA" + EnvVarNvidiaRequireJetpack = NvidiaRequirePrefix + "JETPACK" + EnvVarNvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES" + + NvidiaRequirePrefix = "NVIDIA_REQUIRE_" +) diff --git a/internal/modifier/cdi.go b/internal/modifier/cdi.go index c5af4f88..90cd481b 100644 --- a/internal/modifier/cdi.go +++ b/internal/modifier/cdi.go @@ -90,11 +90,9 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C } } - envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar) - var devices []string seen := make(map[string]bool) - for _, name := range envDevices.List() { + for _, name := range container.VisibleDevicesFromEnvVar() { if !parser.IsQualifiedName(name) { name = fmt.Sprintf("%s=%s", cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind, name) } diff --git a/internal/modifier/csv.go b/internal/modifier/csv.go index 0905d5da..a6129432 100644 --- a/internal/modifier/csv.go +++ b/internal/modifier/csv.go @@ -30,23 +30,16 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi" ) -const ( - visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES" - visibleDevicesVoid = "void" - - nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK" -) - // NewCSVModifier creates a modifier that applies modications to an OCI spec if required by the runtime wrapper. // The modifications are defined by CSV MountSpecs. -func NewCSVModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { - if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 { +func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image.CUDA) (oci.SpecModifier, error) { + if devices := container.VisibleDevicesFromEnvVar(); len(devices) == 0 { logger.Infof("No modification required; no devices requested") return nil, nil } logger.Infof("Constructing modifier from config: %+v", *cfg) - if err := checkRequirements(logger, image); err != nil { + if err := checkRequirements(logger, container); err != nil { return nil, fmt.Errorf("requirements not met: %v", err) } @@ -55,7 +48,7 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, image image.CUD return nil, fmt.Errorf("failed to get list of CSV files: %v", err) } - if image.Getenv(nvidiaRequireJetpackEnvvar) != "csv-mounts=all" { + if container.Getenv(image.EnvVarNvidiaRequireJetpack) != "csv-mounts=all" { csvFiles = csv.BaseFilesOnly(csvFiles) } diff --git a/internal/modifier/gated.go b/internal/modifier/gated.go index 5a7141b9..2c39d207 100644 --- a/internal/modifier/gated.go +++ b/internal/modifier/gated.go @@ -36,7 +36,7 @@ import ( // // If not devices are selected, no changes are made. func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { - if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 { + if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 { logger.Infof("No modification required; no devices requested") return nil, nil } diff --git a/internal/modifier/graphics.go b/internal/modifier/graphics.go index d9784d5e..31aa6ef3 100644 --- a/internal/modifier/graphics.go +++ b/internal/modifier/graphics.go @@ -29,8 +29,8 @@ import ( // NewGraphicsModifier constructs a modifier that injects graphics-related modifications into an OCI runtime specification. // The value of the NVIDIA_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made. -func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver) (oci.SpecModifier, error) { - if required, reason := requiresGraphicsModifier(image); !required { +func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerImage image.CUDA, driver *root.Driver) (oci.SpecModifier, error) { + if required, reason := requiresGraphicsModifier(containerImage); !required { logger.Infof("No graphics modifier required: %v", reason) return nil, nil } @@ -50,7 +50,7 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag devRoot := driver.Root drmNodes, err := discover.NewDRMNodesDiscoverer( logger, - image.DevicesFromEnvvars(visibleDevicesEnvvar), + containerImage.DevicesFromEnvvars(image.EnvVarNvidiaVisibleDevices), devRoot, nvidiaCDIHookPath, ) @@ -67,7 +67,7 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag // requiresGraphicsModifier determines whether a graphics modifier is required. func requiresGraphicsModifier(cudaImage image.CUDA) (bool, string) { - if devices := cudaImage.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 { + if devices := cudaImage.VisibleDevicesFromEnvVar(); len(devices) == 0 { return false, "no devices requested" } diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index b98f4529..8ed9e5aa 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -24,6 +24,7 @@ import ( "github.com/NVIDIA/go-nvml/pkg/nvml" "tags.cncf.io/container-device-interface/pkg/cdi" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root" "github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils" @@ -200,7 +201,7 @@ func (m *wrapper) GetCommonEdits() (*cdi.ContainerEdits, error) { if err != nil { return nil, err } - edits.Env = append(edits.Env, "NVIDIA_VISIBLE_DEVICES=void") + edits.Env = append(edits.Env, image.EnvVarNvidiaVisibleDevices+"=void") return edits, nil }