mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
[no-relnote] Use image.CUDA to extract visible devices
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
@@ -47,7 +47,7 @@ func New(opt ...Option) (CUDA, error) {
|
||||
// build creates a CUDA image from the builder.
|
||||
func (b builder) build() (CUDA, error) {
|
||||
if b.disableRequire {
|
||||
b.env[envNVDisableRequire] = "true"
|
||||
b.env[EnvVarNvidiaDisableRequire] = "true"
|
||||
}
|
||||
|
||||
c := CUDA{
|
||||
|
||||
@@ -28,12 +28,9 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
||||
envNVRequireJetpack = envNVRequirePrefix + "JETPACK"
|
||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
||||
DeviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
||||
|
||||
volumeMountDevicePrefixCDI = "cdi/"
|
||||
)
|
||||
|
||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||
@@ -80,8 +77,8 @@ func (i CUDA) HasEnvvar(key string) bool {
|
||||
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
||||
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
||||
func (i CUDA) IsLegacy() bool {
|
||||
legacyCudaVersion := i.env[envCUDAVersion]
|
||||
cudaRequire := i.env[envNVRequireCUDA]
|
||||
legacyCudaVersion := i.env[EnvVarCudaVersion]
|
||||
cudaRequire := i.env[EnvVarNvidiaRequireCuda]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
@@ -95,7 +92,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range i.env {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
||||
if strings.HasPrefix(name, NvidiaRequirePrefix) && !strings.HasPrefix(name, EnvVarNvidiaRequireJetpack) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
}
|
||||
@@ -113,7 +110,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
||||
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
||||
// to a valid (true) boolean value this can be used to disable the requirement checks
|
||||
func (i CUDA) HasDisableRequire() bool {
|
||||
if disable, exists := i.env[envNVDisableRequire]; exists {
|
||||
if disable, exists := i.env[EnvVarNvidiaDisableRequire]; exists {
|
||||
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
||||
d, _ := strconv.ParseBool(disable)
|
||||
return d
|
||||
@@ -157,7 +154,7 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
|
||||
// GetDriverCapabilities returns the requested driver capabilities.
|
||||
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||
env := i.env[envNVDriverCapabilities]
|
||||
env := i.env[EnvVarNvidiaDriverCapabilities]
|
||||
|
||||
capabilities := make(DriverCapabilities)
|
||||
for _, c := range strings.Split(env, ",") {
|
||||
@@ -168,7 +165,7 @@ func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||
}
|
||||
|
||||
func (i CUDA) legacyVersion() (string, error) {
|
||||
cudaVersion := i.env[envCUDAVersion]
|
||||
cudaVersion := i.env[EnvVarCudaVersion]
|
||||
majorMinor, err := parseMajorMinorVersion(cudaVersion)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
|
||||
@@ -202,7 +199,7 @@ func parseMajorMinorVersion(version string) (string, error) {
|
||||
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
|
||||
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
||||
var hasCDIdevice bool
|
||||
for _, device := range i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
|
||||
for _, device := range i.VisibleDevicesFromEnvVar() {
|
||||
if !parser.IsQualifiedName(device) {
|
||||
return false
|
||||
}
|
||||
@@ -218,14 +215,28 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
||||
return hasCDIdevice
|
||||
}
|
||||
|
||||
const (
|
||||
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
||||
)
|
||||
// VisibleDevicesFromEnvVar returns the set of visible devices requested through
|
||||
// the NVIDIA_VISIBLE_DEVICES environment variable.
|
||||
func (i CUDA) VisibleDevicesFromEnvVar() []string {
|
||||
return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List()
|
||||
}
|
||||
|
||||
// VisibleDevicesFromMounts returns the set of visible devices requested as mounts.
|
||||
func (i CUDA) VisibleDevicesFromMounts() []string {
|
||||
var devices []string
|
||||
for _, device := range i.DevicesFromMounts() {
|
||||
if strings.HasPrefix(device, volumeMountDevicePrefixCDI) {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, device)
|
||||
}
|
||||
return devices
|
||||
}
|
||||
|
||||
// DevicesFromMounts returns a list of device specified as mounts.
|
||||
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
|
||||
func (i CUDA) DevicesFromMounts() []string {
|
||||
root := filepath.Clean(deviceListAsVolumeMountsRoot)
|
||||
root := filepath.Clean(DeviceListAsVolumeMountsRoot)
|
||||
seen := make(map[string]bool)
|
||||
var devices []string
|
||||
for _, m := range i.mounts {
|
||||
@@ -260,10 +271,10 @@ func (i CUDA) DevicesFromMounts() []string {
|
||||
func (i CUDA) CDIDevicesFromMounts() []string {
|
||||
var devices []string
|
||||
for _, mountDevice := range i.DevicesFromMounts() {
|
||||
if !strings.HasPrefix(mountDevice, "cdi/") {
|
||||
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixCDI) {
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(strings.TrimPrefix(mountDevice, "cdi/"), "/", 3)
|
||||
parts := strings.SplitN(strings.TrimPrefix(mountDevice, volumeMountDevicePrefixCDI), "/", 3)
|
||||
if len(parts) != 3 {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -17,8 +17,10 @@
|
||||
package image
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
@@ -130,3 +132,80 @@ func TestGetRequirements(t *testing.T) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
||||
var tests = []struct {
|
||||
description string
|
||||
mounts []specs.Mount
|
||||
expectedDevices []string
|
||||
}{
|
||||
{
|
||||
description: "No mounts",
|
||||
mounts: nil,
|
||||
expectedDevices: nil,
|
||||
},
|
||||
{
|
||||
description: "Host path is not /dev/null",
|
||||
mounts: []specs.Mount{
|
||||
{
|
||||
Source: "/not/dev/null",
|
||||
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||
},
|
||||
},
|
||||
expectedDevices: nil,
|
||||
},
|
||||
{
|
||||
description: "Container path is not prefixed by 'root'",
|
||||
mounts: []specs.Mount{
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join("/other/prefix", "GPU0"),
|
||||
},
|
||||
},
|
||||
expectedDevices: nil,
|
||||
},
|
||||
{
|
||||
description: "Container path is only 'root'",
|
||||
mounts: []specs.Mount{
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: DeviceListAsVolumeMountsRoot,
|
||||
},
|
||||
},
|
||||
expectedDevices: nil,
|
||||
},
|
||||
{
|
||||
description: "Discover 2 devices",
|
||||
mounts: makeTestMounts("GPU0", "GPU1"),
|
||||
expectedDevices: []string{"GPU0", "GPU1"},
|
||||
},
|
||||
{
|
||||
description: "Discover 2 devices with slashes in the name",
|
||||
mounts: makeTestMounts("GPU0-MIG0/0/1", "GPU1-MIG0/0/1"),
|
||||
expectedDevices: []string{"GPU0-MIG0/0/1", "GPU1-MIG0/0/1"},
|
||||
},
|
||||
{
|
||||
description: "cdi devices are ignored",
|
||||
mounts: makeTestMounts("GPU0", "cdi/nvidia.com/gpu=all", "GPU1"),
|
||||
expectedDevices: []string{"GPU0", "GPU1"},
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
image, _ := New(WithMounts(tc.mounts))
|
||||
require.Equal(t, tc.expectedDevices, image.VisibleDevicesFromMounts())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func makeTestMounts(paths ...string) []specs.Mount {
|
||||
var mounts []specs.Mount
|
||||
for _, path := range paths {
|
||||
mount := specs.Mount{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, path),
|
||||
}
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
return mounts
|
||||
}
|
||||
|
||||
31
internal/config/image/envvars.go
Normal file
31
internal/config/image/envvars.go
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
# Copyright 2024 NVIDIA CORPORATION
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
const (
|
||||
EnvVarCudaVersion = "CUDA_VERSION"
|
||||
EnvVarNvidiaDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
EnvVarNvidiaDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
||||
EnvVarNvidiaImexChannels = "NVIDIA_IMEX_CHANNELS"
|
||||
EnvVarNvidiaMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
|
||||
EnvVarNvidiaMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
|
||||
EnvVarNvidiaRequireCuda = NvidiaRequirePrefix + "CUDA"
|
||||
EnvVarNvidiaRequireJetpack = NvidiaRequirePrefix + "JETPACK"
|
||||
EnvVarNvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
|
||||
|
||||
NvidiaRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
)
|
||||
Reference in New Issue
Block a user