mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-05-05 20:45:15 +00:00
Add cuda-compat-mode config option
Some checks failed
Some checks failed
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
dccdfeddd1
commit
aa0cb99bbb
@ -114,9 +114,19 @@ func doPrestart() {
|
||||
}
|
||||
args = append(args, "configure")
|
||||
|
||||
switch cli.CUDACompatMode {
|
||||
case config.CUDACompatModeLdconfig:
|
||||
args = append(args, "--cuda-compat-mode="+config.CUDACompatModeLdconfig)
|
||||
case config.CUDACompatModeMount:
|
||||
args = append(args, "--cuda-compat-mode="+config.CUDACompatModeMount)
|
||||
case config.CUDACompatModeDisabled, config.CUDACompatModeHook:
|
||||
args = append(args, "--no-cntlibs")
|
||||
default:
|
||||
if !hook.Features.AllowCUDACompatLibsFromContainer.IsEnabled() {
|
||||
args = append(args, "--no-cntlibs")
|
||||
}
|
||||
}
|
||||
|
||||
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
|
||||
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
|
||||
}
|
||||
|
@ -53,6 +53,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
@ -114,6 +115,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
@ -178,6 +180,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
@ -239,6 +242,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
@ -322,6 +326,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
||||
swarm-resource = ""
|
||||
|
||||
[nvidia-container-cli]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
debug = ""
|
||||
environment = []
|
||||
ldcache = ""
|
||||
|
@ -22,6 +22,13 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Supported values for the nvidia-container-cli cuda-compat-mode config
// option, stored in ContainerCLIConfig.CUDACompatMode.
const (
|
||||
// CUDACompatModeMount is forwarded by doPrestart to the CLI as
// --cuda-compat-mode=mount.
CUDACompatModeMount = "mount"
|
||||
// CUDACompatModeLdconfig is forwarded by doPrestart to the CLI as
// --cuda-compat-mode=ldconfig. GetDefault uses it as the default mode.
CUDACompatModeLdconfig = "ldconfig"
|
||||
// CUDACompatModeHook makes doPrestart pass --no-cntlibs to the CLI. In the
// legacy runtime mode it is the only value for which the CUDA compat hook
// discoverer is constructed.
CUDACompatModeHook = "hook"
|
||||
// CUDACompatModeDisabled makes doPrestart pass --no-cntlibs to the CLI.
CUDACompatModeDisabled = "disabled"
|
||||
)
|
||||
|
||||
// ContainerCLIConfig stores the options for the nvidia-container-cli
|
||||
type ContainerCLIConfig struct {
|
||||
Root string `toml:"root"`
|
||||
@ -44,6 +51,9 @@ type ContainerCLIConfig struct {
|
||||
// is required, the features.allow-ldconfig-from-container feature gate must
|
||||
// be enabled explicitly.
|
||||
Ldconfig ldconfigPath `toml:"ldconfig"`
|
||||
// CUDACompatMode sets the mode to be used to make CUDA Forward Compat
|
||||
// libraries discoverable in the container.
|
||||
CUDACompatMode string `toml:"cuda-compat-mode,omitempty"`
|
||||
}
|
||||
|
||||
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
|
||||
|
@ -103,6 +103,7 @@ func GetDefault() (*Config, error) {
|
||||
LoadKmods: true,
|
||||
Ldconfig: getLdConfigPath(),
|
||||
User: getUserGroup(),
|
||||
CUDACompatMode: CUDACompatModeLdconfig,
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: nvidiaCTKExecutable,
|
||||
|
@ -59,6 +59,7 @@ func TestGetConfig(t *testing.T) {
|
||||
Root: "",
|
||||
LoadKmods: true,
|
||||
Ldconfig: "@/test/ld/config/path",
|
||||
CUDACompatMode: "ldconfig",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
@ -93,6 +94,7 @@ func TestGetConfig(t *testing.T) {
|
||||
"nvidia-container-cli.load-kmods = false",
|
||||
"nvidia-container-cli.ldconfig = \"@/foo/bar/ldconfig\"",
|
||||
"nvidia-container-cli.user = \"foo:bar\"",
|
||||
"nvidia-container-cli.cuda-compat-mode = \"mount\"",
|
||||
"nvidia-container-runtime.debug = \"/foo/bar\"",
|
||||
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
|
||||
"nvidia-container-runtime.log-level = \"debug\"",
|
||||
@ -113,6 +115,7 @@ func TestGetConfig(t *testing.T) {
|
||||
LoadKmods: false,
|
||||
Ldconfig: "@/foo/bar/ldconfig",
|
||||
User: "foo:bar",
|
||||
CUDACompatMode: "mount",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
@ -158,6 +161,7 @@ func TestGetConfig(t *testing.T) {
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Ldconfig: "/foo/bar/ldconfig",
|
||||
LoadKmods: true,
|
||||
CUDACompatMode: "ldconfig",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
@ -200,6 +204,7 @@ func TestGetConfig(t *testing.T) {
|
||||
"root = \"/bar/baz\"",
|
||||
"load-kmods = false",
|
||||
"ldconfig = \"@/foo/bar/ldconfig\"",
|
||||
"cuda-compat-mode = \"mount\"",
|
||||
"user = \"foo:bar\"",
|
||||
"[nvidia-container-runtime]",
|
||||
"debug = \"/foo/bar\"",
|
||||
@ -225,6 +230,7 @@ func TestGetConfig(t *testing.T) {
|
||||
Root: "/bar/baz",
|
||||
LoadKmods: false,
|
||||
Ldconfig: "@/foo/bar/ldconfig",
|
||||
CUDACompatMode: "mount",
|
||||
User: "foo:bar",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
@ -267,6 +273,7 @@ func TestGetConfig(t *testing.T) {
|
||||
Root: "",
|
||||
LoadKmods: true,
|
||||
Ldconfig: "@/test/ld/config/path",
|
||||
CUDACompatMode: "ldconfig",
|
||||
User: "root:video",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
@ -306,6 +313,7 @@ func TestGetConfig(t *testing.T) {
|
||||
Root: "",
|
||||
LoadKmods: true,
|
||||
Ldconfig: "@/test/ld/config/path",
|
||||
CUDACompatMode: "ldconfig",
|
||||
User: "foo:bar",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
|
@ -48,6 +48,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
|
||||
[nvidia-container-cli]
|
||||
cuda-compat-mode = "ldconfig"
|
||||
#debug = "/var/log/nvidia-container-toolkit.log"
|
||||
environment = []
|
||||
#ldcache = "/etc/ld.so.cache"
|
||||
|
@ -79,12 +79,32 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
|
||||
discoverers = append(discoverers, d)
|
||||
}
|
||||
|
||||
if !cfg.Features.AllowCUDACompatLibsFromContainer.IsEnabled() && !cfg.Features.DisableCUDACompatLibHook.IsEnabled() {
|
||||
// If the feature flag has explicitly been toggled, we don't make any modification.
|
||||
if !cfg.Features.DisableCUDACompatLibHook.IsEnabled() {
|
||||
cudaCompatDiscoverer, err := getCudaCompatModeDiscoverer(logger, cfg, driver)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct CUDA Compat discoverer: %w", err)
|
||||
}
|
||||
discoverers = append(discoverers, cudaCompatDiscoverer)
|
||||
}
|
||||
|
||||
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
|
||||
}
|
||||
|
||||
func getCudaCompatModeDiscoverer(logger logger.Interface, cfg *config.Config, driver *root.Driver) (discover.Discover, error) {
|
||||
// For legacy mode, we only include the enable-cuda-compat hook if cuda-compat-mode is set to hook.
|
||||
if cfg.NVIDIAContainerRuntimeConfig.Mode == "legacy" && cfg.NVIDIAContainerCLIConfig.CUDACompatMode != config.CUDACompatModeHook {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, cfg.NVIDIACTKConfig.Path, driver)
|
||||
discoverers = append(discoverers, compatLibHookDiscoverer)
|
||||
// For non-legacy modes we return the hook as is. These modes *should* already include the update-ldcache hook.
|
||||
if cfg.NVIDIAContainerRuntimeConfig.Mode != "legacy" {
|
||||
return compatLibHookDiscoverer, nil
|
||||
}
|
||||
|
||||
// For legacy mode, we also need to inject a hook to update the LDCache
|
||||
// after we have modified the configuration.
|
||||
if cfg.NVIDIAContainerRuntimeConfig.Mode == "legacy" {
|
||||
ldcacheUpdateHookDiscoverer, err := discover.NewLDCacheUpdateHook(
|
||||
logger,
|
||||
discover.None{},
|
||||
@ -94,9 +114,6 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct ldcache update discoverer: %w", err)
|
||||
}
|
||||
discoverers = append(discoverers, ldcacheUpdateHookDiscoverer)
|
||||
}
|
||||
}
|
||||
|
||||
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
|
||||
return discover.Merge(compatLibHookDiscoverer, ldcacheUpdateHookDiscoverer), nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user