mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-05-05 20:45:15 +00:00
Add cuda-compat-mode config option
Some checks failed
Some checks failed
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
dccdfeddd1
commit
aa0cb99bbb
@ -114,9 +114,19 @@ func doPrestart() {
|
|||||||
}
|
}
|
||||||
args = append(args, "configure")
|
args = append(args, "configure")
|
||||||
|
|
||||||
if !hook.Features.AllowCUDACompatLibsFromContainer.IsEnabled() {
|
switch cli.CUDACompatMode {
|
||||||
|
case config.CUDACompatModeLdconfig:
|
||||||
|
args = append(args, "--cuda-compat-mode="+config.CUDACompatModeLdconfig)
|
||||||
|
case config.CUDACompatModeMount:
|
||||||
|
args = append(args, "--cuda-compat-mode="+config.CUDACompatModeMount)
|
||||||
|
case config.CUDACompatModeDisabled, config.CUDACompatModeHook:
|
||||||
args = append(args, "--no-cntlibs")
|
args = append(args, "--no-cntlibs")
|
||||||
|
default:
|
||||||
|
if !hook.Features.AllowCUDACompatLibsFromContainer.IsEnabled() {
|
||||||
|
args = append(args, "--no-cntlibs")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
|
if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" {
|
||||||
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
|
args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath))
|
||||||
}
|
}
|
||||||
|
@ -53,6 +53,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
|||||||
swarm-resource = ""
|
swarm-resource = ""
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
|
cuda-compat-mode = "ldconfig"
|
||||||
debug = ""
|
debug = ""
|
||||||
environment = []
|
environment = []
|
||||||
ldcache = ""
|
ldcache = ""
|
||||||
@ -114,6 +115,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
|||||||
swarm-resource = ""
|
swarm-resource = ""
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
|
cuda-compat-mode = "ldconfig"
|
||||||
debug = ""
|
debug = ""
|
||||||
environment = []
|
environment = []
|
||||||
ldcache = ""
|
ldcache = ""
|
||||||
@ -178,6 +180,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
|||||||
swarm-resource = ""
|
swarm-resource = ""
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
|
cuda-compat-mode = "ldconfig"
|
||||||
debug = ""
|
debug = ""
|
||||||
environment = []
|
environment = []
|
||||||
ldcache = ""
|
ldcache = ""
|
||||||
@ -239,6 +242,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
|||||||
swarm-resource = ""
|
swarm-resource = ""
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
|
cuda-compat-mode = "ldconfig"
|
||||||
debug = ""
|
debug = ""
|
||||||
environment = []
|
environment = []
|
||||||
ldcache = ""
|
ldcache = ""
|
||||||
@ -322,6 +326,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
|||||||
swarm-resource = ""
|
swarm-resource = ""
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
|
cuda-compat-mode = "ldconfig"
|
||||||
debug = ""
|
debug = ""
|
||||||
environment = []
|
environment = []
|
||||||
ldcache = ""
|
ldcache = ""
|
||||||
|
@ -22,6 +22,13 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
CUDACompatModeMount = "mount"
|
||||||
|
CUDACompatModeLdconfig = "ldconfig"
|
||||||
|
CUDACompatModeHook = "hook"
|
||||||
|
CUDACompatModeDisabled = "disabled"
|
||||||
|
)
|
||||||
|
|
||||||
// ContainerCLIConfig stores the options for the nvidia-container-cli
|
// ContainerCLIConfig stores the options for the nvidia-container-cli
|
||||||
type ContainerCLIConfig struct {
|
type ContainerCLIConfig struct {
|
||||||
Root string `toml:"root"`
|
Root string `toml:"root"`
|
||||||
@ -44,6 +51,9 @@ type ContainerCLIConfig struct {
|
|||||||
// is required, the features.allow-ldconfig-from-container feature gate must
|
// is required, the features.allow-ldconfig-from-container feature gate must
|
||||||
// be enabled explicitly.
|
// be enabled explicitly.
|
||||||
Ldconfig ldconfigPath `toml:"ldconfig"`
|
Ldconfig ldconfigPath `toml:"ldconfig"`
|
||||||
|
// CUDACompatMode sets the mode to be used to make CUDA Forward Compat
|
||||||
|
// libraries discoverable in the container.
|
||||||
|
CUDACompatMode string `toml:"cuda-compat-mode,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
|
// NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
|
||||||
|
@ -100,9 +100,10 @@ func GetDefault() (*Config, error) {
|
|||||||
AcceptEnvvarUnprivileged: true,
|
AcceptEnvvarUnprivileged: true,
|
||||||
SupportedDriverCapabilities: image.SupportedDriverCapabilities.String(),
|
SupportedDriverCapabilities: image.SupportedDriverCapabilities.String(),
|
||||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||||
LoadKmods: true,
|
LoadKmods: true,
|
||||||
Ldconfig: getLdConfigPath(),
|
Ldconfig: getLdConfigPath(),
|
||||||
User: getUserGroup(),
|
User: getUserGroup(),
|
||||||
|
CUDACompatMode: CUDACompatModeLdconfig,
|
||||||
},
|
},
|
||||||
NVIDIACTKConfig: CTKConfig{
|
NVIDIACTKConfig: CTKConfig{
|
||||||
Path: nvidiaCTKExecutable,
|
Path: nvidiaCTKExecutable,
|
||||||
|
@ -56,9 +56,10 @@ func TestGetConfig(t *testing.T) {
|
|||||||
AcceptEnvvarUnprivileged: true,
|
AcceptEnvvarUnprivileged: true,
|
||||||
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
||||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||||
Root: "",
|
Root: "",
|
||||||
LoadKmods: true,
|
LoadKmods: true,
|
||||||
Ldconfig: "@/test/ld/config/path",
|
Ldconfig: "@/test/ld/config/path",
|
||||||
|
CUDACompatMode: "ldconfig",
|
||||||
},
|
},
|
||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/dev/null",
|
DebugFilePath: "/dev/null",
|
||||||
@ -93,6 +94,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
"nvidia-container-cli.load-kmods = false",
|
"nvidia-container-cli.load-kmods = false",
|
||||||
"nvidia-container-cli.ldconfig = \"@/foo/bar/ldconfig\"",
|
"nvidia-container-cli.ldconfig = \"@/foo/bar/ldconfig\"",
|
||||||
"nvidia-container-cli.user = \"foo:bar\"",
|
"nvidia-container-cli.user = \"foo:bar\"",
|
||||||
|
"nvidia-container-cli.cuda-compat-mode = \"mount\"",
|
||||||
"nvidia-container-runtime.debug = \"/foo/bar\"",
|
"nvidia-container-runtime.debug = \"/foo/bar\"",
|
||||||
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
|
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
|
||||||
"nvidia-container-runtime.log-level = \"debug\"",
|
"nvidia-container-runtime.log-level = \"debug\"",
|
||||||
@ -109,10 +111,11 @@ func TestGetConfig(t *testing.T) {
|
|||||||
AcceptEnvvarUnprivileged: false,
|
AcceptEnvvarUnprivileged: false,
|
||||||
SupportedDriverCapabilities: "compute,utility",
|
SupportedDriverCapabilities: "compute,utility",
|
||||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||||
Root: "/bar/baz",
|
Root: "/bar/baz",
|
||||||
LoadKmods: false,
|
LoadKmods: false,
|
||||||
Ldconfig: "@/foo/bar/ldconfig",
|
Ldconfig: "@/foo/bar/ldconfig",
|
||||||
User: "foo:bar",
|
User: "foo:bar",
|
||||||
|
CUDACompatMode: "mount",
|
||||||
},
|
},
|
||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/foo/bar",
|
DebugFilePath: "/foo/bar",
|
||||||
@ -156,8 +159,9 @@ func TestGetConfig(t *testing.T) {
|
|||||||
AcceptEnvvarUnprivileged: true,
|
AcceptEnvvarUnprivileged: true,
|
||||||
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
||||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||||
Ldconfig: "/foo/bar/ldconfig",
|
Ldconfig: "/foo/bar/ldconfig",
|
||||||
LoadKmods: true,
|
LoadKmods: true,
|
||||||
|
CUDACompatMode: "ldconfig",
|
||||||
},
|
},
|
||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/dev/null",
|
DebugFilePath: "/dev/null",
|
||||||
@ -200,6 +204,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
"root = \"/bar/baz\"",
|
"root = \"/bar/baz\"",
|
||||||
"load-kmods = false",
|
"load-kmods = false",
|
||||||
"ldconfig = \"@/foo/bar/ldconfig\"",
|
"ldconfig = \"@/foo/bar/ldconfig\"",
|
||||||
|
"cuda-compat-mode = \"mount\"",
|
||||||
"user = \"foo:bar\"",
|
"user = \"foo:bar\"",
|
||||||
"[nvidia-container-runtime]",
|
"[nvidia-container-runtime]",
|
||||||
"debug = \"/foo/bar\"",
|
"debug = \"/foo/bar\"",
|
||||||
@ -222,10 +227,11 @@ func TestGetConfig(t *testing.T) {
|
|||||||
AcceptEnvvarUnprivileged: false,
|
AcceptEnvvarUnprivileged: false,
|
||||||
SupportedDriverCapabilities: "compute,utility",
|
SupportedDriverCapabilities: "compute,utility",
|
||||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||||
Root: "/bar/baz",
|
Root: "/bar/baz",
|
||||||
LoadKmods: false,
|
LoadKmods: false,
|
||||||
Ldconfig: "@/foo/bar/ldconfig",
|
Ldconfig: "@/foo/bar/ldconfig",
|
||||||
User: "foo:bar",
|
CUDACompatMode: "mount",
|
||||||
|
User: "foo:bar",
|
||||||
},
|
},
|
||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/foo/bar",
|
DebugFilePath: "/foo/bar",
|
||||||
@ -264,10 +270,11 @@ func TestGetConfig(t *testing.T) {
|
|||||||
AcceptEnvvarUnprivileged: true,
|
AcceptEnvvarUnprivileged: true,
|
||||||
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
||||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||||
Root: "",
|
Root: "",
|
||||||
LoadKmods: true,
|
LoadKmods: true,
|
||||||
Ldconfig: "@/test/ld/config/path",
|
Ldconfig: "@/test/ld/config/path",
|
||||||
User: "root:video",
|
CUDACompatMode: "ldconfig",
|
||||||
|
User: "root:video",
|
||||||
},
|
},
|
||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/dev/null",
|
DebugFilePath: "/dev/null",
|
||||||
@ -303,10 +310,11 @@ func TestGetConfig(t *testing.T) {
|
|||||||
AcceptEnvvarUnprivileged: true,
|
AcceptEnvvarUnprivileged: true,
|
||||||
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
|
||||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||||
Root: "",
|
Root: "",
|
||||||
LoadKmods: true,
|
LoadKmods: true,
|
||||||
Ldconfig: "@/test/ld/config/path",
|
Ldconfig: "@/test/ld/config/path",
|
||||||
User: "foo:bar",
|
CUDACompatMode: "ldconfig",
|
||||||
|
User: "foo:bar",
|
||||||
},
|
},
|
||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/dev/null",
|
DebugFilePath: "/dev/null",
|
||||||
|
@ -48,6 +48,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v
|
|||||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
|
cuda-compat-mode = "ldconfig"
|
||||||
#debug = "/var/log/nvidia-container-toolkit.log"
|
#debug = "/var/log/nvidia-container-toolkit.log"
|
||||||
environment = []
|
environment = []
|
||||||
#ldcache = "/etc/ld.so.cache"
|
#ldcache = "/etc/ld.so.cache"
|
||||||
|
@ -79,24 +79,41 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
|
|||||||
discoverers = append(discoverers, d)
|
discoverers = append(discoverers, d)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !cfg.Features.AllowCUDACompatLibsFromContainer.IsEnabled() && !cfg.Features.DisableCUDACompatLibHook.IsEnabled() {
|
// If the feature flag has explicitly been toggled, we don't make any modification.
|
||||||
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, cfg.NVIDIACTKConfig.Path, driver)
|
if !cfg.Features.DisableCUDACompatLibHook.IsEnabled() {
|
||||||
discoverers = append(discoverers, compatLibHookDiscoverer)
|
cudaCompatDiscoverer, err := getCudaCompatModeDiscoverer(logger, cfg, driver)
|
||||||
// For legacy mode, we also need to inject a hook to update the LDCache
|
if err != nil {
|
||||||
// after we have modified the configuration.
|
return nil, fmt.Errorf("failed to construct CUDA Compat discoverer: %w", err)
|
||||||
if cfg.NVIDIAContainerRuntimeConfig.Mode == "legacy" {
|
|
||||||
ldcacheUpdateHookDiscoverer, err := discover.NewLDCacheUpdateHook(
|
|
||||||
logger,
|
|
||||||
discover.None{},
|
|
||||||
cfg.NVIDIACTKConfig.Path,
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to construct ldcache update discoverer: %w", err)
|
|
||||||
}
|
|
||||||
discoverers = append(discoverers, ldcacheUpdateHookDiscoverer)
|
|
||||||
}
|
}
|
||||||
|
discoverers = append(discoverers, cudaCompatDiscoverer)
|
||||||
}
|
}
|
||||||
|
|
||||||
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
|
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getCudaCompatModeDiscoverer(logger logger.Interface, cfg *config.Config, driver *root.Driver) (discover.Discover, error) {
|
||||||
|
// For legacy mode, we only include the enable-cuda-compat hook if cuda-compat-mode is set to hook.
|
||||||
|
if cfg.NVIDIAContainerRuntimeConfig.Mode == "legacy" && cfg.NVIDIAContainerCLIConfig.CUDACompatMode != config.CUDACompatModeHook {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, cfg.NVIDIACTKConfig.Path, driver)
|
||||||
|
// For non-legacy modes we return the hook as is. These modes *should* already include the update-ldcache hook.
|
||||||
|
if cfg.NVIDIAContainerRuntimeConfig.Mode != "legacy" {
|
||||||
|
return compatLibHookDiscoverer, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// For legacy mode, we also need to inject a hook to update the LDCache
|
||||||
|
// after we have modified the configuration.
|
||||||
|
ldcacheUpdateHookDiscoverer, err := discover.NewLDCacheUpdateHook(
|
||||||
|
logger,
|
||||||
|
discover.None{},
|
||||||
|
cfg.NVIDIACTKConfig.Path,
|
||||||
|
"",
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to construct ldcache update discoverer: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return discover.Merge(compatLibHookDiscoverer, ldcacheUpdateHookDiscoverer), nil
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user