Add cuda-compat-mode config option

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2025-04-29 14:00:12 +02:00
parent de3d736663
commit d90a03b794
7 changed files with 97 additions and 40 deletions

View File

@@ -79,24 +79,46 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
discoverers = append(discoverers, d)
}
if !cfg.Features.AllowCUDACompatLibsFromContainer.IsEnabled() && !cfg.Features.DisableCUDACompatLibHook.IsEnabled() {
// If the feature flag has explicitly been toggled, we don't make any modification.
if !cfg.Features.DisableCUDACompatLibHook.IsEnabled() {
cudaCompatDiscoverer, err := getCudaCompatModeDiscoverer(logger, cfg, driver)
if err != nil {
return nil, fmt.Errorf("failed to construct CUDA Compat discoverer: %w", err)
}
discoverers = append(discoverers, cudaCompatDiscoverer)
}
if cfg.NVIDIAContainerRuntimeConfig.Mode != "legacy" && !cfg.Features.DisableCUDACompatLibHook.IsEnabled() {
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, cfg.NVIDIACTKConfig.Path, driver)
discoverers = append(discoverers, compatLibHookDiscoverer)
// For legacy mode, we also need to inject a hook to update the LDCache
// after we have modifed the configuration.
if cfg.NVIDIAContainerRuntimeConfig.Mode == "legacy" {
ldcacheUpdateHookDiscoverer, err := discover.NewLDCacheUpdateHook(
logger,
discover.None{},
cfg.NVIDIACTKConfig.Path,
"",
)
if err != nil {
return nil, fmt.Errorf("failed to construct ldcache update discoverer: %w", err)
}
discoverers = append(discoverers, ldcacheUpdateHookDiscoverer)
}
}
return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...))
}
func getCudaCompatModeDiscoverer(logger logger.Interface, cfg *config.Config, driver *root.Driver) (discover.Discover, error) {
// For legacy mode, we only include the enable-cuda-compat hook if cuda-compat-mode is set to hook.
if cfg.NVIDIAContainerRuntimeConfig.Mode == "legacy" && cfg.NVIDIAContainerCLIConfig.CUDACompatMode != config.CUDACompatModeHook {
return nil, nil
}
compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, cfg.NVIDIACTKConfig.Path, driver)
// For non-legacy modes we return the hook as is. These modes *should* already include the update-ldcache hook.
if cfg.NVIDIAContainerRuntimeConfig.Mode != "legacy" {
return compatLibHookDiscoverer, nil
}
// For legacy mode, we also need to inject a hook to update the LDCache
// after we have modifed the configuration.
ldcacheUpdateHookDiscoverer, err := discover.NewLDCacheUpdateHook(
logger,
discover.None{},
cfg.NVIDIACTKConfig.Path,
"",
)
if err != nil {
return nil, fmt.Errorf("failed to construct ldcache update discoverer: %w", err)
}
return discover.Merge(compatLibHookDiscoverer, ldcacheUpdateHookDiscoverer), nil
}