mirror of
				https://github.com/NVIDIA/nvidia-container-toolkit
				synced 2025-06-26 18:18:24 +00:00 
			
		
		
		
	Add cuda-compat-mode config option
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
		
							parent
							
								
									dccdfeddd1
								
							
						
					
					
						commit
						aa0cb99bbb
					
				| @ -114,9 +114,19 @@ func doPrestart() { | ||||
| 	} | ||||
| 	args = append(args, "configure") | ||||
| 
 | ||||
| 	if !hook.Features.AllowCUDACompatLibsFromContainer.IsEnabled() { | ||||
| 	switch cli.CUDACompatMode { | ||||
| 	case config.CUDACompatModeLdconfig: | ||||
| 		args = append(args, "--cuda-compat-mode="+config.CUDACompatModeLdconfig) | ||||
| 	case config.CUDACompatModeMount: | ||||
| 		args = append(args, "--cuda-compat-mode="+config.CUDACompatModeMount) | ||||
| 	case config.CUDACompatModeDisabled, config.CUDACompatModeHook: | ||||
| 		args = append(args, "--no-cntlibs") | ||||
| 	default: | ||||
| 		if !hook.Features.AllowCUDACompatLibsFromContainer.IsEnabled() { | ||||
| 			args = append(args, "--no-cntlibs") | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" { | ||||
| 		args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath)) | ||||
| 	} | ||||
|  | ||||
| @ -53,6 +53,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v | ||||
| swarm-resource = "" | ||||
| 
 | ||||
| [nvidia-container-cli] | ||||
|   cuda-compat-mode = "ldconfig" | ||||
|   debug = "" | ||||
|   environment = [] | ||||
|   ldcache = "" | ||||
| @ -114,6 +115,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v | ||||
| swarm-resource = "" | ||||
| 
 | ||||
| [nvidia-container-cli] | ||||
|   cuda-compat-mode = "ldconfig" | ||||
|   debug = "" | ||||
|   environment = [] | ||||
|   ldcache = "" | ||||
| @ -178,6 +180,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v | ||||
| swarm-resource = "" | ||||
| 
 | ||||
| [nvidia-container-cli] | ||||
|   cuda-compat-mode = "ldconfig" | ||||
|   debug = "" | ||||
|   environment = [] | ||||
|   ldcache = "" | ||||
| @ -239,6 +242,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v | ||||
| swarm-resource = "" | ||||
| 
 | ||||
| [nvidia-container-cli] | ||||
|   cuda-compat-mode = "ldconfig" | ||||
|   debug = "" | ||||
|   environment = [] | ||||
|   ldcache = "" | ||||
| @ -322,6 +326,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v | ||||
| swarm-resource = "" | ||||
| 
 | ||||
| [nvidia-container-cli] | ||||
|   cuda-compat-mode = "ldconfig" | ||||
|   debug = "" | ||||
|   environment = [] | ||||
|   ldcache = "" | ||||
|  | ||||
| @ -22,6 +22,13 @@ import ( | ||||
| 	"strings" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	CUDACompatModeMount    = "mount" | ||||
| 	CUDACompatModeLdconfig = "ldconfig" | ||||
| 	CUDACompatModeHook     = "hook" | ||||
| 	CUDACompatModeDisabled = "disabled" | ||||
| ) | ||||
| 
 | ||||
| // ContainerCLIConfig stores the options for the nvidia-container-cli
 | ||||
| type ContainerCLIConfig struct { | ||||
| 	Root        string   `toml:"root"` | ||||
| @ -44,6 +51,9 @@ type ContainerCLIConfig struct { | ||||
| 	// is required, the features.allow-ldconfig-from-container feature gate must
 | ||||
| 	// be enabled explicitly.
 | ||||
| 	Ldconfig ldconfigPath `toml:"ldconfig"` | ||||
| 	// CUDACompatMode sets the mode to be used to make CUDA Forward Compat
 | ||||
| 	// libraries discoverable in the container.
 | ||||
| 	CUDACompatMode string `toml:"cuda-compat-mode,omitempty"` | ||||
| } | ||||
| 
 | ||||
| // NormalizeLDConfigPath returns the resolved path of the configured LDConfig binary.
 | ||||
|  | ||||
| @ -100,9 +100,10 @@ func GetDefault() (*Config, error) { | ||||
| 		AcceptEnvvarUnprivileged:    true, | ||||
| 		SupportedDriverCapabilities: image.SupportedDriverCapabilities.String(), | ||||
| 		NVIDIAContainerCLIConfig: ContainerCLIConfig{ | ||||
| 			LoadKmods: true, | ||||
| 			Ldconfig:  getLdConfigPath(), | ||||
| 			User:      getUserGroup(), | ||||
| 			LoadKmods:      true, | ||||
| 			Ldconfig:       getLdConfigPath(), | ||||
| 			User:           getUserGroup(), | ||||
| 			CUDACompatMode: CUDACompatModeLdconfig, | ||||
| 		}, | ||||
| 		NVIDIACTKConfig: CTKConfig{ | ||||
| 			Path: nvidiaCTKExecutable, | ||||
|  | ||||
| @ -56,9 +56,10 @@ func TestGetConfig(t *testing.T) { | ||||
| 				AcceptEnvvarUnprivileged:    true, | ||||
| 				SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video", | ||||
| 				NVIDIAContainerCLIConfig: ContainerCLIConfig{ | ||||
| 					Root:      "", | ||||
| 					LoadKmods: true, | ||||
| 					Ldconfig:  "@/test/ld/config/path", | ||||
| 					Root:           "", | ||||
| 					LoadKmods:      true, | ||||
| 					Ldconfig:       "@/test/ld/config/path", | ||||
| 					CUDACompatMode: "ldconfig", | ||||
| 				}, | ||||
| 				NVIDIAContainerRuntimeConfig: RuntimeConfig{ | ||||
| 					DebugFilePath: "/dev/null", | ||||
| @ -93,6 +94,7 @@ func TestGetConfig(t *testing.T) { | ||||
| 				"nvidia-container-cli.load-kmods = false", | ||||
| 				"nvidia-container-cli.ldconfig = \"@/foo/bar/ldconfig\"", | ||||
| 				"nvidia-container-cli.user = \"foo:bar\"", | ||||
| 				"nvidia-container-cli.cuda-compat-mode = \"mount\"", | ||||
| 				"nvidia-container-runtime.debug = \"/foo/bar\"", | ||||
| 				"nvidia-container-runtime.discover-mode = \"not-legacy\"", | ||||
| 				"nvidia-container-runtime.log-level = \"debug\"", | ||||
| @ -109,10 +111,11 @@ func TestGetConfig(t *testing.T) { | ||||
| 				AcceptEnvvarUnprivileged:    false, | ||||
| 				SupportedDriverCapabilities: "compute,utility", | ||||
| 				NVIDIAContainerCLIConfig: ContainerCLIConfig{ | ||||
| 					Root:      "/bar/baz", | ||||
| 					LoadKmods: false, | ||||
| 					Ldconfig:  "@/foo/bar/ldconfig", | ||||
| 					User:      "foo:bar", | ||||
| 					Root:           "/bar/baz", | ||||
| 					LoadKmods:      false, | ||||
| 					Ldconfig:       "@/foo/bar/ldconfig", | ||||
| 					User:           "foo:bar", | ||||
| 					CUDACompatMode: "mount", | ||||
| 				}, | ||||
| 				NVIDIAContainerRuntimeConfig: RuntimeConfig{ | ||||
| 					DebugFilePath: "/foo/bar", | ||||
| @ -156,8 +159,9 @@ func TestGetConfig(t *testing.T) { | ||||
| 				AcceptEnvvarUnprivileged:    true, | ||||
| 				SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video", | ||||
| 				NVIDIAContainerCLIConfig: ContainerCLIConfig{ | ||||
| 					Ldconfig:  "/foo/bar/ldconfig", | ||||
| 					LoadKmods: true, | ||||
| 					Ldconfig:       "/foo/bar/ldconfig", | ||||
| 					LoadKmods:      true, | ||||
| 					CUDACompatMode: "ldconfig", | ||||
| 				}, | ||||
| 				NVIDIAContainerRuntimeConfig: RuntimeConfig{ | ||||
| 					DebugFilePath: "/dev/null", | ||||
| @ -200,6 +204,7 @@ func TestGetConfig(t *testing.T) { | ||||
| 				"root = \"/bar/baz\"", | ||||
| 				"load-kmods = false", | ||||
| 				"ldconfig = \"@/foo/bar/ldconfig\"", | ||||
| 				"cuda-compat-mode = \"mount\"", | ||||
| 				"user = \"foo:bar\"", | ||||
| 				"[nvidia-container-runtime]", | ||||
| 				"debug = \"/foo/bar\"", | ||||
| @ -222,10 +227,11 @@ func TestGetConfig(t *testing.T) { | ||||
| 				AcceptEnvvarUnprivileged:    false, | ||||
| 				SupportedDriverCapabilities: "compute,utility", | ||||
| 				NVIDIAContainerCLIConfig: ContainerCLIConfig{ | ||||
| 					Root:      "/bar/baz", | ||||
| 					LoadKmods: false, | ||||
| 					Ldconfig:  "@/foo/bar/ldconfig", | ||||
| 					User:      "foo:bar", | ||||
| 					Root:           "/bar/baz", | ||||
| 					LoadKmods:      false, | ||||
| 					Ldconfig:       "@/foo/bar/ldconfig", | ||||
| 					CUDACompatMode: "mount", | ||||
| 					User:           "foo:bar", | ||||
| 				}, | ||||
| 				NVIDIAContainerRuntimeConfig: RuntimeConfig{ | ||||
| 					DebugFilePath: "/foo/bar", | ||||
| @ -264,10 +270,11 @@ func TestGetConfig(t *testing.T) { | ||||
| 				AcceptEnvvarUnprivileged:    true, | ||||
| 				SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video", | ||||
| 				NVIDIAContainerCLIConfig: ContainerCLIConfig{ | ||||
| 					Root:      "", | ||||
| 					LoadKmods: true, | ||||
| 					Ldconfig:  "@/test/ld/config/path", | ||||
| 					User:      "root:video", | ||||
| 					Root:           "", | ||||
| 					LoadKmods:      true, | ||||
| 					Ldconfig:       "@/test/ld/config/path", | ||||
| 					CUDACompatMode: "ldconfig", | ||||
| 					User:           "root:video", | ||||
| 				}, | ||||
| 				NVIDIAContainerRuntimeConfig: RuntimeConfig{ | ||||
| 					DebugFilePath: "/dev/null", | ||||
| @ -303,10 +310,11 @@ func TestGetConfig(t *testing.T) { | ||||
| 				AcceptEnvvarUnprivileged:    true, | ||||
| 				SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video", | ||||
| 				NVIDIAContainerCLIConfig: ContainerCLIConfig{ | ||||
| 					Root:      "", | ||||
| 					LoadKmods: true, | ||||
| 					Ldconfig:  "@/test/ld/config/path", | ||||
| 					User:      "foo:bar", | ||||
| 					Root:           "", | ||||
| 					LoadKmods:      true, | ||||
| 					Ldconfig:       "@/test/ld/config/path", | ||||
| 					CUDACompatMode: "ldconfig", | ||||
| 					User:           "foo:bar", | ||||
| 				}, | ||||
| 				NVIDIAContainerRuntimeConfig: RuntimeConfig{ | ||||
| 					DebugFilePath: "/dev/null", | ||||
|  | ||||
| @ -48,6 +48,7 @@ supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,v | ||||
| #swarm-resource = "DOCKER_RESOURCE_GPU" | ||||
| 
 | ||||
| [nvidia-container-cli] | ||||
| cuda-compat-mode = "ldconfig" | ||||
| #debug = "/var/log/nvidia-container-toolkit.log" | ||||
| environment = [] | ||||
| #ldcache = "/etc/ld.so.cache" | ||||
|  | ||||
| @ -79,24 +79,41 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image | ||||
| 		discoverers = append(discoverers, d) | ||||
| 	} | ||||
| 
 | ||||
| 	if !cfg.Features.AllowCUDACompatLibsFromContainer.IsEnabled() && !cfg.Features.DisableCUDACompatLibHook.IsEnabled() { | ||||
| 		compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, cfg.NVIDIACTKConfig.Path, driver) | ||||
| 		discoverers = append(discoverers, compatLibHookDiscoverer) | ||||
| 		// For legacy mode, we also need to inject a hook to update the LDCache
 | ||||
| 		// after we have modified the configuration.
 | ||||
| 		if cfg.NVIDIAContainerRuntimeConfig.Mode == "legacy" { | ||||
| 			ldcacheUpdateHookDiscoverer, err := discover.NewLDCacheUpdateHook( | ||||
| 				logger, | ||||
| 				discover.None{}, | ||||
| 				cfg.NVIDIACTKConfig.Path, | ||||
| 				"", | ||||
| 			) | ||||
| 			if err != nil { | ||||
| 				return nil, fmt.Errorf("failed to construct ldcache update discoverer: %w", err) | ||||
| 			} | ||||
| 			discoverers = append(discoverers, ldcacheUpdateHookDiscoverer) | ||||
| 	// If the feature flag has explicitly been toggled, we don't make any modification.
 | ||||
| 	if !cfg.Features.DisableCUDACompatLibHook.IsEnabled() { | ||||
| 		cudaCompatDiscoverer, err := getCudaCompatModeDiscoverer(logger, cfg, driver) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("failed to construct CUDA Compat discoverer: %w", err) | ||||
| 		} | ||||
| 		discoverers = append(discoverers, cudaCompatDiscoverer) | ||||
| 	} | ||||
| 
 | ||||
| 	return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...)) | ||||
| } | ||||
| 
 | ||||
| func getCudaCompatModeDiscoverer(logger logger.Interface, cfg *config.Config, driver *root.Driver) (discover.Discover, error) { | ||||
| 	// For legacy mode, we only include the enable-cuda-compat hook if cuda-compat-mode is set to hook.
 | ||||
| 	if cfg.NVIDIAContainerRuntimeConfig.Mode == "legacy" && cfg.NVIDIAContainerCLIConfig.CUDACompatMode != config.CUDACompatModeHook { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
| 	compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, cfg.NVIDIACTKConfig.Path, driver) | ||||
| 	// For non-legacy modes we return the hook as is. These modes *should* already include the update-ldcache hook.
 | ||||
| 	if cfg.NVIDIAContainerRuntimeConfig.Mode != "legacy" { | ||||
| 		return compatLibHookDiscoverer, nil | ||||
| 	} | ||||
| 
 | ||||
| 	// For legacy mode, we also need to inject a hook to update the LDCache
 | ||||
| 	// after we have modified the configuration.
 | ||||
| 	ldcacheUpdateHookDiscoverer, err := discover.NewLDCacheUpdateHook( | ||||
| 		logger, | ||||
| 		discover.None{}, | ||||
| 		cfg.NVIDIACTKConfig.Path, | ||||
| 		"", | ||||
| 	) | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("failed to construct ldcache update discoverer: %w", err) | ||||
| 	} | ||||
| 
 | ||||
| 	return discover.Merge(compatLibHookDiscoverer, ldcacheUpdateHookDiscoverer), nil | ||||
| } | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user