From 6cf0248321e87abc84bf2c0ee75264754794ec29 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Thu, 22 May 2025 13:51:15 +0200 Subject: [PATCH] Added ability to disable specific (or all) CDI hooks This change adds the ability to disable specific (or all) CDI hooks to both the nvidia-ctk cdi generate command and the nvcdi API. Signed-off-by: Carlos Eduardo Arango Gutierrez Signed-off-by: Evan Lezar --- cmd/nvidia-ctk/cdi/generate/generate.go | 22 ++- cmd/nvidia-ctk/cdi/generate/generate_test.go | 186 ++++++++++++++++++ internal/discover/graphics_test.go | 2 +- internal/discover/hooks.go | 175 +++++++++++----- internal/discover/ldconfig.go | 2 +- internal/discover/ldconfig_test.go | 2 +- internal/discover/symlinks_test.go | 12 +- internal/platform-support/tegra/csv_test.go | 2 +- internal/runtime/runtime_factory.go | 6 +- pkg/nvcdi/api.go | 23 ++- pkg/nvcdi/driver-nvml.go | 8 +- pkg/nvcdi/driver-wsl.go | 2 +- pkg/nvcdi/driver-wsl_test.go | 8 +- pkg/nvcdi/hooks.go | 27 --- pkg/nvcdi/lib.go | 11 +- .../workarounds-device-folder-permissions.go | 13 +- 16 files changed, 377 insertions(+), 124 deletions(-) delete mode 100644 pkg/nvcdi/hooks.go diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index b187335b..1f549515 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -57,6 +57,7 @@ type options struct { configSearchPaths cli.StringSlice librarySearchPaths cli.StringSlice + disabledHooks cli.StringSlice csv struct { files cli.StringSlice @@ -173,9 +174,18 @@ func (m command) build() *cli.Command { }, &cli.StringSliceFlag{ Name: "csv.ignore-pattern", - Usage: "Specify a pattern the CSV mount specifications.", + Usage: "specify a pattern the CSV mount specifications.", Destination: &opts.csv.ignorePatterns, }, + &cli.StringSliceFlag{ + Name: "disable-hook", + Aliases: []string{"disable-hooks"}, + Usage: "specify a specific hook to skip when generating CDI " + 
+ "specifications. This can be specified multiple times and the " + + "special hook name 'all' can be used ensure that the generated " + + "CDI specification does not include any hooks.", + Destination: &opts.disabledHooks, + }, } return &c @@ -262,7 +272,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { deviceNamers = append(deviceNamers, deviceNamer) } - cdilib, err := nvcdi.New( + cdiOptions := []nvcdi.Option{ nvcdi.WithLogger(m.logger), nvcdi.WithDriverRoot(opts.driverRoot), nvcdi.WithDevRoot(opts.devRoot), @@ -276,7 +286,13 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()), // We set the following to allow for dependency injection: nvcdi.WithNvmlLib(opts.nvmllib), - ) + } + + for _, hook := range opts.disabledHooks.Value() { + cdiOptions = append(cdiOptions, nvcdi.WithDisabledHook(hook)) + } + + cdilib, err := nvcdi.New(cdiOptions...) if err != nil { return nil, fmt.Errorf("failed to create CDI library: %v", err) } diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index 7007ed04..a08ab19c 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -26,6 +26,7 @@ import ( "github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100" testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" + "github.com/urfave/cli/v2" "github.com/NVIDIA/nvidia-container-toolkit/internal/test" ) @@ -119,6 +120,185 @@ containerEdits: - nodev - rbind - rprivate +`, + }, + { + description: "disableHooks1", + options: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + driverRoot: driverRoot, + disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat")), + }, + expectedOptions: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook", 
+ driverRoot: driverRoot, + disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat")), + }, + expectedSpec: `--- +cdiVersion: 0.5.0 +kind: example.com/device +devices: + - name: "0" + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 + - name: all + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 +containerEdits: + env: + - NVIDIA_VISIBLE_DEVICES=void + deviceNodes: + - path: /dev/nvidiactl + hostPath: {{ .driverRoot }}/dev/nvidiactl + hooks: + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-symlinks + - --link + - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so + env: + - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - update-ldcache + - --folder + - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false + mounts: + - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 + containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 + options: + - ro + - nosuid + - nodev + - rbind + - rprivate +`, + }, + { + description: "disableHooks2", + options: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + driverRoot: driverRoot, + disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat", "update-ldcache")), + }, + expectedOptions: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook", + driverRoot: driverRoot, + disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat", "update-ldcache")), + }, + expectedSpec: `--- +cdiVersion: 0.5.0 +kind: example.com/device +devices: + - name: "0" + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 + - name: all + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 +containerEdits: + env: 
+ - NVIDIA_VISIBLE_DEVICES=void + deviceNodes: + - path: /dev/nvidiactl + hostPath: {{ .driverRoot }}/dev/nvidiactl + hooks: + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-symlinks + - --link + - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so + env: + - NVIDIA_CTK_DEBUG=false + mounts: + - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 + containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 + options: + - ro + - nosuid + - nodev + - rbind + - rprivate +`, + }, + { + description: "disableHooksAll", + options: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + driverRoot: driverRoot, + disabledHooks: valueOf(cli.NewStringSlice("all")), + }, + expectedOptions: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook", + driverRoot: driverRoot, + disabledHooks: valueOf(cli.NewStringSlice("all")), + }, + expectedSpec: `--- +cdiVersion: 0.5.0 +kind: example.com/device +devices: + - name: "0" + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 + - name: all + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 +containerEdits: + env: + - NVIDIA_VISIBLE_DEVICES=void + deviceNodes: + - path: /dev/nvidiactl + hostPath: {{ .driverRoot }}/dev/nvidiactl + mounts: + - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 + containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 + options: + - ro + - nosuid + - nodev + - rbind + - rprivate `, }, } @@ -162,3 +342,9 @@ containerEdits: }) } } + +// valueOf returns the value of a pointer. +// Note that this does not check for a nil pointer and is only used for testing. 
+func valueOf[T any](v *T) T { + return *v +} diff --git a/internal/discover/graphics_test.go b/internal/discover/graphics_test.go index 72aa7dcc..8620b048 100644 --- a/internal/discover/graphics_test.go +++ b/internal/discover/graphics_test.go @@ -25,7 +25,7 @@ import ( func TestGraphicsLibrariesDiscoverer(t *testing.T) { logger, _ := testlog.NewNullLogger() - hookCreator := NewHookCreator("/usr/bin/nvidia-cdi-hook", false) + hookCreator := NewHookCreator() testCases := []struct { description string diff --git a/internal/discover/hooks.go b/internal/discover/hooks.go index f20d000c..5e2cdec4 100644 --- a/internal/discover/hooks.go +++ b/internal/discover/hooks.go @@ -23,6 +23,28 @@ import ( "tags.cncf.io/container-device-interface/pkg/cdi" ) +// A HookName represents a supported CDI hooks. +type HookName string + +const ( + // AllHooks is a special hook name that allows all hooks to be matched. + AllHooks = HookName("all") + + // A ChmodHook is used to set the file mode of the specified paths. + // Deprecated: The chmod hook is deprecated and will be removed in a future release. + ChmodHook = HookName("chmod") + // A CreateSymlinksHook is used to create symlinks in the container. + CreateSymlinksHook = HookName("create-symlinks") + // An EnableCudaCompatHook is used to enabled CUDA Forward Compatibility. + // Added in v1.17.5 + EnableCudaCompatHook = HookName("enable-cuda-compat") + // An UpdateLDCacheHook is the hook used to update the ldcache in the + // container. This allows injected libraries to be discoverable. + UpdateLDCacheHook = HookName("update-ldcache") + + defaultNvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook" +) + var _ Discover = (*Hook)(nil) // Devices returns an empty list of devices for a Hook discoverer. @@ -45,75 +67,130 @@ func (h *Hook) Hooks() ([]Hook, error) { return []Hook{*h}, nil } -type HookName string +type Option func(*cdiHookCreator) -// DisabledHooks allows individual hooks to be disabled. 
-type DisabledHooks map[HookName]bool - -const ( - // HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility. - // This was added with v1.17.5 of the NVIDIA Container Toolkit. - HookEnableCudaCompat = HookName("enable-cuda-compat") - // directory path to be mounted into a container. - HookCreateSymlinks = HookName("create-symlinks") - // HookUpdateLDCache refers to the hook used to Update the dynamic linker - // cache inside the directory path to be mounted into a container. - HookUpdateLDCache = HookName("update-ldcache") -) - -// AllHooks maintains a future-proof list of all defined hooks. -var AllHooks = []HookName{ - HookEnableCudaCompat, - HookCreateSymlinks, - HookUpdateLDCache, -} - -// Option is a function that configures the nvcdilib -type Option func(*CDIHook) - -type CDIHook struct { +type cdiHookCreator struct { nvidiaCDIHookPath string - debugLogging bool + disabledHooks map[HookName]bool + + fixedArgs []string + debugLogging bool } +// An allDisabledHookCreator is a HookCreator that does not create any hooks. +type allDisabledHookCreator struct{} + +// Create returns nil for all hooks for an allDisabledHookCreator. +func (a *allDisabledHookCreator) Create(name HookName, args ...string) *Hook { + return nil +} + +// A HookCreator defines an interface for creating discover hooks. type HookCreator interface { Create(HookName, ...string) *Hook } -func NewHookCreator(nvidiaCDIHookPath string, debugLogging bool) HookCreator { - CDIHook := &CDIHook{ - nvidiaCDIHookPath: nvidiaCDIHookPath, - debugLogging: debugLogging, +// WithDisabledHooks sets the set of hooks that are disabled for the CDI hook creator. +// This can be specified multiple times. 
+func WithDisabledHooks(hooks ...HookName) Option { + return func(c *cdiHookCreator) { + for _, hook := range hooks { + c.disabledHooks[hook] = true + } } - - return CDIHook } -func (c CDIHook) Create(name HookName, args ...string) *Hook { - if name == "create-symlinks" { - if len(args) == 0 { - return nil - } +// WithNVIDIACDIHookPath sets the path to the nvidia-cdi-hook binary. +func WithNVIDIACDIHookPath(nvidiaCDIHookPath string) Option { + return func(c *cdiHookCreator) { + c.nvidiaCDIHookPath = nvidiaCDIHookPath + } +} - links := []string{} - for _, arg := range args { - links = append(links, "--link", arg) - } - args = links +func NewHookCreator(opts ...Option) HookCreator { + cdiHookCreator := &cdiHookCreator{ + nvidiaCDIHookPath: defaultNvidiaCDIHookPath, + disabledHooks: make(map[HookName]bool), + } + for _, opt := range opts { + opt(cdiHookCreator) + } + + if cdiHookCreator.disabledHooks[AllHooks] { + return &allDisabledHookCreator{} + } + + cdiHookCreator.fixedArgs = getFixedArgsForCDIHookCLI(cdiHookCreator.nvidiaCDIHookPath) + + return cdiHookCreator +} + +// Create creates a new hook with the given name and arguments. +// If a hook is disabled, a nil hook is returned. +func (c cdiHookCreator) Create(name HookName, args ...string) *Hook { + if c.isDisabled(name, args...) { + return nil } return &Hook{ Lifecycle: cdi.CreateContainerHook, Path: c.nvidiaCDIHookPath, - Args: append(c.requiredArgs(name), args...), + Args: append(c.requiredArgs(name), c.transformArgs(name, args...)...), Env: []string{fmt.Sprintf("NVIDIA_CTK_DEBUG=%v", c.debugLogging)}, } } -func (c CDIHook) requiredArgs(name string) []string { - base := filepath.Base(c.nvidiaCDIHookPath) - if base == "nvidia-ctk" { - return []string{base, "hook", name} +// isDisabled checks if the specified hook name is disabled. 
+func (c cdiHookCreator) isDisabled(name HookName, args ...string) bool { + if c.disabledHooks[name] { + return true } - return []string{base, name} + + switch name { + case CreateSymlinksHook: + if len(args) == 0 { + return true + } + case ChmodHook: + if len(args) == 0 { + return true + } + } + return false +} + +func (c cdiHookCreator) requiredArgs(name HookName) []string { + return append(c.fixedArgs, string(name)) +} + +func (c cdiHookCreator) transformArgs(name HookName, args ...string) []string { + switch name { + case CreateSymlinksHook: + var transformedArgs []string + for _, arg := range args { + transformedArgs = append(transformedArgs, "--link", arg) + } + return transformedArgs + case ChmodHook: + var transformedArgs = []string{"--mode", "755"} + for _, arg := range args { + transformedArgs = append(transformedArgs, "--path", arg) + } + return transformedArgs + default: + return args + } +} + +// getFixedArgsForCDIHookCLI returns the fixed arguments for the hook CLI. +// If the nvidia-ctk binary is used, hooks are implemented under the hook +// subcommand. +// For the nvidia-cdi-hook binary, the hooks are implemented as subcommands of +// the top-level CLI. +func getFixedArgsForCDIHookCLI(nvidiaCDIHookPath string) []string { + base := filepath.Base(nvidiaCDIHookPath) + if base == "nvidia-ctk" { + return []string{base, "hook"} + } + return []string{base} } diff --git a/internal/discover/ldconfig.go b/internal/discover/ldconfig.go index 0c632a74..eb5ab467 100644 --- a/internal/discover/ldconfig.go +++ b/internal/discover/ldconfig.go @@ -72,7 +72,7 @@ func createLDCacheUpdateHook(hookCreator HookCreator, ldconfig string, libraries args = append(args, "--folder", f) } - return hookCreator.Create(HookUpdateLDCache, args...) + return hookCreator.Create(UpdateLDCacheHook, args...) 
} // getLibraryPaths extracts the library dirs from the specified mounts diff --git a/internal/discover/ldconfig_test.go b/internal/discover/ldconfig_test.go index 29de1fb8..ff18118d 100644 --- a/internal/discover/ldconfig_test.go +++ b/internal/discover/ldconfig_test.go @@ -31,7 +31,7 @@ const ( func TestLDCacheUpdateHook(t *testing.T) { logger, _ := testlog.NewNullLogger() - hookCreator := NewHookCreator(testNvidiaCDIHookPath, false) + hookCreator := NewHookCreator(WithNVIDIACDIHookPath(testNvidiaCDIHookPath)) testCases := []struct { description string diff --git a/internal/discover/symlinks_test.go b/internal/discover/symlinks_test.go index 52d4a04b..e9a6c6f1 100644 --- a/internal/discover/symlinks_test.go +++ b/internal/discover/symlinks_test.go @@ -113,7 +113,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { expectedHooks: []Hook{ { Lifecycle: "createContainer", - Path: "/path/to/nvidia-cdi-hook", + Path: "/usr/bin/nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, @@ -146,7 +146,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { expectedHooks: []Hook{ { Lifecycle: "createContainer", - Path: "/path/to/nvidia-cdi-hook", + Path: "/usr/bin/nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, @@ -178,7 +178,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { expectedHooks: []Hook{ { Lifecycle: "createContainer", - Path: "/path/to/nvidia-cdi-hook", + Path: "/usr/bin/nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, @@ -248,7 +248,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { }, { Lifecycle: "createContainer", - Path: "/path/to/nvidia-cdi-hook", + Path: "/usr/bin/nvidia-cdi-hook", Args: 
[]string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, @@ -298,7 +298,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { expectedHooks: []Hook{ { Lifecycle: "createContainer", - Path: "/path/to/nvidia-cdi-hook", + Path: "/usr/bin/nvidia-cdi-hook", Args: []string{ "nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so", @@ -311,7 +311,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { }, } - hookCreator := NewHookCreator("/path/to/nvidia-cdi-hook", false) + hookCreator := NewHookCreator() for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { d := WithDriverDotSoSymlinks( diff --git a/internal/platform-support/tegra/csv_test.go b/internal/platform-support/tegra/csv_test.go index 27734c39..fa717a64 100644 --- a/internal/platform-support/tegra/csv_test.go +++ b/internal/platform-support/tegra/csv_test.go @@ -183,7 +183,7 @@ func TestDiscovererFromCSVFiles(t *testing.T) { }, } - hookCreator := discover.NewHookCreator("/usr/bin/nvidia-cdi-hook", false) + hookCreator := discover.NewHookCreator() for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { defer setGetTargetsFromCSVFiles(tc.moutSpecs)() diff --git a/internal/runtime/runtime_factory.go b/internal/runtime/runtime_factory.go index 9564d06e..9386c6ac 100644 --- a/internal/runtime/runtime_factory.go +++ b/internal/runtime/runtime_factory.go @@ -18,7 +18,6 @@ package runtime import ( "fmt" - "os" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" @@ -76,10 +75,7 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp return nil, err } - hookCreator := discover.NewHookCreator( - cfg.NVIDIACTKConfig.Path, - cfg.NVIDIAContainerRuntimeConfig.DebugFilePath == "" || cfg.NVIDIAContainerRuntimeConfig.DebugFilePath == os.DevNull, - ) + hookCreator := 
discover.NewHookCreator(discover.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path)) mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode, image) // We update the mode here so that we can continue passing just the config to other functions. diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index f9d9f83d..4ff11e47 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -37,14 +37,27 @@ type Interface interface { GetDeviceSpecsByID(...string) ([]specs.Device, error) } -// HookName is an alias for the discover.HookName type. +// A HookName represents one of the predefined NVIDIA CDI hooks. type HookName = discover.HookName -// Aliases for the discover.HookName constants. const ( - HookEnableCudaCompat = discover.HookEnableCudaCompat - HookCreateSymlinks = discover.HookCreateSymlinks - HookUpdateLDCache = discover.HookUpdateLDCache + // AllHooks is a special hook name that allows all hooks to be matched. + AllHooks = discover.AllHooks + + // A CreateSymlinksHook is used to create symlinks in the container. + CreateSymlinksHook = discover.CreateSymlinksHook + // An EnableCudaCompatHook is used to enabled CUDA Forward Compatibility. + // Added in v1.17.5 + EnableCudaCompatHook = discover.EnableCudaCompatHook + // An UpdateLDCacheHook is used to update the ldcache in the container. + UpdateLDCacheHook = discover.UpdateLDCacheHook + + // Deprecated: Use CreateSymlinksHook instead. + HookCreateSymlinks = CreateSymlinksHook + // Deprecated: Use EnableCudaCompatHook instead. + HookEnableCudaCompat = EnableCudaCompatHook + // Deprecated: Use UpdateLDCacheHook instead. + HookUpdateLDCache = UpdateLDCacheHook ) // A FeatureFlag refers to a specific feature that can be toggled in the CDI api. 
diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index 3fbc0e94..ff02ac72 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -106,11 +106,9 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover ) discoverers = append(discoverers, driverDotSoSymlinksDiscoverer) - if l.HookIsSupported(HookEnableCudaCompat) { - // TODO: The following should use the version directly. - cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, l.driver) - discoverers = append(discoverers, cudaCompatLibHookDiscoverer) - } + // TODO: The following should use the version directly. + cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, l.driver) + discoverers = append(discoverers, cudaCompatLibHookDiscoverer) updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath) discoverers = append(discoverers, updateLDCache) diff --git a/pkg/nvcdi/driver-wsl.go b/pkg/nvcdi/driver-wsl.go index 26a67b11..e382cb9d 100644 --- a/pkg/nvcdi/driver-wsl.go +++ b/pkg/nvcdi/driver-wsl.go @@ -135,7 +135,7 @@ func (m nvidiaSMISimlinkHook) Hooks() ([]discover.Hook, error) { } link := "/usr/bin/nvidia-smi" links := []string{fmt.Sprintf("%s::%s", target, link)} - symlinkHook := m.hookCreator.Create(HookCreateSymlinks, links...) + symlinkHook := m.hookCreator.Create(CreateSymlinksHook, links...) 
return symlinkHook.Hooks() } diff --git a/pkg/nvcdi/driver-wsl_test.go b/pkg/nvcdi/driver-wsl_test.go index 3d9696a8..39eeb500 100644 --- a/pkg/nvcdi/driver-wsl_test.go +++ b/pkg/nvcdi/driver-wsl_test.go @@ -29,7 +29,7 @@ import ( func TestNvidiaSMISymlinkHook(t *testing.T) { logger, _ := testlog.NewNullLogger() - hookCreator := discover.NewHookCreator("nvidia-cdi-hook", false) + hookCreator := discover.NewHookCreator() errMounts := errors.New("mounts error") @@ -93,7 +93,7 @@ func TestNvidiaSMISymlinkHook(t *testing.T) { expectedHooks: []discover.Hook{ { Lifecycle: "createContainer", - Path: "nvidia-cdi-hook", + Path: "/usr/bin/nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "nvidia-smi::/usr/bin/nvidia-smi"}, Env: []string{"NVIDIA_CTK_DEBUG=false"}, @@ -114,7 +114,7 @@ func TestNvidiaSMISymlinkHook(t *testing.T) { expectedHooks: []discover.Hook{ { Lifecycle: "createContainer", - Path: "nvidia-cdi-hook", + Path: "/usr/bin/nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "/some/path/nvidia-smi::/usr/bin/nvidia-smi"}, Env: []string{"NVIDIA_CTK_DEBUG=false"}, @@ -135,7 +135,7 @@ func TestNvidiaSMISymlinkHook(t *testing.T) { expectedHooks: []discover.Hook{ { Lifecycle: "createContainer", - Path: "nvidia-cdi-hook", + Path: "/usr/bin/nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "/some/path/nvidia-smi::/usr/bin/nvidia-smi"}, Env: []string{"NVIDIA_CTK_DEBUG=false"}, diff --git a/pkg/nvcdi/hooks.go b/pkg/nvcdi/hooks.go deleted file mode 100644 index 20ef59a4..00000000 --- a/pkg/nvcdi/hooks.go +++ /dev/null @@ -1,27 +0,0 @@ -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package nvcdi - -// HookIsSupported checks whether a hook of the specified name is supported. -// Hooks must be explicitly disabled, meaning that if no disabled hooks are -// all hooks are supported. -func (l *nvcdilib) HookIsSupported(h HookName) bool { - if len(l.disabledHooks) == 0 { - return true - } - return !l.disabledHooks[h] -} diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index 15d6361c..409721ef 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -81,9 +81,6 @@ func New(opts ...Option) (Interface, error) { if l.nvidiaCDIHookPath == "" { l.nvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook" } - // create hookCreator - l.hookCreator = discover.NewHookCreator(l.nvidiaCDIHookPath, false) - if l.driverRoot == "" { l.driverRoot = "/" } @@ -133,7 +130,7 @@ func New(opts ...Option) (Interface, error) { l.vendor = "management.nvidia.com" } // Management containers in general do not require CUDA Forward compatibility. 
- l.disabledHooks = append(l.disabledHooks, discover.HookEnableCudaCompat) + l.disabledHooks = append(l.disabledHooks, HookEnableCudaCompat) lib = (*managementlib)(l) case ModeNvml: lib = (*nvmllib)(l) @@ -158,6 +155,12 @@ func New(opts ...Option) (Interface, error) { return nil, fmt.Errorf("unknown mode %q", l.mode) } + // create hookCreator + l.hookCreator = discover.NewHookCreator( + discover.WithNVIDIACDIHookPath(l.nvidiaCDIHookPath), + discover.WithDisabledHooks(l.disabledHooks...), + ) + w := wrapper{ Interface: lib, vendor: l.vendor, diff --git a/pkg/nvcdi/workarounds-device-folder-permissions.go b/pkg/nvcdi/workarounds-device-folder-permissions.go index 71967ac4..888193d6 100644 --- a/pkg/nvcdi/workarounds-device-folder-permissions.go +++ b/pkg/nvcdi/workarounds-device-folder-permissions.go @@ -61,18 +61,9 @@ func (d *deviceFolderPermissions) Hooks() ([]discover.Hook, error) { if err != nil { return nil, fmt.Errorf("failed to get device subfolders: %v", err) } - if len(folders) == 0 { - return nil, nil - } - args := []string{"--mode", "755"} - for _, folder := range folders { - args = append(args, "--path", folder) - } - - hook := d.hookCreator.Create("chmod", args...) - - return []discover.Hook{*hook}, nil + //nolint:staticcheck // The ChmodHook is deprecated and will be removed in a future release. + return d.hookCreator.Create(discover.ChmodHook, folders...).Hooks() } func (d *deviceFolderPermissions) getDeviceSubfolders() ([]string, error) {