From f72732edccdbeea168d03779c4e810838426fc99 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Thu, 22 May 2025 13:51:15 +0200 Subject: [PATCH] Add --disable-hook flag to cdi generate command When running the nvidia-ctk cdi generate command, a user should be able to opt out of specific hooks. We propose to add a flag --disable-hook that will take a comma-separated list of hooks that will be skipped when creating the CDI spec. Signed-off-by: Carlos Eduardo Arango Gutierrez --- cmd/nvidia-ctk/cdi/generate/generate.go | 18 +- cmd/nvidia-ctk/cdi/generate/generate_test.go | 177 +++++++++++++++++++ internal/discover/hooks.go | 25 ++- pkg/nvcdi/driver-nvml.go | 8 +- pkg/nvcdi/hooks.go | 27 --- pkg/nvcdi/lib.go | 13 +- pkg/nvcdi/options.go | 10 +- 7 files changed, 229 insertions(+), 49 deletions(-) delete mode 100644 pkg/nvcdi/hooks.go diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index b187335b..569cd133 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -57,6 +57,7 @@ type options struct { configSearchPaths cli.StringSlice librarySearchPaths cli.StringSlice + disabledHooks cli.StringSlice csv struct { files cli.StringSlice @@ -176,6 +177,13 @@ func (m command) build() *cli.Command { Usage: "Specify a pattern the CSV mount specifications.", Destination: &opts.csv.ignorePatterns, }, + &cli.StringSliceFlag{ + Name: "disable-hook", + Aliases: []string{"disable-hooks"}, + Usage: "Hook to skip when generating the CDI specification. Can be specified multiple times. Can be a comma-separated list of hooks or a single hook name.", + Value: cli.NewStringSlice(), + Destination: &opts.disabledHooks, + }, } return &c @@ -262,7 +270,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { deviceNamers = append(deviceNamers, deviceNamer) } - cdilib, err := nvcdi.New( + initOpts := []nvcdi.Option{ nvcdi.WithLogger(m.logger), nvcdi.WithDriverRoot(opts.driverRoot), nvcdi.WithDevRoot(opts.devRoot), @@ -276,7 +284,13 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()), // We set the following to allow for dependency injection: nvcdi.WithNvmlLib(opts.nvmllib), - ) + } + + for _, hook := range opts.disabledHooks.Value() { + initOpts = append(initOpts, nvcdi.WithDisabledHook(hook)) + } + + cdilib, err := nvcdi.New(initOpts...) if err != nil { return nil, fmt.Errorf("failed to create CDI library: %v", err) } diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index d6aae4d7..67bf9d23 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -26,6 +26,7 @@ import ( "github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100" testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" + "github.com/urfave/cli/v2" "github.com/NVIDIA/nvidia-container-toolkit/internal/test" ) @@ -36,6 +37,9 @@ func TestGenerateSpec(t *testing.T) { require.NoError(t, err) driverRoot := filepath.Join(moduleRoot, "testdata", "lookup", "rootfs-1") + disableHook1 := cli.NewStringSlice("enable-cuda-compat") + disableHook2 := cli.NewStringSlice("enable-cuda-compat", "update-ldcache") + disableHook3 := cli.NewStringSlice("all") logger, _ := testlog.NewNullLogger() testCases := []struct { @@ -113,6 +117,179 @@ containerEdits: - nodev - rbind - rprivate +`, + }, + { + description: "disableHooks1", + options: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + driverRoot: driverRoot, + disabledHooks: *disableHook1, + }, + expectedOptions: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook", + driverRoot: driverRoot, + disabledHooks: *disableHook1, + }, + expectedSpec: `--- +cdiVersion: 0.5.0 +kind: example.com/device +devices: + - name: "0" + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 + - name: all + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 +containerEdits: + env: + - NVIDIA_VISIBLE_DEVICES=void + deviceNodes: + - path: /dev/nvidiactl + hostPath: {{ .driverRoot }}/dev/nvidiactl + hooks: + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-symlinks + - --link + - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - update-ldcache + - --folder + - /lib/x86_64-linux-gnu + mounts: + - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 + containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 + options: + - ro + - nosuid + - nodev + - rbind + - rprivate +`, + }, + { + description: "disableHooks2", + options: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + driverRoot: driverRoot, + disabledHooks: *disableHook2, + }, + expectedOptions: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook", + driverRoot: driverRoot, + disabledHooks: *disableHook2, + }, + expectedSpec: `--- +cdiVersion: 0.5.0 +kind: example.com/device +devices: + - name: "0" + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 + - name: all + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 +containerEdits: + env: + - NVIDIA_VISIBLE_DEVICES=void + deviceNodes: + - path: /dev/nvidiactl + hostPath: {{ .driverRoot }}/dev/nvidiactl + hooks: + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-symlinks + - --link + - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so + mounts: + - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 + containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 + options: + - ro + - nosuid + - nodev + - rbind + - rprivate +`, + }, + { + description: "disableHooksAll", + options: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + driverRoot: driverRoot, + disabledHooks: *disableHook3, + }, + expectedOptions: options{ + format: "yaml", + mode: "nvml", + vendor: "example.com", + class: "device", + nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook", + driverRoot: driverRoot, + disabledHooks: *disableHook3, + }, + expectedSpec: `--- +cdiVersion: 0.5.0 +kind: example.com/device +devices: + - name: "0" + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 + - name: all + containerEdits: + deviceNodes: + - path: /dev/nvidia0 + hostPath: {{ .driverRoot }}/dev/nvidia0 +containerEdits: + env: + - NVIDIA_VISIBLE_DEVICES=void + deviceNodes: + - path: /dev/nvidiactl + hostPath: {{ .driverRoot }}/dev/nvidiactl + mounts: + - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 + containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 + options: + - ro + - nosuid + - nodev + - rbind + - rprivate `, }, } diff --git a/internal/discover/hooks.go b/internal/discover/hooks.go index c47c9399..9879f8e1 100644 --- a/internal/discover/hooks.go +++ b/internal/discover/hooks.go @@ -46,8 +46,8 @@ func (h *Hook) Hooks() ([]Hook, error) { type HookName string -// DisabledHooks allows individual hooks to be disabled. -type DisabledHooks map[HookName]bool +// disabledHooks allows individual hooks to be disabled. +type disabledHooks map[HookName]bool const ( // HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility. @@ -67,26 +67,43 @@ var AllHooks = []HookName{ HookUpdateLDCache, } -// Option is a function that configures the nvcdilib type Option func(*CDIHook) type CDIHook struct { nvidiaCDIHookPath string + disabledHooks disabledHooks } type HookCreator interface { Create(HookName, ...string) *Hook } -func NewHookCreator(nvidiaCDIHookPath string) HookCreator { +func WithDisabledHooks(hooks ...HookName) Option { + return func(c *CDIHook) { + for _, hook := range hooks { + c.disabledHooks[hook] = true + } + } +} + +func NewHookCreator(nvidiaCDIHookPath string, opts ...Option) HookCreator { CDIHook := &CDIHook{ nvidiaCDIHookPath: nvidiaCDIHookPath, + disabledHooks: disabledHooks{}, + } + + for _, opt := range opts { + opt(CDIHook) } return CDIHook } func (c CDIHook) Create(name HookName, args ...string) *Hook { + if c.disabledHooks[name] { + return nil + } + if name == "create-symlinks" { if len(args) == 0 { return nil diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index 3fbc0e94..ff02ac72 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -106,11 +106,9 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover ) discoverers = append(discoverers, driverDotSoSymlinksDiscoverer) - if l.HookIsSupported(HookEnableCudaCompat) { - // TODO: The following should use the version directly. - cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, l.driver) - discoverers = append(discoverers, cudaCompatLibHookDiscoverer) - } + // TODO: The following should use the version directly. + cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, l.driver) + discoverers = append(discoverers, cudaCompatLibHookDiscoverer) updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath) discoverers = append(discoverers, updateLDCache) diff --git a/pkg/nvcdi/hooks.go b/pkg/nvcdi/hooks.go deleted file mode 100644 index 20ef59a4..00000000 --- a/pkg/nvcdi/hooks.go +++ /dev/null @@ -1,27 +0,0 @@ -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package nvcdi - -// HookIsSupported checks whether a hook of the specified name is supported. -// Hooks must be explicitly disabled, meaning that if no disabled hooks are -// all hooks are supported. -func (l *nvcdilib) HookIsSupported(h HookName) bool { - if len(l.disabledHooks) == 0 { - return true - } - return !l.disabledHooks[h] -} diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index 6527498d..b7742b3e 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -56,15 +56,13 @@ type nvcdilib struct { mergedDeviceOptions []transform.MergedDeviceOption - disabledHooks discover.DisabledHooks + disabledHooks []discover.HookName hookCreator discover.HookCreator } // New creates a new nvcdi library func New(opts ...Option) (Interface, error) { - l := &nvcdilib{ - disabledHooks: make(discover.DisabledHooks), - } + l := &nvcdilib{} for _, opt := range opts { opt(l) } @@ -81,8 +79,6 @@ func New(opts ...Option) (Interface, error) { if l.nvidiaCDIHookPath == "" { l.nvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook" } - // create hookCreator - l.hookCreator = discover.NewHookCreator(l.nvidiaCDIHookPath) if l.driverRoot == "" { l.driverRoot = "/" @@ -150,7 +146,7 @@ func New(opts ...Option) (Interface, error) { l.vendor = "management.nvidia.com" } // Management containers in general do not require CUDA Forward compatibility. - l.disabledHooks[HookEnableCudaCompat] = true + l.disabledHooks = append(l.disabledHooks, HookEnableCudaCompat) lib = (*managementlib)(l) case ModeNvml: lib = (*nvmllib)(l) @@ -175,6 +171,9 @@ func New(opts ...Option) (Interface, error) { return nil, fmt.Errorf("unknown mode %q", l.mode) } + // create hookCreator + l.hookCreator = discover.NewHookCreator(l.nvidiaCDIHookPath, discover.WithDisabledHooks(l.disabledHooks...)) + w := wrapper{ Interface: lib, vendor: l.vendor, diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index f38f2b4a..5a7f3776 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -21,6 +21,7 @@ import ( "github.com/NVIDIA/go-nvlib/pkg/nvlib/info" "github.com/NVIDIA/go-nvml/pkg/nvml" + "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform" ) @@ -158,11 +159,12 @@ func WithLibrarySearchPaths(paths []string) Option { // WithDisabledHook allows specific hooks to the disabled. // This option can be specified multiple times for each hook. -func WithDisabledHook(hook HookName) Option { +func WithDisabledHook[T string | HookName](hook T) Option { return func(o *nvcdilib) { - if o.disabledHooks == nil { - o.disabledHooks = make(map[HookName]bool) + if hook == "all" { + o.disabledHooks = discover.AllHooks + } else { + o.disabledHooks = append(o.disabledHooks, discover.HookName(hook)) } - o.disabledHooks[hook] = true } }