From 479df7134a6536fcda29764f7ae4831cd4fa01a9 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 21 May 2025 15:58:56 +0200 Subject: [PATCH] Add envvar to control debug logging in CDI hooks This change allows hooks to be configured with debug logging. This is currently only enabled for the hooks generated from the runtime. Signed-off-by: Evan Lezar --- cmd/nvidia-cdi-hook/main.go | 6 ++++-- cmd/nvidia-ctk-installer/toolkit/toolkit_test.go | 4 ++++ cmd/nvidia-ctk/cdi/generate/generate_test.go | 6 ++++++ internal/discover/discover.go | 1 + internal/discover/graphics_test.go | 5 ++++- internal/discover/hooks.go | 6 +++++- internal/discover/ldconfig_test.go | 3 ++- internal/discover/symlinks_test.go | 7 ++++++- internal/edits/hook.go | 1 + internal/platform-support/tegra/csv_test.go | 4 +++- internal/runtime/runtime_factory.go | 6 +++++- pkg/nvcdi/driver-wsl_test.go | 5 ++++- pkg/nvcdi/lib.go | 2 +- 13 files changed, 46 insertions(+), 10 deletions(-) diff --git a/cmd/nvidia-cdi-hook/main.go b/cmd/nvidia-cdi-hook/main.go index 55cd4864..20f4ae62 100644 --- a/cmd/nvidia-cdi-hook/main.go +++ b/cmd/nvidia-cdi-hook/main.go @@ -70,13 +70,15 @@ func main() { Aliases: []string{"d"}, Usage: "Enable debug-level logging", Destination: &opts.Debug, - EnvVars: []string{"NVIDIA_CDI_DEBUG"}, + // TODO: Support for NVIDIA_CDI_DEBUG is deprecated and NVIDIA_CTK_DEBUG should be used instead. + EnvVars: []string{"NVIDIA_CTK_DEBUG", "NVIDIA_CDI_DEBUG"}, }, &cli.BoolFlag{ Name: "quiet", Usage: "Suppress all output except for errors; overrides --debug", Destination: &opts.Quiet, - EnvVars: []string{"NVIDIA_CDI_QUIET"}, + // TODO: Support for NVIDIA_CDI_QUIET is deprecated and NVIDIA_CTK_QUIET should be used instead. 
+ EnvVars: []string{"NVIDIA_CTK_QUIET", "NVIDIA_CDI_QUIET"}, }, } diff --git a/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go b/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go index e91e29fc..d7246330 100644 --- a/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go +++ b/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go @@ -95,6 +95,8 @@ containerEdits: - create-symlinks - --link - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: {{ .toolkitRoot }}/nvidia-cdi-hook args: @@ -102,6 +104,8 @@ containerEdits: - update-ldcache - --folder - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false mounts: - hostPath: /host/driver/root/lib/x86_64-linux-gnu/libcuda.so.999.88.77 containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index d6aae4d7..7007ed04 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -91,12 +91,16 @@ containerEdits: - create-symlinks - --link - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: - nvidia-cdi-hook - enable-cuda-compat - --host-driver-version=999.88.77 + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: @@ -104,6 +108,8 @@ containerEdits: - update-ldcache - --folder - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false mounts: - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 diff --git a/internal/discover/discover.go b/internal/discover/discover.go index fc639296..3fb890a9 100644 --- a/internal/discover/discover.go +++ b/internal/discover/discover.go @@ -34,6 +34,7 @@ type Hook struct { Lifecycle string Path string Args []string + Env []string } // Discover defines an interface for 
discovering the devices, mounts, and hooks available on a system diff --git a/internal/discover/graphics_test.go b/internal/discover/graphics_test.go index 3aea93cb..72aa7dcc 100644 --- a/internal/discover/graphics_test.go +++ b/internal/discover/graphics_test.go @@ -25,7 +25,7 @@ import ( func TestGraphicsLibrariesDiscoverer(t *testing.T) { logger, _ := testlog.NewNullLogger() - hookCreator := NewHookCreator("/usr/bin/nvidia-cdi-hook") + hookCreator := NewHookCreator("/usr/bin/nvidia-cdi-hook", false) testCases := []struct { description string @@ -71,6 +71,7 @@ func TestGraphicsLibrariesDiscoverer(t *testing.T) { Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "../libnvidia-allocator.so.1::/usr/lib64/gbm/nvidia-drm_gbm.so", }, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, @@ -98,6 +99,7 @@ func TestGraphicsLibrariesDiscoverer(t *testing.T) { Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libnvidia-vulkan-producer.so.123.45.67::/usr/lib64/libnvidia-vulkan-producer.so", }, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, @@ -129,6 +131,7 @@ func TestGraphicsLibrariesDiscoverer(t *testing.T) { "--link", "../libnvidia-allocator.so.1::/usr/lib64/gbm/nvidia-drm_gbm.so", "--link", "libnvidia-vulkan-producer.so.123.45.67::/usr/lib64/libnvidia-vulkan-producer.so", }, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, diff --git a/internal/discover/hooks.go b/internal/discover/hooks.go index 0f239bfd..3e0d1fc2 100644 --- a/internal/discover/hooks.go +++ b/internal/discover/hooks.go @@ -17,6 +17,7 @@ package discover import ( + "fmt" "path/filepath" "tags.cncf.io/container-device-interface/pkg/cdi" @@ -49,15 +50,17 @@ type Option func(*CDIHook) type CDIHook struct { nvidiaCDIHookPath string + debugLogging bool } type HookCreator interface { Create(string, ...string) *Hook } -func NewHookCreator(nvidiaCDIHookPath string) HookCreator { +func NewHookCreator(nvidiaCDIHookPath string, debugLogging bool) HookCreator { CDIHook := &CDIHook{ 
nvidiaCDIHookPath: nvidiaCDIHookPath, + debugLogging: debugLogging, } return CDIHook @@ -80,6 +83,7 @@ func (c CDIHook) Create(name string, args ...string) *Hook { Lifecycle: cdi.CreateContainerHook, Path: c.nvidiaCDIHookPath, Args: append(c.requiredArgs(name), args...), + Env: []string{fmt.Sprintf("NVIDIA_CTK_DEBUG=%v", c.debugLogging)}, } } diff --git a/internal/discover/ldconfig_test.go b/internal/discover/ldconfig_test.go index ddbda4cc..29de1fb8 100644 --- a/internal/discover/ldconfig_test.go +++ b/internal/discover/ldconfig_test.go @@ -31,7 +31,7 @@ const ( func TestLDCacheUpdateHook(t *testing.T) { logger, _ := testlog.NewNullLogger() - hookCreator := NewHookCreator(testNvidiaCDIHookPath) + hookCreator := NewHookCreator(testNvidiaCDIHookPath, false) testCases := []struct { description string @@ -96,6 +96,7 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: testNvidiaCDIHookPath, Args: tc.expectedArgs, Lifecycle: "createContainer", + Env: []string{"NVIDIA_CTK_DEBUG=false"}, } d, err := NewLDCacheUpdateHook(logger, mountMock, hookCreator, tc.ldconfigPath) diff --git a/internal/discover/symlinks_test.go b/internal/discover/symlinks_test.go index 2a6c9812..52d4a04b 100644 --- a/internal/discover/symlinks_test.go +++ b/internal/discover/symlinks_test.go @@ -115,6 +115,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { Lifecycle: "createContainer", Path: "/path/to/nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, @@ -147,6 +148,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { Lifecycle: "createContainer", Path: "/path/to/nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, @@ -178,6 +180,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { Lifecycle: "createContainer", Path: "/path/to/nvidia-cdi-hook", Args: 
[]string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, @@ -247,6 +250,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { Lifecycle: "createContainer", Path: "/path/to/nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, @@ -301,12 +305,13 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) { "--link", "libGLX_nvidia.so.1.2.3::/usr/lib/libGLX_indirect.so.0", "--link", "libnvidia-opticalflow.so.1::/usr/lib/libnvidia-opticalflow.so", }, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, } - hookCreator := NewHookCreator("/path/to/nvidia-cdi-hook") + hookCreator := NewHookCreator("/path/to/nvidia-cdi-hook", false) for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { d := WithDriverDotSoSymlinks( diff --git a/internal/edits/hook.go b/internal/edits/hook.go index f528fe1d..ca5466d4 100644 --- a/internal/edits/hook.go +++ b/internal/edits/hook.go @@ -42,6 +42,7 @@ func (d hook) toSpec() *specs.Hook { HookName: d.Lifecycle, Path: d.Path, Args: d.Args, + Env: d.Env, } return &s diff --git a/internal/platform-support/tegra/csv_test.go b/internal/platform-support/tegra/csv_test.go index 129bf00c..27734c39 100644 --- a/internal/platform-support/tegra/csv_test.go +++ b/internal/platform-support/tegra/csv_test.go @@ -97,6 +97,7 @@ func TestDiscovererFromCSVFiles(t *testing.T) { "--link", "/usr/lib/aarch64-linux-gnu/tegra/libv4l2_nvargus.so::/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvargus.so", }, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, @@ -153,6 +154,7 @@ func TestDiscovererFromCSVFiles(t *testing.T) { "--link", "/usr/lib/aarch64-linux-gnu/tegra/libv4l2_nvargus.so::/usr/lib/aarch64-linux-gnu/libv4l/plugins/nv/libv4l2_nvargus.so", }, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, @@ -181,7 +183,7 @@ func 
TestDiscovererFromCSVFiles(t *testing.T) { }, } - hookCreator := discover.NewHookCreator("/usr/bin/nvidia-cdi-hook") + hookCreator := discover.NewHookCreator("/usr/bin/nvidia-cdi-hook", false) for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { defer setGetTargetsFromCSVFiles(tc.moutSpecs)() diff --git a/internal/runtime/runtime_factory.go b/internal/runtime/runtime_factory.go index c1a82ac9..9564d06e 100644 --- a/internal/runtime/runtime_factory.go +++ b/internal/runtime/runtime_factory.go @@ -18,6 +18,7 @@ package runtime import ( "fmt" + "os" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" @@ -75,7 +76,10 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp return nil, err } - hookCreator := discover.NewHookCreator(cfg.NVIDIACTKConfig.Path) + hookCreator := discover.NewHookCreator( + cfg.NVIDIACTKConfig.Path, + cfg.NVIDIAContainerRuntimeConfig.DebugFilePath != "" && cfg.NVIDIAContainerRuntimeConfig.DebugFilePath != os.DevNull, + ) mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode, image) // We update the mode here so that we can continue passing just the config to other functions. 
diff --git a/pkg/nvcdi/driver-wsl_test.go b/pkg/nvcdi/driver-wsl_test.go index 27247cc6..3d9696a8 100644 --- a/pkg/nvcdi/driver-wsl_test.go +++ b/pkg/nvcdi/driver-wsl_test.go @@ -29,7 +29,7 @@ import ( func TestNvidiaSMISymlinkHook(t *testing.T) { logger, _ := testlog.NewNullLogger() - hookCreator := discover.NewHookCreator("nvidia-cdi-hook") + hookCreator := discover.NewHookCreator("nvidia-cdi-hook", false) errMounts := errors.New("mounts error") @@ -96,6 +96,7 @@ func TestNvidiaSMISymlinkHook(t *testing.T) { Path: "nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "nvidia-smi::/usr/bin/nvidia-smi"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, @@ -116,6 +117,7 @@ func TestNvidiaSMISymlinkHook(t *testing.T) { Path: "nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "/some/path/nvidia-smi::/usr/bin/nvidia-smi"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, @@ -136,6 +138,7 @@ func TestNvidiaSMISymlinkHook(t *testing.T) { Path: "nvidia-cdi-hook", Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "/some/path/nvidia-smi::/usr/bin/nvidia-smi"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, }, }, }, diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index 165a7136..cbbf2419 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -85,7 +85,7 @@ func New(opts ...Option) (Interface, error) { l.nvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook" } // create hookCreator - l.hookCreator = discover.NewHookCreator(l.nvidiaCDIHookPath) + l.hookCreator = discover.NewHookCreator(l.nvidiaCDIHookPath, false) if l.driverRoot == "" { l.driverRoot = "/"