From 9daa179f253caf80a89f4f5f00d8450c585c293a Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 27 Feb 2025 14:38:37 +0200 Subject: [PATCH] Add create-soname-symlinks hook This change adds a create-soname-symlinks hook that can be used to ensure that the soname symlinks for injected libraries exist in a container. This is done by calling ldconfig -n -N for the folders containing the injected libraries. This also ensures that libcuda.so is present in the ldcache when the update-ldcache hook is run. Signed-off-by: Evan Lezar --- cmd/nvidia-cdi-hook/commands/commands.go | 2 + .../create-soname-symlinks/soname-symlinks.go | 131 ++++++++++++++++++ .../container/toolkit/toolkit_test.go | 7 + cmd/nvidia-ctk/cdi/generate/generate_test.go | 7 + internal/discover/ldconfig.go | 27 ++-- internal/discover/ldconfig_test.go | 66 +++++++-- tests/e2e/nvidia-container-toolkit_test.go | 22 +++ 7 files changed, 237 insertions(+), 25 deletions(-) create mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go diff --git a/cmd/nvidia-cdi-hook/commands/commands.go b/cmd/nvidia-cdi-hook/commands/commands.go index 3f80ba9b..7ad34ca9 100644 --- a/cmd/nvidia-cdi-hook/commands/commands.go +++ b/cmd/nvidia-cdi-hook/commands/commands.go @@ -20,6 +20,7 @@ import ( "github.com/urfave/cli/v2" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod" + soname "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-soname-symlinks" symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat" ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache" @@ -34,5 +35,6 @@ func New(logger logger.Interface) []*cli.Command { symlinks.NewCommand(logger), chmod.NewCommand(logger), cudacompat.NewCommand(logger), + soname.NewCommand(logger), } } diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go 
b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go new file mode 100644 index 00000000..5f163ec7 --- /dev/null +++ b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go @@ -0,0 +1,131 @@ +/** +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package soname + +import ( + "errors" + "fmt" + "path/filepath" + + "github.com/urfave/cli/v2" + + "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/utils" + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" +) + +type command struct { + logger logger.Interface + utils.SafeExecer +} + +type options struct { + folders cli.StringSlice + ldconfigPath string + containerSpec string +} + +// NewCommand constructs a create-soname-symlinks command with the specified logger +func NewCommand(logger logger.Interface) *cli.Command { + c := command{ + logger: logger, + SafeExecer: utils.NewSafeExecer(logger), + } + return c.build() +} + +// build the create-soname-symlinks command +func (m command) build() *cli.Command { + cfg := options{} + + // Create the 'create-soname-symlinks' command + c := cli.Command{ + Name: "create-soname-symlinks", + Usage: "Create soname symlinks for the specified folders using ldconfig -n -N", + Before: func(c *cli.Context) error { + return m.validateFlags(c, &cfg) + }, + Action: func(c *cli.Context) error { + return m.run(c, &cfg) + }, + 
} + + c.Flags = []cli.Flag{ + &cli.StringSliceFlag{ + Name: "folder", + Usage: "Specify a folder to search for shared libraries for which soname symlinks need to be created", + Destination: &cfg.folders, + }, + &cli.StringFlag{ + Name: "ldconfig-path", + Usage: "Specify the path to the ldconfig program", + Destination: &cfg.ldconfigPath, + Value: "/sbin/ldconfig", + }, + &cli.StringFlag{ + Name: "container-spec", + Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN", + Destination: &cfg.containerSpec, + }, + } + + return &c +} + +func (m command) validateFlags(c *cli.Context, cfg *options) error { + if cfg.ldconfigPath == "" { + return errors.New("ldconfig-path must be specified") + } + return nil +} + +func (m command) run(c *cli.Context, cfg *options) error { + s, err := oci.LoadContainerState(cfg.containerSpec) + if err != nil { + return fmt.Errorf("failed to load container state: %v", err) + } + + containerRoot, err := s.GetContainerRoot() + if err != nil { + return fmt.Errorf("failed to determined container root: %v", err) + } + if containerRoot == "" { + m.logger.Warningf("No container root detected") + return nil + } + + dirs := cfg.folders.Value() + if len(dirs) == 0 { + return nil + } + + ldconfigPath := utils.ResolveHostLDConfigPath(cfg.ldconfigPath) + args := []string{filepath.Base(ldconfigPath)} + + args = append(args, + // Specify the containerRoot to use. + "-r", containerRoot, + // Specify -n to only process the specified folders. + "-n", + // Explicitly disable updating the LDCache. + "-N", + ) + // Explicitly specify the directories to add. + args = append(args, dirs...) 
+ + return m.Exec(ldconfigPath, args, nil) +} diff --git a/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go b/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go index dfc33120..2e7d4755 100644 --- a/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go +++ b/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go @@ -86,6 +86,13 @@ containerEdits: - --host-driver-version=999.88.77 hookName: createContainer path: {{ .toolkitRoot }}/nvidia-cdi-hook + - args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu + hookName: createContainer + path: {{ .toolkitRoot }}/nvidia-cdi-hook - args: - nvidia-cdi-hook - update-ldcache diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index 57bd865a..330a8cfc 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -85,6 +85,13 @@ containerEdits: - --host-driver-version=999.88.77 hookName: createContainer path: /usr/bin/nvidia-cdi-hook + - args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu + hookName: createContainer + path: /usr/bin/nvidia-cdi-hook - args: - nvidia-cdi-hook - update-ldcache diff --git a/internal/discover/ldconfig.go b/internal/discover/ldconfig.go index b81b9be5..db0526ec 100644 --- a/internal/discover/ldconfig.go +++ b/internal/discover/ldconfig.go @@ -50,16 +50,16 @@ func (d ldconfig) Hooks() ([]Hook, error) { if err != nil { return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err) } - h := CreateLDCacheUpdateHook( + hooks := CreateLDCacheUpdateHooks( d.nvidiaCDIHookPath, d.ldconfigPath, getLibraryPaths(mounts), ) - return []Hook{h}, nil + return hooks, nil } -// CreateLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache -func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []string) Hook { +// 
CreateLDCacheUpdateHooks locates the NVIDIA Container Toolkit CLI and creates the hooks for creating soname symlinks and updating the LD Cache +func CreateLDCacheUpdateHooks(executable string, ldconfig string, libraries []string) []Hook { var args []string if ldconfig != "" { @@ -70,13 +70,20 @@ func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []str args = append(args, "--folder", f) } - hook := CreateNvidiaCDIHook( - executable, - "update-ldcache", - args..., - ) + hooks := []Hook{ + CreateNvidiaCDIHook( + executable, + "create-soname-symlinks", + args..., + ), + CreateNvidiaCDIHook( + executable, + "update-ldcache", + args..., + ), + } - return hook + return hooks } // getLibraryPaths extracts the library dirs from the specified mounts diff --git a/internal/discover/ldconfig_test.go b/internal/discover/ldconfig_test.go index 0b214c77..2c2da46c 100644 --- a/internal/discover/ldconfig_test.go +++ b/internal/discover/ldconfig_test.go @@ -38,11 +38,22 @@ func TestLDCacheUpdateHook(t *testing.T) { mounts []Mount mountError error expectedError error - expectedArgs []string + expectedHooks []Hook }{ { - description: "empty mounts", - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache"}, + description: "empty mounts", + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache"}, + }, + }, }, { description: "mount error", @@ -65,7 +76,18 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: "/usr/local/lib/libbar.so", }, }, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib", "--folder", 
"/usr/local/libother"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + }, + }, }, { description: "host paths are ignored", @@ -75,12 +97,34 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: "/usr/local/lib/libfoo.so", }, }, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"}, + }, + }, }, { description: "explicit ldconfig path is passed", ldconfigPath: testLdconfigPath, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--ldconfig-path", testLdconfigPath}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, + }, + }, }, } @@ -91,12 +135,6 @@ func TestLDCacheUpdateHook(t *testing.T) { return tc.mounts, tc.mountError }, } - expectedHook := Hook{ - Path: testNvidiaCDIHookPath, - Args: tc.expectedArgs, - Lifecycle: "createContainer", - } - d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCDIHookPath, tc.ldconfigPath) require.NoError(t, err) @@ -110,9 +148,7 @@ func TestLDCacheUpdateHook(t *testing.T) { } require.NoError(t, err) - require.Len(t, hooks, 1) - - require.EqualValues(t, hooks[0], expectedHook) + require.EqualValues(t, tc.expectedHooks, hooks) devices, err := d.Devices() require.NoError(t, err) diff --git 
a/tests/e2e/nvidia-container-toolkit_test.go b/tests/e2e/nvidia-container-toolkit_test.go index 5948014b..cde77be7 100644 --- a/tests/e2e/nvidia-container-toolkit_test.go +++ b/tests/e2e/nvidia-container-toolkit_test.go @@ -215,4 +215,26 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { Expect(ldconfigOut).To(ContainSubstring("/usr/lib64")) }) }) + + When("A container is run using CDI", Ordered, func() { + BeforeAll(func(ctx context.Context) { + _, _, err := r.Run("docker pull ubuntu") + Expect(err).ToNot(HaveOccurred()) + }) + + It("should include libcuda.so in the ldcache", func(ctx context.Context) { + ldcacheOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu bash -c \"ldconfig -p | grep 'libcuda.so'\"") + Expect(err).ToNot(HaveOccurred()) + Expect(ldcacheOutput).ToNot(BeEmpty()) + + ldcacheLines := strings.Split(ldcacheOutput, "\n") + var libs []string + for _, line := range ldcacheLines { + parts := strings.SplitN(line, " (", 2) + libs = append(libs, strings.TrimSpace(parts[0])) + } + + Expect(libs).To(ContainElements([]string{"libcuda.so", "libcuda.so.1"})) + }) + }) })