diff --git a/cmd/nvidia-cdi-hook/commands/commands.go b/cmd/nvidia-cdi-hook/commands/commands.go index 8917c25d..455b2afa 100644 --- a/cmd/nvidia-cdi-hook/commands/commands.go +++ b/cmd/nvidia-cdi-hook/commands/commands.go @@ -20,6 +20,7 @@ import ( "github.com/urfave/cli/v2" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod" + createsonamesymlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-soname-symlinks" symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat" disabledevicenodemodification "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/disable-device-node-modification" @@ -35,6 +36,7 @@ func New(logger logger.Interface) []*cli.Command { symlinks.NewCommand(logger), chmod.NewCommand(logger), cudacompat.NewCommand(logger), + createsonamesymlinks.NewCommand(logger), disabledevicenodemodification.NewCommand(logger), } } diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go new file mode 100644 index 00000000..7f1da580 --- /dev/null +++ b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go @@ -0,0 +1,166 @@ +/** +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package create_soname_symlinks + +import ( + "errors" + "fmt" + "log" + "os" + + "github.com/moby/sys/reexec" + "github.com/urfave/cli/v2" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig" + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" +) + +const ( + reexecUpdateLdCacheCommandName = "reexec-create-soname-symlinks" +) + +type command struct { + logger logger.Interface +} + +type options struct { + folders cli.StringSlice + ldconfigPath string + containerSpec string +} + +func init() { + reexec.Register(reexecUpdateLdCacheCommandName, createSonameSymlinksHandler) + if reexec.Init() { + os.Exit(0) + } +} + +// NewCommand constructs an create-soname-symlinks command with the specified logger +func NewCommand(logger logger.Interface) *cli.Command { + c := command{ + logger: logger, + } + return c.build() +} + +// build the create-soname-symlinks command +func (m command) build() *cli.Command { + cfg := options{} + + // Create the 'create-soname-symlinks' command + c := cli.Command{ + Name: "create-soname-symlinks", + Usage: "Create soname symlinks libraries in specified directories", + Before: func(c *cli.Context) error { + return m.validateFlags(c, &cfg) + }, + Action: func(c *cli.Context) error { + return m.run(c, &cfg) + }, + } + + c.Flags = []cli.Flag{ + &cli.StringSliceFlag{ + Name: "folder", + Usage: "Specify a directory to generate soname symlinks in. Can be specified multiple times", + Destination: &cfg.folders, + }, + &cli.StringFlag{ + Name: "ldconfig-path", + Usage: "Specify the path to ldconfig on the host", + Destination: &cfg.ldconfigPath, + Value: "/sbin/ldconfig", + }, + &cli.StringFlag{ + Name: "container-spec", + Usage: "Specify the path to the OCI container spec. 
If empty or '-' the spec will be read from STDIN", + Destination: &cfg.containerSpec, + }, + } + + return &c +} + +func (m command) validateFlags(c *cli.Context, cfg *options) error { + if cfg.ldconfigPath == "" { + return errors.New("ldconfig-path must be specified") + } + return nil +} + +func (m command) run(c *cli.Context, cfg *options) error { + s, err := oci.LoadContainerState(cfg.containerSpec) + if err != nil { + return fmt.Errorf("failed to load container state: %v", err) + } + + containerRootDir, err := s.GetContainerRoot() + if err != nil || containerRootDir == "" || containerRootDir == "/" { + return fmt.Errorf("failed to determine container root: %v", err) + } + + cmd, err := ldconfig.NewRunner( + reexecUpdateLdCacheCommandName, + cfg.ldconfigPath, + containerRootDir, + cfg.folders.Value()..., + ) + if err != nil { + return err + } + + return cmd.Run() +} + +// createSonameSymlinksHandler wraps createSonameSymlinks with error handling. +func createSonameSymlinksHandler() { + if err := createSonameSymlinks(os.Args); err != nil { + log.Printf("Error creating soname symlinks: %v", err) + os.Exit(1) + } +} + +// createSonameSymlinks ensures that soname symlinks are created in the +// specified directories. +// It is invoked from a reexec'd handler and provides namespace isolation for +// the operations performed by this hook. At the point where this is invoked, +// we are in a new mount namespace that is cloned from the parent. +// +// args[0] is the reexec initializer function name +// args[1] is the path of the ldconfig binary on the host +// args[2] is the container root directory +// The remaining args are directories where soname symlinks need to be created. 
+func createSonameSymlinks(args []string) error { + if len(args) < 3 { + return fmt.Errorf("incorrect arguments: %v", args) + } + hostLdconfigPath := args[1] + containerRootDirPath := args[2] + + ldconfig, err := ldconfig.New( + hostLdconfigPath, + containerRootDirPath, + ) + if err != nil { + return fmt.Errorf("failed to construct ldconfig runner: %w", err) + } + + return ldconfig.CreateSonameSymlinks(args[3:]...) +} diff --git a/cmd/nvidia-cdi-hook/update-ldcache/container-root.go b/cmd/nvidia-cdi-hook/update-ldcache/container-root.go deleted file mode 100644 index 71a49469..00000000 --- a/cmd/nvidia-cdi-hook/update-ldcache/container-root.go +++ /dev/null @@ -1,46 +0,0 @@ -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package ldcache - -import ( - "os" - "path/filepath" - - "github.com/moby/sys/symlink" -) - -// A containerRoot represents the root filesystem of a container. -type containerRoot string - -// hasPath checks whether the specified path exists in the root. -func (r containerRoot) hasPath(path string) bool { - resolved, err := r.resolve(path) - if err != nil { - return false - } - if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) { - return false - } - return true -} - -// resolve returns the absolute path including root path. -// Symlinks are resolved, but are guaranteed to resolve in the root. 
-func (r containerRoot) resolve(path string) (string, error) { - absolute := filepath.Clean(filepath.Join(string(r), path)) - return symlink.FollowSymlinkInScope(absolute, string(r)) -} diff --git a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go index 49b73371..ef614709 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go +++ b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go @@ -21,24 +21,16 @@ import ( "fmt" "log" "os" - "strings" "github.com/moby/sys/reexec" "github.com/urfave/cli/v2" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" ) const ( - // ldsoconfdFilenamePattern specifies the pattern for the filename - // in ld.so.conf.d that includes references to the specified directories. - // The 00-nvcr prefix is chosen to ensure that these libraries have a - // higher precedence than other libraries on the system, but lower than - // the 00-cuda-compat that is included in some containers. - ldsoconfdFilenamePattern = "00-nvcr-*.conf" - reexecUpdateLdCacheCommandName = "reexec-update-ldcache" ) @@ -123,15 +115,15 @@ func (m command) run(c *cli.Context, cfg *options) error { return fmt.Errorf("failed to determined container root: %v", err) } - args := []string{ + cmd, err := ldconfig.NewRunner( reexecUpdateLdCacheCommandName, - strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"), + cfg.ldconfigPath, containerRootDir, + cfg.folders.Value()..., + ) + if err != nil { + return err } - args = append(args, cfg.folders.Value()...) - - cmd := createReexecCommand(args) - return cmd.Run() } @@ -143,15 +135,16 @@ func updateLdCacheHandler() { } } -// updateLdCache is invoked from a reexec'd handler and provides namespace -// isolation for the operations performed by this hook. 
-// At the point where this is invoked, we are in a new mount namespace that is -// cloned from the parent. +// updateLdCache ensures that the ldcache in the container is updated to include +// libraries that are mounted from the host. +// It is invoked from a reexec'd handler and provides namespace isolation for +// the operations performed by this hook. At the point where this is invoked, +// we are in a new mount namespace that is cloned from the parent. // // args[0] is the reexec initializer function name // args[1] is the path of the ldconfig binary on the host // args[2] is the container root directory -// The remaining args are folders that need to be added to the ldcache. +// The remaining args are folders that must be added to the ldcache. func updateLdCache(args []string) error { if len(args) < 3 { return fmt.Errorf("incorrect arguments: %v", args) @@ -159,97 +152,13 @@ func updateLdCache(args []string) error { hostLdconfigPath := args[1] containerRootDirPath := args[2] - // To prevent leaking the parent proc filesystem, we create a new proc mount - // in the container root. - if err := mountProc(containerRootDirPath); err != nil { - return fmt.Errorf("error mounting /proc: %w", err) - } - - // We mount the host ldconfig before we pivot root since host paths are not - // visible after the pivot root operation. - ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath) + ldconfig, err := ldconfig.New( + hostLdconfigPath, + containerRootDirPath, + ) if err != nil { - return fmt.Errorf("error mounting host ldconfig: %w", err) + return fmt.Errorf("failed to construct ldconfig runner: %w", err) } - // We pivot to the container root for the new process, this further limits - // access to the host. - if err := pivotRoot(containerRootDirPath); err != nil { - return fmt.Errorf("error running pivot_root: %w", err) - } - - return runLdconfig(ldconfigPath, args[3:]...) 
-} - -// runLdconfig runs the ldconfig binary and ensures that the specified directories -// are processed for the ldcache. -func runLdconfig(ldconfigPath string, directories ...string) error { - args := []string{ - "ldconfig", - // Explicitly specify using /etc/ld.so.conf since the host's ldconfig may - // be configured to use a different config file by default. - // Note that since we apply the `-r {{ .containerRootDir }}` argument, /etc/ld.so.conf is - // in the container. - "-f", "/etc/ld.so.conf", - } - - containerRoot := containerRoot("/") - - if containerRoot.hasPath("/etc/ld.so.cache") { - args = append(args, "-C", "/etc/ld.so.cache") - } else { - args = append(args, "-N") - } - - if containerRoot.hasPath("/etc/ld.so.conf.d") { - err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...) - if err != nil { - return fmt.Errorf("failed to update ld.so.conf.d: %w", err) - } - } else { - args = append(args, directories...) - } - - return SafeExec(ldconfigPath, args, nil) -} - -// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/. -// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and -// contains the specified directories on each line. 
-func createLdsoconfdFile(pattern string, dirs ...string) error { - if len(dirs) == 0 { - return nil - } - - ldsoconfdDir := "/etc/ld.so.conf.d" - if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil { - return fmt.Errorf("failed to create ld.so.conf.d: %w", err) - } - - configFile, err := os.CreateTemp(ldsoconfdDir, pattern) - if err != nil { - return fmt.Errorf("failed to create config file: %w", err) - } - defer func() { - _ = configFile.Close() - }() - - added := make(map[string]bool) - for _, dir := range dirs { - if added[dir] { - continue - } - _, err = fmt.Fprintf(configFile, "%s\n", dir) - if err != nil { - return fmt.Errorf("failed to update config file: %w", err) - } - added[dir] = true - } - - // The created file needs to be world readable for the cases where the container is run as a non-root user. - if err := configFile.Chmod(0644); err != nil { - return fmt.Errorf("failed to chmod config file: %w", err) - } - - return nil + return ldconfig.UpdateLDCache(args[3:]...) } diff --git a/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go b/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go index fd8a4bf8..671042d2 100644 --- a/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go +++ b/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go @@ -98,6 +98,15 @@ containerEdits: - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so env: - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: {{ .toolkitRoot }}/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: {{ .toolkitRoot }}/nvidia-cdi-hook args: diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index b8059b42..7adf0e00 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -103,6 +103,15 @@ containerEdits: - --host-driver-version=999.88.77 env: - NVIDIA_CTK_DEBUG=false + - hookName: 
createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: @@ -180,6 +189,15 @@ containerEdits: - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so env: - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: @@ -257,6 +275,15 @@ containerEdits: - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so env: - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: diff --git a/internal/discover/hooks.go b/internal/discover/hooks.go index 36e13c78..8020e7bc 100644 --- a/internal/discover/hooks.go +++ b/internal/discover/hooks.go @@ -46,6 +46,9 @@ const ( // An UpdateLDCacheHook is the hook used to update the ldcache in the // container. This allows injected libraries to be discoverable. UpdateLDCacheHook = HookName("update-ldcache") + // A CreateSonameSymlinksHook is the hook used to ensure that soname symlinks + // for injected libraries exist in the container. 
+ CreateSonameSymlinksHook = HookName("create-soname-symlinks") defaultNvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook" ) diff --git a/internal/discover/ldconfig.go b/internal/discover/ldconfig.go index eb5ab467..15356de8 100644 --- a/internal/discover/ldconfig.go +++ b/internal/discover/ldconfig.go @@ -51,28 +51,22 @@ func (d ldconfig) Hooks() ([]Hook, error) { return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err) } - h := createLDCacheUpdateHook( - d.hookCreator, - d.ldconfigPath, - getLibraryPaths(mounts), - ) - - return h.Hooks() -} - -// createLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache -func createLDCacheUpdateHook(hookCreator HookCreator, ldconfig string, libraries []string) *Hook { var args []string - if ldconfig != "" { - args = append(args, "--ldconfig-path", ldconfig) + if d.ldconfigPath != "" { + args = append(args, "--ldconfig-path", d.ldconfigPath) } - for _, f := range uniqueFolders(libraries) { + for _, f := range uniqueFolders(getLibraryPaths(mounts)) { args = append(args, "--folder", f) } - return hookCreator.Create(UpdateLDCacheHook, args...) 
+ h := Merge( + d.hookCreator.Create(CreateSonameSymlinksHook, args...), + d.hookCreator.Create(UpdateLDCacheHook, args...), + ) + + return h.Hooks() } // getLibraryPaths extracts the library dirs from the specified mounts diff --git a/internal/discover/ldconfig_test.go b/internal/discover/ldconfig_test.go index ff18118d..30576a7b 100644 --- a/internal/discover/ldconfig_test.go +++ b/internal/discover/ldconfig_test.go @@ -39,11 +39,24 @@ func TestLDCacheUpdateHook(t *testing.T) { mounts []Mount mountError error expectedError error - expectedArgs []string + expectedHooks []Hook }{ { - description: "empty mounts", - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache"}, + description: "empty mounts", + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + }, }, { description: "mount error", @@ -66,7 +79,20 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: "/usr/local/lib/libbar.so", }, }, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + }, }, { description: "host paths are ignored", @@ -76,12 +102,38 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: 
"/usr/local/lib/libfoo.so", }, }, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + }, }, { description: "explicit ldconfig path is passed", ldconfigPath: testLdconfigPath, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--ldconfig-path", testLdconfigPath}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + }, }, } @@ -92,13 +144,6 @@ func TestLDCacheUpdateHook(t *testing.T) { return tc.mounts, tc.mountError }, } - expectedHook := Hook{ - Path: testNvidiaCDIHookPath, - Args: tc.expectedArgs, - Lifecycle: "createContainer", - Env: []string{"NVIDIA_CTK_DEBUG=false"}, - } - d, err := NewLDCacheUpdateHook(logger, mountMock, hookCreator, tc.ldconfigPath) require.NoError(t, err) @@ -112,9 +157,7 @@ func TestLDCacheUpdateHook(t *testing.T) { } require.NoError(t, err) - require.Len(t, hooks, 1) - - require.EqualValues(t, hooks[0], expectedHook) + require.EqualValues(t, tc.expectedHooks, hooks) devices, err := d.Devices() require.NoError(t, err) diff --git a/internal/ldconfig/ldconfig.go b/internal/ldconfig/ldconfig.go new file mode 100644 index 00000000..f3db1a77 --- /dev/null +++ 
b/internal/ldconfig/ldconfig.go @@ -0,0 +1,206 @@ +/** +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package ldconfig + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" +) + +const ( + // ldsoconfdFilenamePattern specifies the pattern for the filename + // in ld.so.conf.d that includes references to the specified directories. + // The 00-nvcr prefix is chosen to ensure that these libraries have a + // higher precedence than other libraries on the system, but lower than + // the 00-cuda-compat that is included in some containers. + ldsoconfdFilenamePattern = "00-nvcr-*.conf" +) + +type Ldconfig struct { + ldconfigPath string + inRoot string +} + +// NewRunner creates an exec.Cmd that can be used to run ldconfig. +func NewRunner(id string, ldconfigPath string, containerRoot string, additionalargs ...string) (*exec.Cmd, error) { + args := []string{ + id, + strings.TrimPrefix(config.NormalizeLDConfigPath("@"+ldconfigPath), "@"), + containerRoot, + } + args = append(args, additionalargs...) + + return createReexecCommand(args) +} + +// New creates an Ldconfig struct that is used to perform operations on the +// ldcache and libraries in a particular root (e.g. a container). 
+func New(ldconfigPath string, inRoot string) (*Ldconfig, error) { + l := &Ldconfig{ + ldconfigPath: ldconfigPath, + inRoot: inRoot, + } + if ldconfigPath == "" { + return nil, fmt.Errorf("an ldconfig path must be specified") + } + if inRoot == "" || inRoot == "/" { + return nil, fmt.Errorf("ldconfig must be run in the non-system root") + } + return l, nil +} + +// CreateSonameSymlinks uses ldconfig to create the soname symlinks in the +// specified directories. +func (l *Ldconfig) CreateSonameSymlinks(directories ...string) error { + if len(directories) == 0 { + return nil + } + ldconfigPath, err := l.prepareRoot() + if err != nil { + return err + } + + args := []string{ + filepath.Base(ldconfigPath), + // Explicitly disable updating the LDCache. + "-N", + // Specify -n to only process the specified directories. + "-n", + } + args = append(args, directories...) + + return SafeExec(ldconfigPath, args, nil) +} + +func (l *Ldconfig) UpdateLDCache(directories ...string) error { + ldconfigPath, err := l.prepareRoot() + if err != nil { + return err + } + + args := []string{ + filepath.Base(ldconfigPath), + // Explicitly specify using /etc/ld.so.conf since the host's ldconfig may + // be configured to use a different config file by default. + "-f", "/etc/ld.so.conf", + } + + if l.ldcacheExists() { + args = append(args, "-C", "/etc/ld.so.cache") + } else { + args = append(args, "-N") + } + + // If the ld.so.conf.d directory exists, we create a config file there + // containing the required directories, otherwise we add the specified + // directories to the ldconfig command directly. + if l.ldsoconfdDirectoryExists() { + err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...) + if err != nil { + return fmt.Errorf("failed to update ld.so.conf.d: %w", err) + } + } else { + args = append(args, directories...) 
+ } + + return SafeExec(ldconfigPath, args, nil) +} + +func (l *Ldconfig) prepareRoot() (string, error) { + // To prevent leaking the parent proc filesystem, we create a new proc mount + // in the specified root. + if err := mountProc(l.inRoot); err != nil { + return "", fmt.Errorf("error mounting /proc: %w", err) + } + + // We mount the host ldconfig before we pivot root since host paths are not + // visible after the pivot root operation. + ldconfigPath, err := mountLdConfig(l.ldconfigPath, l.inRoot) + if err != nil { + return "", fmt.Errorf("error mounting host ldconfig: %w", err) + } + + // We pivot to the container root for the new process, this further limits + // access to the host. + if err := pivotRoot(l.inRoot); err != nil { + return "", fmt.Errorf("error running pivot_root: %w", err) + } + + return ldconfigPath, nil +} + +func (l *Ldconfig) ldcacheExists() bool { + if _, err := os.Stat("/etc/ld.so.cache"); err != nil && os.IsNotExist(err) { + return false + } + return true +} + +func (l *Ldconfig) ldsoconfdDirectoryExists() bool { + info, err := os.Stat("/etc/ld.so.conf.d") + if err != nil { // info is nil on any Stat error, not only "not exist" + return false + } + return info.IsDir() +} + +// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/. +// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and +// contains the specified directories on each line. 
+func createLdsoconfdFile(pattern string, dirs ...string) error { + if len(dirs) == 0 { + return nil + } + + ldsoconfdDir := "/etc/ld.so.conf.d" + if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil { + return fmt.Errorf("failed to create ld.so.conf.d: %w", err) + } + + configFile, err := os.CreateTemp(ldsoconfdDir, pattern) + if err != nil { + return fmt.Errorf("failed to create config file: %w", err) + } + defer func() { + _ = configFile.Close() + }() + + added := make(map[string]bool) + for _, dir := range dirs { + if added[dir] { + continue + } + _, err = fmt.Fprintf(configFile, "%s\n", dir) + if err != nil { + return fmt.Errorf("failed to update config file: %w", err) + } + added[dir] = true + } + + // The created file needs to be world readable for the cases where the container is run as a non-root user. + if err := configFile.Chmod(0644); err != nil { + return fmt.Errorf("failed to chmod config file: %w", err) + } + + return nil +} diff --git a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go b/internal/ldconfig/ldconfig_linux.go similarity index 98% rename from cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go rename to internal/ldconfig/ldconfig_linux.go index c472d14f..79702b11 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go +++ b/internal/ldconfig/ldconfig_linux.go @@ -17,7 +17,7 @@ # limitations under the License. **/ -package ldcache +package ldconfig import ( "errors" @@ -29,8 +29,8 @@ import ( "syscall" securejoin "github.com/cyphar/filepath-securejoin" - "github.com/moby/sys/reexec" + "github.com/opencontainers/runc/libcontainer/utils" "golang.org/x/sys/unix" ) @@ -182,7 +182,7 @@ func createTmpFs(target string, size int) error { // createReexecCommand creates a command that can be used to trigger the reexec // initializer. // On linux this command runs in new namespaces. -func createReexecCommand(args []string) *exec.Cmd { +func createReexecCommand(args []string) (*exec.Cmd, error) { cmd := reexec.Command(args...) 
cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout @@ -196,5 +196,5 @@ func createReexecCommand(args []string) *exec.Cmd { syscall.CLONE_NEWNET, } - return cmd + return cmd, nil } diff --git a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go b/internal/ldconfig/ldconfig_other.go similarity index 76% rename from cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go rename to internal/ldconfig/ldconfig_other.go index a6c35261..c5d452a2 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go +++ b/internal/ldconfig/ldconfig_other.go @@ -17,14 +17,11 @@ # limitations under the License. **/ -package ldcache +package ldconfig import ( "fmt" - "os" "os/exec" - - "github.com/moby/sys/reexec" ) func pivotRoot(newroot string) error { @@ -39,13 +36,6 @@ func mountProc(newroot string) error { return fmt.Errorf("not supported") } -// createReexecCommand creates a command that can be used ot trigger the reexec -// initializer. -func createReexecCommand(args []string) *exec.Cmd { - cmd := reexec.Command(args...) - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - return cmd +func createReexecCommand(args []string) (*exec.Cmd, error) { + return nil, fmt.Errorf("not supported") } diff --git a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go b/internal/ldconfig/safe-exec_linux.go similarity index 98% rename from cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go rename to internal/ldconfig/safe-exec_linux.go index 790ac6a8..09b6cc22 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go +++ b/internal/ldconfig/safe-exec_linux.go @@ -16,7 +16,7 @@ # limitations under the License. 
**/ -package ldcache +package ldconfig import ( "fmt" diff --git a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go b/internal/ldconfig/safe-exec_other.go similarity index 98% rename from cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go rename to internal/ldconfig/safe-exec_other.go index 71f9ddb5..3d017645 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go +++ b/internal/ldconfig/safe-exec_other.go @@ -16,7 +16,7 @@ # limitations under the License. **/ -package ldcache +package ldconfig import "syscall" diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index 50d5c4ea..8bd0bf12 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -56,6 +56,9 @@ const ( EnableCudaCompatHook = discover.EnableCudaCompatHook // An UpdateLDCacheHook is used to update the ldcache in the container. UpdateLDCacheHook = discover.UpdateLDCacheHook + // A CreateSonameSymlinksHook is the hook used to ensure that soname symlinks + // for injected libraries exist in the container. + CreateSonameSymlinksHook = discover.CreateSonameSymlinksHook // Deprecated: Use CreateSymlinksHook instead. 
HookCreateSymlinks = CreateSymlinksHook diff --git a/tests/e2e/nvidia-container-toolkit_test.go b/tests/e2e/nvidia-container-toolkit_test.go index f89b79a5..c143ea7d 100644 --- a/tests/e2e/nvidia-container-toolkit_test.go +++ b/tests/e2e/nvidia-container-toolkit_test.go @@ -235,4 +235,26 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { Expect(output).To(Equal("ModifyDeviceFiles: 0\n")) }) }) + + When("A container is run using CDI", Ordered, func() { + BeforeAll(func(ctx context.Context) { + _, _, err := runner.Run("docker pull ubuntu") + Expect(err).ToNot(HaveOccurred()) + }) + + It("should include libcuda.so in the ldcache", func(ctx context.Context) { + ldcacheOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu bash -c \"ldconfig -p | grep 'libcuda.so'\"") + Expect(err).ToNot(HaveOccurred()) + Expect(ldcacheOutput).ToNot(BeEmpty()) + + ldcacheLines := strings.Split(ldcacheOutput, "\n") + var libs []string + for _, line := range ldcacheLines { + parts := strings.SplitN(line, " (", 2) + libs = append(libs, strings.TrimSpace(parts[0])) + } + + Expect(libs).To(ContainElements([]string{"libcuda.so", "libcuda.so.1"})) + }) + }) })