From 4bf7421a80ae054ec1fc92fa71aa31fa6d9ea59b Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 27 Feb 2025 14:38:37 +0200 Subject: [PATCH 1/2] Add create-soname-symlinks hook This change adds a create-soname-symlinks hook that can be used to ensure that the soname symlinks for injected libraries exist in a container. This is done by calling ldconfig -n -N for the directories containing the injected libraries. This also ensures that libcuda.so is present in the ldcache when the update-ldcache hook is run. Signed-off-by: Evan Lezar --- cmd/nvidia-cdi-hook/commands/commands.go | 2 + .../create-soname-symlinks/ldconfig_linux.go | 200 ++++++++++++++++++ .../create-soname-symlinks/ldconfig_other.go | 51 +++++ .../create-soname-symlinks/safe-exec_linux.go | 58 +++++ .../create-soname-symlinks/safe-exec_other.go | 28 +++ .../create-soname-symlinks/soname-symlinks.go | 191 +++++++++++++++++ .../toolkit/toolkit_test.go | 9 + cmd/nvidia-ctk/cdi/generate/generate_test.go | 27 +++ internal/discover/hooks.go | 3 + internal/discover/ldconfig.go | 24 +-- internal/discover/ldconfig_test.go | 75 +++++-- pkg/nvcdi/api.go | 3 + tests/e2e/nvidia-container-toolkit_test.go | 22 ++ 13 files changed, 662 insertions(+), 31 deletions(-) create mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_linux.go create mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go create mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go create mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go create mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go diff --git a/cmd/nvidia-cdi-hook/commands/commands.go b/cmd/nvidia-cdi-hook/commands/commands.go index 8917c25d..455b2afa 100644 --- a/cmd/nvidia-cdi-hook/commands/commands.go +++ b/cmd/nvidia-cdi-hook/commands/commands.go @@ -20,6 +20,7 @@ import ( "github.com/urfave/cli/v2" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod" + 
createsonamesymlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-soname-symlinks" symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat" disabledevicenodemodification "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/disable-device-node-modification" @@ -35,6 +36,7 @@ func New(logger logger.Interface) []*cli.Command { symlinks.NewCommand(logger), chmod.NewCommand(logger), cudacompat.NewCommand(logger), + createsonamesymlinks.NewCommand(logger), disabledevicenodemodification.NewCommand(logger), } } diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_linux.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_linux.go new file mode 100644 index 00000000..ffa88b41 --- /dev/null +++ b/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_linux.go @@ -0,0 +1,200 @@ +//go:build linux + +/** +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package create_soname_symlinks + +import ( + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "strconv" + "syscall" + + securejoin "github.com/cyphar/filepath-securejoin" + + "github.com/moby/sys/reexec" + "github.com/opencontainers/runc/libcontainer/utils" + "golang.org/x/sys/unix" +) + +// pivotRoot will call pivot_root such that rootfs becomes the new root +// filesystem, and everything else is cleaned up. +// This is adapted from the implementation here: +// +// https://github.com/opencontainers/runc/blob/e89a29929c775025419ab0d218a43588b4c12b9a/libcontainer/rootfs_linux.go#L1056-L1113 +// +// With the `mount` and `unmount` calls changed to direct unix.Mount and unix.Unmount calls. +func pivotRoot(rootfs string) error { + // While the documentation may claim otherwise, pivot_root(".", ".") is + // actually valid. What this results in is / being the new root but + // /proc/self/cwd being the old root. Since we can play around with the cwd + // with pivot_root this allows us to pivot without creating directories in + // the rootfs. Shout-outs to the LXC developers for giving us this idea. + + oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return &os.PathError{Op: "open", Path: "/", Err: err} + } + defer unix.Close(oldroot) //nolint: errcheck + + newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return &os.PathError{Op: "open", Path: rootfs, Err: err} + } + defer unix.Close(newroot) //nolint: errcheck + + // Change to the new root so that the pivot_root actually acts on it. + if err := unix.Fchdir(newroot); err != nil { + return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(newroot), Err: err} + } + + if err := unix.PivotRoot(".", "."); err != nil { + return &os.PathError{Op: "pivot_root", Path: ".", Err: err} + } + + // Currently our "." is oldroot (according to the current kernel code). 
+ // However, purely for safety, we will fchdir(oldroot) since there isn't + // really any guarantee from the kernel what /proc/self/cwd will be after a + // pivot_root(2). + + if err := unix.Fchdir(oldroot); err != nil { + return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(oldroot), Err: err} + } + + // Make oldroot rslave to make sure our unmounts don't propagate to the + // host (and thus bork the machine). We don't use rprivate because this is + // known to cause issues due to races where we still have a reference to a + // mount while a process in the host namespace is trying to operate on + // something it thinks has no mounts (devicemapper in particular). + if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { + return err + } + // Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd. + if err := unix.Unmount(".", unix.MNT_DETACH); err != nil { + return err + } + + // Switch back to our shiny new root. + if err := unix.Chdir("/"); err != nil { + return &os.PathError{Op: "chdir", Path: "/", Err: err} + } + return nil +} + +// mountLdConfig mounts the host ldconfig to the mount namespace of the hook. +// We use WithProcfd to perform the mount operations to ensure that the changes +// are persisted across the pivot root.
+func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) { + hostLdconfigInfo, err := os.Stat(hostLdconfigPath) + if err != nil { + return "", fmt.Errorf("error reading host ldconfig: %w", err) + } + + hookScratchDirPath := "/var/run/nvidia-ctk-hook" + ldconfigPath := filepath.Join(hookScratchDirPath, "ldconfig") + if err := utils.MkdirAllInRoot(containerRootDirPath, hookScratchDirPath, 0755); err != nil { + return "", fmt.Errorf("error creating hook scratch folder: %w", err) + } + + err = utils.WithProcfd(containerRootDirPath, hookScratchDirPath, func(hookScratchDirFdPath string) error { + return createTmpFs(hookScratchDirFdPath, int(hostLdconfigInfo.Size())) + + }) + if err != nil { + return "", fmt.Errorf("error creating tmpfs: %w", err) + } + + if _, err := createFileInRoot(containerRootDirPath, ldconfigPath, hostLdconfigInfo.Mode()); err != nil { + return "", fmt.Errorf("error creating ldconfig: %w", err) + } + + err = utils.WithProcfd(containerRootDirPath, ldconfigPath, func(ldconfigFdPath string) error { + return unix.Mount(hostLdconfigPath, ldconfigFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "") + }) + if err != nil { + return "", fmt.Errorf("error bind mounting host ldconfig: %w", err) + } + + return ldconfigPath, nil +} + +func createFileInRoot(containerRootDirPath string, destinationPath string, mode os.FileMode) (string, error) { + dest, err := securejoin.SecureJoin(containerRootDirPath, destinationPath) + if err != nil { + return "", err + } + // Make the parent directory. + destDir, destBase := filepath.Split(dest) + destDirFd, err := utils.MkdirAllInRootOpen(containerRootDirPath, destDir, 0755) + if err != nil { + return "", fmt.Errorf("error creating parent dir: %w", err) + } + defer destDirFd.Close() + // Make the target file. 
We want to avoid opening any file that is + // already there because it could be a "bad" file like an invalid + // device or hung tty that might cause a DoS, so we use mknodat. + // destBase does not contain any "/" components, and mknodat does + // not follow trailing symlinks, so we can safely just call mknodat + // here. + if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|uint32(mode), 0); err != nil { + // If we get EEXIST, there was already an inode there and + // we can consider that a success. + if !errors.Is(err, unix.EEXIST) { + return "", fmt.Errorf("error creating empty file: %w", err) + } + } + return dest, nil +} + +// mountProc mounts a clean proc filesystem in the new root. +func mountProc(newroot string) error { + target := filepath.Join(newroot, "/proc") + + if err := os.MkdirAll(target, 0755); err != nil { + return fmt.Errorf("error creating directory: %w", err) + } + return unix.Mount("proc", target, "proc", 0, "") +} + +// createTmpFs creates a tmpfs at the specified location with the specified size. +func createTmpFs(target string, size int) error { + return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size)) +} + +// createReexecCommand creates a command that can be used to trigger the reexec +// initializer. +// On linux this command runs in new namespaces. +func createReexecCommand(args []string) *exec.Cmd { + cmd := reexec.Command(args...) 
+ cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: syscall.CLONE_NEWNS | + syscall.CLONE_NEWUTS | + syscall.CLONE_NEWIPC | + syscall.CLONE_NEWPID | + syscall.CLONE_NEWNET, + } + + return cmd +} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go new file mode 100644 index 00000000..86fc44c8 --- /dev/null +++ b/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go @@ -0,0 +1,51 @@ +//go:build !linux + +/** +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package create_soname_symlinks + +import ( + "fmt" + "os" + "os/exec" + + "github.com/moby/sys/reexec" +) + +func pivotRoot(newroot string) error { + return fmt.Errorf("not supported") +} + +func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) { + return "", fmt.Errorf("not supported") +} + +func mountProc(newroot string) error { + return fmt.Errorf("not supported") +} + +// createReexecCommand creates a command that can be used ot trigger the reexec +// initializer. +func createReexecCommand(args []string) *exec.Cmd { + cmd := reexec.Command(args...) 
+ cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + return cmd +} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go new file mode 100644 index 00000000..6efb2cc9 --- /dev/null +++ b/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go @@ -0,0 +1,58 @@ +//go:build linux + +/** +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package create_soname_symlinks + +import ( + "fmt" + "os" + "strconv" + "syscall" + + "github.com/opencontainers/runc/libcontainer/exeseal" +) + +// SafeExec attempts to clone the specified binary (as an memfd, for example) before executing it. 
+func SafeExec(path string, args []string, envv []string) error { + safeExe, err := cloneBinary(path) + if err != nil { + //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection + return syscall.Exec(path, args, envv) + } + defer safeExe.Close() + + exePath := "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd())) + //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection + return syscall.Exec(exePath, args, envv) +} + +func cloneBinary(path string) (*os.File, error) { + exe, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("opening current binary: %w", err) + } + defer exe.Close() + + stat, err := exe.Stat() + if err != nil { + return nil, fmt.Errorf("checking %v size: %w", path, err) + } + size := stat.Size() + + return exeseal.CloneBinary(exe, size, path, os.TempDir()) +} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go new file mode 100644 index 00000000..d997fd8f --- /dev/null +++ b/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go @@ -0,0 +1,28 @@ +//go:build !linux + +/** +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package create_soname_symlinks + +import "syscall" + +// SafeExec is not implemented on non-linux systems and forwards directly to the +// Exec syscall. 
+func SafeExec(path string, args []string, envv []string) error { + //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection + return syscall.Exec(path, args, envv) +} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go new file mode 100644 index 00000000..524a5536 --- /dev/null +++ b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go @@ -0,0 +1,191 @@ +/** +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package create_soname_symlinks + +import ( + "errors" + "fmt" + "log" + "os" + "strings" + + "github.com/moby/sys/reexec" + "github.com/urfave/cli/v2" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" +) + +const ( + reexecUpdateLdCacheCommandName = "reexec-create-soname-symlinks" +) + +type command struct { + logger logger.Interface +} + +type options struct { + folders cli.StringSlice + ldconfigPath string + containerSpec string +} + +func init() { + reexec.Register(reexecUpdateLdCacheCommandName, createSonameSymlinksHandler) + if reexec.Init() { + os.Exit(0) + } +} + +// NewCommand constructs a create-soname-symlinks command with the specified logger +func NewCommand(logger logger.Interface) *cli.Command { + c := command{ + logger: logger, + } + return c.build() +} + +// build the create-soname-symlinks command +func (m command) build() *cli.Command { + cfg := options{} + + // Create the 'create-soname-symlinks' command + c := cli.Command{ + Name: "create-soname-symlinks", + Usage: "Create soname symlinks libraries in specified directories", + Before: func(c *cli.Context) error { + return m.validateFlags(c, &cfg) + }, + Action: func(c *cli.Context) error { + return m.run(c, &cfg) + }, + } + + c.Flags = []cli.Flag{ + &cli.StringSliceFlag{ + Name: "folder", + Usage: "Specify a directory to generate soname symlinks in. Can be specified multiple times", + Destination: &cfg.folders, + }, + &cli.StringFlag{ + Name: "ldconfig-path", + Usage: "Specify the path to ldconfig on the host", + Destination: &cfg.ldconfigPath, + Value: "/sbin/ldconfig", + }, + &cli.StringFlag{ + Name: "container-spec", + Usage: "Specify the path to the OCI container spec.
If empty or '-' the spec will be read from STDIN", + Destination: &cfg.containerSpec, + }, + } + + return &c +} + +func (m command) validateFlags(c *cli.Context, cfg *options) error { + if cfg.ldconfigPath == "" { + return errors.New("ldconfig-path must be specified") + } + return nil +} + +func (m command) run(c *cli.Context, cfg *options) error { + s, err := oci.LoadContainerState(cfg.containerSpec) + if err != nil { + return fmt.Errorf("failed to load container state: %v", err) + } + + containerRootDir, err := s.GetContainerRoot() + if err != nil || containerRootDir == "" || containerRootDir == "/" { + return fmt.Errorf("failed to determined container root: %v", err) + } + + args := []string{ + reexecUpdateLdCacheCommandName, + strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"), + containerRootDir, + } + args = append(args, cfg.folders.Value()...) + + cmd := createReexecCommand(args) + + return cmd.Run() +} + +// createSonameSymlinksHandler wraps createSonameSymlinks with error handling. +func createSonameSymlinksHandler() { + if err := createSonameSymlinks(os.Args); err != nil { + log.Printf("Error updating ldcache: %v", err) + os.Exit(1) + } +} + +// createSonameSymlinks is invoked from a reexec'd handler and provides namespace +// isolation for the operations performed by this hook. +// At the point where this is invoked, we are in a new mount namespace that is +// cloned from the parent. +// +// args[0] is the reexec initializer function name +// args[1] is the path of the ldconfig binary on the host +// args[2] is the container root directory +// The remaining args are directories that need to be added to the ldcache. +func createSonameSymlinks(args []string) error { + if len(args) < 3 { + return fmt.Errorf("incorrect arguments: %v", args) + } + hostLdconfigPath := args[1] + containerRootDirPath := args[2] + + // To prevent leaking the parent proc filesystem, we create a new proc mount + // in the container root. 
+ if err := mountProc(containerRootDirPath); err != nil { + return fmt.Errorf("error mounting /proc: %w", err) + } + + // We mount the host ldconfig before we pivot root since host paths are not + // visible after the pivot root operation. + ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath) + if err != nil { + return fmt.Errorf("error mounting host ldconfig: %w", err) + } + + // We pivot to the container root for the new process, this further limits + // access to the host. + if err := pivotRoot(containerRootDirPath); err != nil { + return fmt.Errorf("error running pivot_root: %w", err) + } + + return runLdconfig(ldconfigPath, args[3:]...) +} + +// runLdconfig runs the ldconfig binary and ensures that soname symlinks are +// created in the specified directories. +func runLdconfig(ldconfigPath string, directories ...string) error { + args := []string{ + "ldconfig", + // Explicitly disable updating the LDCache. + "-N", + // Specify -n to only process the specified directories. + "-n", + } + args = append(args, directories...) 
+ + return SafeExec(ldconfigPath, args, nil) +} diff --git a/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go b/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go index d7246330..ed90c8e3 100644 --- a/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go +++ b/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go @@ -97,6 +97,15 @@ containerEdits: - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so env: - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: {{ .toolkitRoot }}/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: {{ .toolkitRoot }}/nvidia-cdi-hook args: diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index 6f762761..d2e31749 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -102,6 +102,15 @@ containerEdits: - --host-driver-version=999.88.77 env: - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: @@ -178,6 +187,15 @@ containerEdits: - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so env: - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: @@ -254,6 +272,15 @@ containerEdits: - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so env: - NVIDIA_CTK_DEBUG=false + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu + env: + - NVIDIA_CTK_DEBUG=false - hookName: createContainer 
path: /usr/bin/nvidia-cdi-hook args: diff --git a/internal/discover/hooks.go b/internal/discover/hooks.go index 3f2c9ebb..893c052d 100644 --- a/internal/discover/hooks.go +++ b/internal/discover/hooks.go @@ -46,6 +46,9 @@ const ( // An UpdateLDCacheHook is the hook used to update the ldcache in the // container. This allows injected libraries to be discoverable. UpdateLDCacheHook = HookName("update-ldcache") + // A CreateSonameSymlinksHook is the hook used to ensure that soname symlinks + // for injected libraries exist in the container. + CreateSonameSymlinksHook = HookName("create-soname-symlinks") defaultNvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook" ) diff --git a/internal/discover/ldconfig.go b/internal/discover/ldconfig.go index eb5ab467..15356de8 100644 --- a/internal/discover/ldconfig.go +++ b/internal/discover/ldconfig.go @@ -51,28 +51,22 @@ func (d ldconfig) Hooks() ([]Hook, error) { return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err) } - h := createLDCacheUpdateHook( - d.hookCreator, - d.ldconfigPath, - getLibraryPaths(mounts), - ) - - return h.Hooks() -} - -// createLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache -func createLDCacheUpdateHook(hookCreator HookCreator, ldconfig string, libraries []string) *Hook { var args []string - if ldconfig != "" { - args = append(args, "--ldconfig-path", ldconfig) + if d.ldconfigPath != "" { + args = append(args, "--ldconfig-path", d.ldconfigPath) } - for _, f := range uniqueFolders(libraries) { + for _, f := range uniqueFolders(getLibraryPaths(mounts)) { args = append(args, "--folder", f) } - return hookCreator.Create(UpdateLDCacheHook, args...) 
+ h := Merge( + d.hookCreator.Create(CreateSonameSymlinksHook, args...), + d.hookCreator.Create(UpdateLDCacheHook, args...), + ) + + return h.Hooks() } // getLibraryPaths extracts the library dirs from the specified mounts diff --git a/internal/discover/ldconfig_test.go b/internal/discover/ldconfig_test.go index ff18118d..30576a7b 100644 --- a/internal/discover/ldconfig_test.go +++ b/internal/discover/ldconfig_test.go @@ -39,11 +39,24 @@ func TestLDCacheUpdateHook(t *testing.T) { mounts []Mount mountError error expectedError error - expectedArgs []string + expectedHooks []Hook }{ { - description: "empty mounts", - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache"}, + description: "empty mounts", + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + }, }, { description: "mount error", @@ -66,7 +79,20 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: "/usr/local/lib/libbar.so", }, }, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + }, }, { description: "host paths are ignored", @@ -76,12 +102,38 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: 
"/usr/local/lib/libfoo.so", }, }, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + }, }, { description: "explicit ldconfig path is passed", ldconfigPath: testLdconfigPath, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--ldconfig-path", testLdconfigPath}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, + Env: []string{"NVIDIA_CTK_DEBUG=false"}, + }, + }, }, } @@ -92,13 +144,6 @@ func TestLDCacheUpdateHook(t *testing.T) { return tc.mounts, tc.mountError }, } - expectedHook := Hook{ - Path: testNvidiaCDIHookPath, - Args: tc.expectedArgs, - Lifecycle: "createContainer", - Env: []string{"NVIDIA_CTK_DEBUG=false"}, - } - d, err := NewLDCacheUpdateHook(logger, mountMock, hookCreator, tc.ldconfigPath) require.NoError(t, err) @@ -112,9 +157,7 @@ func TestLDCacheUpdateHook(t *testing.T) { } require.NoError(t, err) - require.Len(t, hooks, 1) - - require.EqualValues(t, hooks[0], expectedHook) + require.EqualValues(t, tc.expectedHooks, hooks) devices, err := d.Devices() require.NoError(t, err) diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index 50d5c4ea..8bd0bf12 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -56,6 +56,9 @@ const ( 
EnableCudaCompatHook = discover.EnableCudaCompatHook // An UpdateLDCacheHook is used to update the ldcache in the container. UpdateLDCacheHook = discover.UpdateLDCacheHook + // A CreateSonameSymlinksHook is the hook used to ensure that soname symlinks + // for injected libraries exist in the container. + CreateSonameSymlinksHook = discover.CreateSonameSymlinksHook // Deprecated: Use CreateSymlinksHook instead. HookCreateSymlinks = CreateSymlinksHook diff --git a/tests/e2e/nvidia-container-toolkit_test.go b/tests/e2e/nvidia-container-toolkit_test.go index f89b79a5..c143ea7d 100644 --- a/tests/e2e/nvidia-container-toolkit_test.go +++ b/tests/e2e/nvidia-container-toolkit_test.go @@ -235,4 +235,26 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { Expect(output).To(Equal("ModifyDeviceFiles: 0\n")) }) }) + + When("A container is run using CDI", Ordered, func() { + BeforeAll(func(ctx context.Context) { + _, _, err := runner.Run("docker pull ubuntu") + Expect(err).ToNot(HaveOccurred()) + }) + + It("should include libcuda.so in the ldcache", func(ctx context.Context) { + ldcacheOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu bash -c \"ldconfig -p | grep 'libcuda.so'\"") + Expect(err).ToNot(HaveOccurred()) + Expect(ldcacheOutput).ToNot(BeEmpty()) + + ldcacheLines := strings.Split(ldcacheOutput, "\n") + var libs []string + for _, line := range ldcacheLines { + parts := strings.SplitN(line, " (", 2) + libs = append(libs, strings.TrimSpace(parts[0])) + } + + Expect(libs).To(ContainElements([]string{"libcuda.so", "libcuda.so.1"})) + }) + }) }) From 39975fc77b51ad32d7bd259eb9006c3b13383c80 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 17 Jun 2025 14:23:27 +0200 Subject: [PATCH 2/2] [no-relnote] Refactor ldconfig hooks Signed-off-by: Evan Lezar --- .../create-soname-symlinks/ldconfig_other.go | 51 ----- .../create-soname-symlinks/safe-exec_linux.go | 58 ----- 
.../create-soname-symlinks/safe-exec_other.go | 28 --- .../create-soname-symlinks/soname-symlinks.go | 63 ++---- .../update-ldcache/container-root.go | 46 ---- .../update-ldcache/ldconfig_linux.go | 200 ----------------- .../update-ldcache/update-ldcache.go | 129 ++--------- internal/ldconfig/ldconfig.go | 206 ++++++++++++++++++ .../ldconfig}/ldconfig_linux.go | 8 +- .../ldconfig}/ldconfig_other.go | 16 +- .../ldconfig}/safe-exec_linux.go | 2 +- .../ldconfig}/safe-exec_other.go | 2 +- 12 files changed, 253 insertions(+), 556 deletions(-) delete mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go delete mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go delete mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go delete mode 100644 cmd/nvidia-cdi-hook/update-ldcache/container-root.go delete mode 100644 cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go create mode 100644 internal/ldconfig/ldconfig.go rename {cmd/nvidia-cdi-hook/create-soname-symlinks => internal/ldconfig}/ldconfig_linux.go (98%) rename {cmd/nvidia-cdi-hook/update-ldcache => internal/ldconfig}/ldconfig_other.go (76%) rename {cmd/nvidia-cdi-hook/update-ldcache => internal/ldconfig}/safe-exec_linux.go (98%) rename {cmd/nvidia-cdi-hook/update-ldcache => internal/ldconfig}/safe-exec_other.go (98%) diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go deleted file mode 100644 index 86fc44c8..00000000 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go +++ /dev/null @@ -1,51 +0,0 @@ -//go:build !linux - -/** -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package create_soname_symlinks - -import ( - "fmt" - "os" - "os/exec" - - "github.com/moby/sys/reexec" -) - -func pivotRoot(newroot string) error { - return fmt.Errorf("not supported") -} - -func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) { - return "", fmt.Errorf("not supported") -} - -func mountProc(newroot string) error { - return fmt.Errorf("not supported") -} - -// createReexecCommand creates a command that can be used ot trigger the reexec -// initializer. -func createReexecCommand(args []string) *exec.Cmd { - cmd := reexec.Command(args...) - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - return cmd -} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go deleted file mode 100644 index 6efb2cc9..00000000 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go +++ /dev/null @@ -1,58 +0,0 @@ -//go:build linux - -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package create_soname_symlinks - -import ( - "fmt" - "os" - "strconv" - "syscall" - - "github.com/opencontainers/runc/libcontainer/exeseal" -) - -// SafeExec attempts to clone the specified binary (as an memfd, for example) before executing it. -func SafeExec(path string, args []string, envv []string) error { - safeExe, err := cloneBinary(path) - if err != nil { - //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection - return syscall.Exec(path, args, envv) - } - defer safeExe.Close() - - exePath := "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd())) - //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection - return syscall.Exec(exePath, args, envv) -} - -func cloneBinary(path string) (*os.File, error) { - exe, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("opening current binary: %w", err) - } - defer exe.Close() - - stat, err := exe.Stat() - if err != nil { - return nil, fmt.Errorf("checking %v size: %w", path, err) - } - size := stat.Size() - - return exeseal.CloneBinary(exe, size, path, os.TempDir()) -} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go deleted file mode 100644 index d997fd8f..00000000 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go +++ /dev/null @@ -1,28 +0,0 @@ -//go:build !linux - -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package create_soname_symlinks - -import "syscall" - -// SafeExec is not implemented on non-linux systems and forwards directly to the -// Exec syscall. -func SafeExec(path string, args []string, envv []string) error { - //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection - return syscall.Exec(path, args, envv) -} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go index 524a5536..7f1da580 100644 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go +++ b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go @@ -22,12 +22,11 @@ import ( "fmt" "log" "os" - "strings" "github.com/moby/sys/reexec" "github.com/urfave/cli/v2" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" ) @@ -117,14 +116,15 @@ func (m command) run(c *cli.Context, cfg *options) error { return fmt.Errorf("failed to determined container root: %v", err) } - args := []string{ + cmd, err := ldconfig.NewRunner( reexecUpdateLdCacheCommandName, - strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"), + cfg.ldconfigPath, containerRootDir, + cfg.folders.Value()..., + ) + if err != nil { + return err } - args = append(args, cfg.folders.Value()...) 
- - cmd := createReexecCommand(args) return cmd.Run() } @@ -137,15 +137,16 @@ func createSonameSymlinksHandler() { } } -// createSonameSymlinks is invoked from a reexec'd handler and provides namespace -// isolation for the operations performed by this hook. -// At the point where this is invoked, we are in a new mount namespace that is -// cloned from the parent. +// createSonameSymlinks ensures that soname symlinks are created in the +// specified directories. +// It is invoked from a reexec'd handler and provides namespace isolation for +// the operations performed by this hook. At the point where this is invoked, +// we are in a new mount namespace that is cloned from the parent. // // args[0] is the reexec initializer function name // args[1] is the path of the ldconfig binary on the host // args[2] is the container root directory -// The remaining args are directories that need to be added to the ldcache. +// The remaining args are directories where soname symlinks need to be created. func createSonameSymlinks(args []string) error { if len(args) < 3 { return fmt.Errorf("incorrect arguments: %v", args) @@ -153,39 +154,13 @@ func createSonameSymlinks(args []string) error { hostLdconfigPath := args[1] containerRootDirPath := args[2] - // To prevent leaking the parent proc filesystem, we create a new proc mount - // in the container root. - if err := mountProc(containerRootDirPath); err != nil { - return fmt.Errorf("error mounting /proc: %w", err) - } - - // We mount the host ldconfig before we pivot root since host paths are not - // visible after the pivot root operation. 
- ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath) + ldconfig, err := ldconfig.New( + hostLdconfigPath, + containerRootDirPath, + ) if err != nil { - return fmt.Errorf("error mounting host ldconfig: %w", err) + return fmt.Errorf("failed to construct ldconfig runner: %w", err) } - // We pivot to the container root for the new process, this further limits - // access to the host. - if err := pivotRoot(containerRootDirPath); err != nil { - return fmt.Errorf("error running pivot_root: %w", err) - } - - return runLdconfig(ldconfigPath, args[3:]...) -} - -// runLdconfig runs the ldconfig binary and ensures that soname symlinks are -// created in the specified directories. -func runLdconfig(ldconfigPath string, directories ...string) error { - args := []string{ - "ldconfig", - // Explicitly disable updating the LDCache. - "-N", - // Specify -n to only process the specified directories. - "-n", - } - args = append(args, directories...) - - return SafeExec(ldconfigPath, args, nil) + return ldconfig.CreateSonameSymlinks(args[3:]...) } diff --git a/cmd/nvidia-cdi-hook/update-ldcache/container-root.go b/cmd/nvidia-cdi-hook/update-ldcache/container-root.go deleted file mode 100644 index 71a49469..00000000 --- a/cmd/nvidia-cdi-hook/update-ldcache/container-root.go +++ /dev/null @@ -1,46 +0,0 @@ -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-**/ - -package ldcache - -import ( - "os" - "path/filepath" - - "github.com/moby/sys/symlink" -) - -// A containerRoot represents the root filesystem of a container. -type containerRoot string - -// hasPath checks whether the specified path exists in the root. -func (r containerRoot) hasPath(path string) bool { - resolved, err := r.resolve(path) - if err != nil { - return false - } - if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) { - return false - } - return true -} - -// resolve returns the absolute path including root path. -// Symlinks are resolved, but are guaranteed to resolve in the root. -func (r containerRoot) resolve(path string) (string, error) { - absolute := filepath.Clean(filepath.Join(string(r), path)) - return symlink.FollowSymlinkInScope(absolute, string(r)) -} diff --git a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go b/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go deleted file mode 100644 index c472d14f..00000000 --- a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go +++ /dev/null @@ -1,200 +0,0 @@ -//go:build linux - -/** -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-**/ - -package ldcache - -import ( - "errors" - "fmt" - "os" - "os/exec" - "path/filepath" - "strconv" - "syscall" - - securejoin "github.com/cyphar/filepath-securejoin" - - "github.com/moby/sys/reexec" - "github.com/opencontainers/runc/libcontainer/utils" - "golang.org/x/sys/unix" -) - -// pivotRoot will call pivot_root such that rootfs becomes the new root -// filesystem, and everything else is cleaned up. -// This is adapted from the implementation here: -// -// https://github.com/opencontainers/runc/blob/e89a29929c775025419ab0d218a43588b4c12b9a/libcontainer/rootfs_linux.go#L1056-L1113 -// -// With the `mount` and `unmount` calls changed to direct unix.Mount and unix.Unmount calls. -func pivotRoot(rootfs string) error { - // While the documentation may claim otherwise, pivot_root(".", ".") is - // actually valid. What this results in is / being the new root but - // /proc/self/cwd being the old root. Since we can play around with the cwd - // with pivot_root this allows us to pivot without creating directories in - // the rootfs. Shout-outs to the LXC developers for giving us this idea. - - oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0) - if err != nil { - return &os.PathError{Op: "open", Path: "/", Err: err} - } - defer unix.Close(oldroot) //nolint: errcheck - - newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0) - if err != nil { - return &os.PathError{Op: "open", Path: rootfs, Err: err} - } - defer unix.Close(newroot) //nolint: errcheck - - // Change to the new root so that the pivot_root actually acts on it. - if err := unix.Fchdir(newroot); err != nil { - return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(newroot), Err: err} - } - - if err := unix.PivotRoot(".", "."); err != nil { - return &os.PathError{Op: "pivot_root", Path: ".", Err: err} - } - - // Currently our "." is oldroot (according to the current kernel code). 
- // However, purely for safety, we will fchdir(oldroot) since there isn't - // really any guarantee from the kernel what /proc/self/cwd will be after a - // pivot_root(2). - - if err := unix.Fchdir(oldroot); err != nil { - return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(oldroot), Err: err} - } - - // Make oldroot rslave to make sure our unmounts don't propagate to the - // host (and thus bork the machine). We don't use rprivate because this is - // known to cause issues due to races where we still have a reference to a - // mount while a process in the host namespace are trying to operate on - // something they think has no mounts (devicemapper in particular). - if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { - return err - } - // Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd. - if err := unix.Unmount(".", unix.MNT_DETACH); err != nil { - return err - } - - // Switch back to our shiny new root. - if err := unix.Chdir("/"); err != nil { - return &os.PathError{Op: "chdir", Path: "/", Err: err} - } - return nil -} - -// mountLdConfig mounts the host ldconfig to the mount namespace of the hook. -// We use WithProcfd to perform the mount operations to ensure that the changes -// are persisted across the pivot root. 
-func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) { - hostLdconfigInfo, err := os.Stat(hostLdconfigPath) - if err != nil { - return "", fmt.Errorf("error reading host ldconfig: %w", err) - } - - hookScratchDirPath := "/var/run/nvidia-ctk-hook" - ldconfigPath := filepath.Join(hookScratchDirPath, "ldconfig") - if err := utils.MkdirAllInRoot(containerRootDirPath, hookScratchDirPath, 0755); err != nil { - return "", fmt.Errorf("error creating hook scratch folder: %w", err) - } - - err = utils.WithProcfd(containerRootDirPath, hookScratchDirPath, func(hookScratchDirFdPath string) error { - return createTmpFs(hookScratchDirFdPath, int(hostLdconfigInfo.Size())) - - }) - if err != nil { - return "", fmt.Errorf("error creating tmpfs: %w", err) - } - - if _, err := createFileInRoot(containerRootDirPath, ldconfigPath, hostLdconfigInfo.Mode()); err != nil { - return "", fmt.Errorf("error creating ldconfig: %w", err) - } - - err = utils.WithProcfd(containerRootDirPath, ldconfigPath, func(ldconfigFdPath string) error { - return unix.Mount(hostLdconfigPath, ldconfigFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "") - }) - if err != nil { - return "", fmt.Errorf("error bind mounting host ldconfig: %w", err) - } - - return ldconfigPath, nil -} - -func createFileInRoot(containerRootDirPath string, destinationPath string, mode os.FileMode) (string, error) { - dest, err := securejoin.SecureJoin(containerRootDirPath, destinationPath) - if err != nil { - return "", err - } - // Make the parent directory. - destDir, destBase := filepath.Split(dest) - destDirFd, err := utils.MkdirAllInRootOpen(containerRootDirPath, destDir, 0755) - if err != nil { - return "", fmt.Errorf("error creating parent dir: %w", err) - } - defer destDirFd.Close() - // Make the target file. 
We want to avoid opening any file that is - // already there because it could be a "bad" file like an invalid - // device or hung tty that might cause a DoS, so we use mknodat. - // destBase does not contain any "/" components, and mknodat does - // not follow trailing symlinks, so we can safely just call mknodat - // here. - if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|uint32(mode), 0); err != nil { - // If we get EEXIST, there was already an inode there and - // we can consider that a success. - if !errors.Is(err, unix.EEXIST) { - return "", fmt.Errorf("error creating empty file: %w", err) - } - } - return dest, nil -} - -// mountProc mounts a clean proc filesystem in the new root. -func mountProc(newroot string) error { - target := filepath.Join(newroot, "/proc") - - if err := os.MkdirAll(target, 0755); err != nil { - return fmt.Errorf("error creating directory: %w", err) - } - return unix.Mount("proc", target, "proc", 0, "") -} - -// createTmpFs creates a tmpfs at the specified location with the specified size. -func createTmpFs(target string, size int) error { - return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size)) -} - -// createReexecCommand creates a command that can be used to trigger the reexec -// initializer. -// On linux this command runs in new namespaces. -func createReexecCommand(args []string) *exec.Cmd { - cmd := reexec.Command(args...) 
- cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - cmd.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: syscall.CLONE_NEWNS | - syscall.CLONE_NEWUTS | - syscall.CLONE_NEWIPC | - syscall.CLONE_NEWPID | - syscall.CLONE_NEWNET, - } - - return cmd -} diff --git a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go index 49b73371..ef614709 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go +++ b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go @@ -21,24 +21,16 @@ import ( "fmt" "log" "os" - "strings" "github.com/moby/sys/reexec" "github.com/urfave/cli/v2" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" ) const ( - // ldsoconfdFilenamePattern specifies the pattern for the filename - // in ld.so.conf.d that includes references to the specified directories. - // The 00-nvcr prefix is chosen to ensure that these libraries have a - // higher precedence than other libraries on the system, but lower than - // the 00-cuda-compat that is included in some containers. - ldsoconfdFilenamePattern = "00-nvcr-*.conf" - reexecUpdateLdCacheCommandName = "reexec-update-ldcache" ) @@ -123,15 +115,15 @@ func (m command) run(c *cli.Context, cfg *options) error { return fmt.Errorf("failed to determined container root: %v", err) } - args := []string{ + cmd, err := ldconfig.NewRunner( reexecUpdateLdCacheCommandName, - strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"), + cfg.ldconfigPath, containerRootDir, + cfg.folders.Value()..., + ) + if err != nil { + return err } - args = append(args, cfg.folders.Value()...) 
- - cmd := createReexecCommand(args) - return cmd.Run() } @@ -143,15 +135,16 @@ func updateLdCacheHandler() { } } -// updateLdCache is invoked from a reexec'd handler and provides namespace -// isolation for the operations performed by this hook. -// At the point where this is invoked, we are in a new mount namespace that is -// cloned from the parent. +// updateLdCache ensures that the ldcache in the container is updated to include +// libraries that are mounted from the host. +// It is invoked from a reexec'd handler and provides namespace isolation for +// the operations performed by this hook. At the point where this is invoked, +// we are in a new mount namespace that is cloned from the parent. // // args[0] is the reexec initializer function name // args[1] is the path of the ldconfig binary on the host // args[2] is the container root directory -// The remaining args are folders that need to be added to the ldcache. +// The remaining args are the folders that need to be added to the ldcache. func updateLdCache(args []string) error { if len(args) < 3 { return fmt.Errorf("incorrect arguments: %v", args) @@ -159,97 +152,13 @@ func updateLdCache(args []string) error { hostLdconfigPath := args[1] containerRootDirPath := args[2] - // To prevent leaking the parent proc filesystem, we create a new proc mount - // in the container root. - if err := mountProc(containerRootDirPath); err != nil { - return fmt.Errorf("error mounting /proc: %w", err) - } - - // We mount the host ldconfig before we pivot root since host paths are not - // visible after the pivot root operation. 
- ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath) + ldconfig, err := ldconfig.New( + hostLdconfigPath, + containerRootDirPath, + ) if err != nil { - return fmt.Errorf("error mounting host ldconfig: %w", err) + return fmt.Errorf("failed to construct ldconfig runner: %w", err) } - // We pivot to the container root for the new process, this further limits - // access to the host. - if err := pivotRoot(containerRootDirPath); err != nil { - return fmt.Errorf("error running pivot_root: %w", err) - } - - return runLdconfig(ldconfigPath, args[3:]...) -} - -// runLdconfig runs the ldconfig binary and ensures that the specified directories -// are processed for the ldcache. -func runLdconfig(ldconfigPath string, directories ...string) error { - args := []string{ - "ldconfig", - // Explicitly specify using /etc/ld.so.conf since the host's ldconfig may - // be configured to use a different config file by default. - // Note that since we apply the `-r {{ .containerRootDir }}` argument, /etc/ld.so.conf is - // in the container. - "-f", "/etc/ld.so.conf", - } - - containerRoot := containerRoot("/") - - if containerRoot.hasPath("/etc/ld.so.cache") { - args = append(args, "-C", "/etc/ld.so.cache") - } else { - args = append(args, "-N") - } - - if containerRoot.hasPath("/etc/ld.so.conf.d") { - err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...) - if err != nil { - return fmt.Errorf("failed to update ld.so.conf.d: %w", err) - } - } else { - args = append(args, directories...) - } - - return SafeExec(ldconfigPath, args, nil) -} - -// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/. -// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and -// contains the specified directories on each line. 
-func createLdsoconfdFile(pattern string, dirs ...string) error { - if len(dirs) == 0 { - return nil - } - - ldsoconfdDir := "/etc/ld.so.conf.d" - if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil { - return fmt.Errorf("failed to create ld.so.conf.d: %w", err) - } - - configFile, err := os.CreateTemp(ldsoconfdDir, pattern) - if err != nil { - return fmt.Errorf("failed to create config file: %w", err) - } - defer func() { - _ = configFile.Close() - }() - - added := make(map[string]bool) - for _, dir := range dirs { - if added[dir] { - continue - } - _, err = fmt.Fprintf(configFile, "%s\n", dir) - if err != nil { - return fmt.Errorf("failed to update config file: %w", err) - } - added[dir] = true - } - - // The created file needs to be world readable for the cases where the container is run as a non-root user. - if err := configFile.Chmod(0644); err != nil { - return fmt.Errorf("failed to chmod config file: %w", err) - } - - return nil + return ldconfig.UpdateLDCache(args[3:]...) } diff --git a/internal/ldconfig/ldconfig.go b/internal/ldconfig/ldconfig.go new file mode 100644 index 00000000..f3db1a77 --- /dev/null +++ b/internal/ldconfig/ldconfig.go @@ -0,0 +1,206 @@ +/** +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package ldconfig + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" +) + +const ( + // ldsoconfdFilenamePattern specifies the pattern for the filename + // in ld.so.conf.d that includes references to the specified directories. + // The 00-nvcr prefix is chosen to ensure that these libraries have a + // higher precedence than other libraries on the system, but lower than + // the 00-cuda-compat that is included in some containers. + ldsoconfdFilenamePattern = "00-nvcr-*.conf" +) + +type Ldconfig struct { + ldconfigPath string + inRoot string +} + +// NewRunner creates an exec.Cmd that can be used to run ldconfig. +func NewRunner(id string, ldconfigPath string, containerRoot string, additionalargs ...string) (*exec.Cmd, error) { + args := []string{ + id, + strings.TrimPrefix(config.NormalizeLDConfigPath("@"+ldconfigPath), "@"), + containerRoot, + } + args = append(args, additionalargs...) + + return createReexecCommand(args) +} + +// New creates an Ldconfig struct that is used to perform operations on the +// ldcache and libraries in a particular root (e.g. a container). +func New(ldconfigPath string, inRoot string) (*Ldconfig, error) { + l := &Ldconfig{ + ldconfigPath: ldconfigPath, + inRoot: inRoot, + } + if ldconfigPath == "" { + return nil, fmt.Errorf("an ldconfig path must be specified") + } + if inRoot == "" || inRoot == "/" { + return nil, fmt.Errorf("ldconfig must be run in the non-system root") + } + return l, nil +} + +// CreateSonameSymlinks uses ldconfig to create the soname symlinks in the +// specified directories. +func (l *Ldconfig) CreateSonameSymlinks(directories ...string) error { + if len(directories) == 0 { + return nil + } + ldconfigPath, err := l.prepareRoot() + if err != nil { + return err + } + + args := []string{ + filepath.Base(ldconfigPath), + // Explicitly disable updating the LDCache. 
+ "-N", + // Specify -n to only process the specified directories. + "-n", + } + args = append(args, directories...) + + return SafeExec(ldconfigPath, args, nil) +} + +func (l *Ldconfig) UpdateLDCache(directories ...string) error { + ldconfigPath, err := l.prepareRoot() + if err != nil { + return err + } + + args := []string{ + filepath.Base(ldconfigPath), + // Explicitly specify using /etc/ld.so.conf since the host's ldconfig may + // be configured to use a different config file by default. + "-f", "/etc/ld.so.conf", + } + + if l.ldcacheExists() { + args = append(args, "-C", "/etc/ld.so.cache") + } else { + args = append(args, "-N") + } + + // If the ld.so.conf.d directory exists, we create a config file there + // containing the required directories, otherwise we add the specified + // directories to the ldconfig command directly. + if l.ldsoconfdDirectoryExists() { + err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...) + if err != nil { + return fmt.Errorf("failed to update ld.so.conf.d: %w", err) + } + } else { + args = append(args, directories...) + } + + return SafeExec(ldconfigPath, args, nil) +} + +func (l *Ldconfig) prepareRoot() (string, error) { + // To prevent leaking the parent proc filesystem, we create a new proc mount + // in the specified root. + if err := mountProc(l.inRoot); err != nil { + return "", fmt.Errorf("error mounting /proc: %w", err) + } + + // We mount the host ldconfig before we pivot root since host paths are not + // visible after the pivot root operation. + ldconfigPath, err := mountLdConfig(l.ldconfigPath, l.inRoot) + if err != nil { + return "", fmt.Errorf("error mounting host ldconfig: %w", err) + } + + // We pivot to the container root for the new process, this further limits + // access to the host. 
+ if err := pivotRoot(l.inRoot); err != nil { + return "", fmt.Errorf("error running pivot_root: %w", err) + } + + return ldconfigPath, nil +} + +func (l *Ldconfig) ldcacheExists() bool { + if _, err := os.Stat("/etc/ld.so.cache"); err != nil && os.IsNotExist(err) { + return false + } + return true +} + +func (l *Ldconfig) ldsoconfdDirectoryExists() bool { + info, err := os.Stat("/etc/ld.so.conf.d") + if err != nil { // info is nil on any stat error, not only when the path is missing + return false + } + return info.IsDir() +} + +// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/. +// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and +// contains the specified directories on each line. +func createLdsoconfdFile(pattern string, dirs ...string) error { + if len(dirs) == 0 { + return nil + } + + ldsoconfdDir := "/etc/ld.so.conf.d" + if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil { + return fmt.Errorf("failed to create ld.so.conf.d: %w", err) + } + + configFile, err := os.CreateTemp(ldsoconfdDir, pattern) + if err != nil { + return fmt.Errorf("failed to create config file: %w", err) + } + defer func() { + _ = configFile.Close() + }() + + added := make(map[string]bool) + for _, dir := range dirs { + if added[dir] { + continue + } + _, err = fmt.Fprintf(configFile, "%s\n", dir) + if err != nil { + return fmt.Errorf("failed to update config file: %w", err) + } + added[dir] = true + } + + // The created file needs to be world readable for the cases where the container is run as a non-root user. 
+ if err := configFile.Chmod(0644); err != nil { + return fmt.Errorf("failed to chmod config file: %w", err) + } + + return nil +} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_linux.go b/internal/ldconfig/ldconfig_linux.go similarity index 98% rename from cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_linux.go rename to internal/ldconfig/ldconfig_linux.go index ffa88b41..79702b11 100644 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_linux.go +++ b/internal/ldconfig/ldconfig_linux.go @@ -17,7 +17,7 @@ # limitations under the License. **/ -package create_soname_symlinks +package ldconfig import ( "errors" @@ -29,8 +29,8 @@ import ( "syscall" securejoin "github.com/cyphar/filepath-securejoin" - "github.com/moby/sys/reexec" + "github.com/opencontainers/runc/libcontainer/utils" "golang.org/x/sys/unix" ) @@ -182,7 +182,7 @@ func createTmpFs(target string, size int) error { // createReexecCommand creates a command that can be used to trigger the reexec // initializer. // On linux this command runs in new namespaces. -func createReexecCommand(args []string) *exec.Cmd { +func createReexecCommand(args []string) (*exec.Cmd, error) { cmd := reexec.Command(args...) cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout @@ -196,5 +196,5 @@ func createReexecCommand(args []string) *exec.Cmd { syscall.CLONE_NEWNET, } - return cmd + return cmd, nil } diff --git a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go b/internal/ldconfig/ldconfig_other.go similarity index 76% rename from cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go rename to internal/ldconfig/ldconfig_other.go index a6c35261..c5d452a2 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go +++ b/internal/ldconfig/ldconfig_other.go @@ -17,14 +17,11 @@ # limitations under the License. 
**/ -package ldcache +package ldconfig import ( "fmt" - "os" "os/exec" - - "github.com/moby/sys/reexec" ) func pivotRoot(newroot string) error { @@ -39,13 +36,6 @@ func mountProc(newroot string) error { return fmt.Errorf("not supported") } -// createReexecCommand creates a command that can be used ot trigger the reexec -// initializer. -func createReexecCommand(args []string) *exec.Cmd { - cmd := reexec.Command(args...) - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - return cmd +func createReexecCommand(args []string) (*exec.Cmd, error) { + return nil, fmt.Errorf("not supported") } diff --git a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go b/internal/ldconfig/safe-exec_linux.go similarity index 98% rename from cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go rename to internal/ldconfig/safe-exec_linux.go index 790ac6a8..09b6cc22 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go +++ b/internal/ldconfig/safe-exec_linux.go @@ -16,7 +16,7 @@ # limitations under the License. **/ -package ldcache +package ldconfig import ( "fmt" diff --git a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go b/internal/ldconfig/safe-exec_other.go similarity index 98% rename from cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go rename to internal/ldconfig/safe-exec_other.go index 71f9ddb5..3d017645 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go +++ b/internal/ldconfig/safe-exec_other.go @@ -16,7 +16,7 @@ # limitations under the License. **/ -package ldcache +package ldconfig import "syscall"