diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go deleted file mode 100644 index 86fc44c8..00000000 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_other.go +++ /dev/null @@ -1,51 +0,0 @@ -//go:build !linux - -/** -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package create_soname_symlinks - -import ( - "fmt" - "os" - "os/exec" - - "github.com/moby/sys/reexec" -) - -func pivotRoot(newroot string) error { - return fmt.Errorf("not supported") -} - -func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) { - return "", fmt.Errorf("not supported") -} - -func mountProc(newroot string) error { - return fmt.Errorf("not supported") -} - -// createReexecCommand creates a command that can be used ot trigger the reexec -// initializer. -func createReexecCommand(args []string) *exec.Cmd { - cmd := reexec.Command(args...) 
- cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - return cmd -} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go deleted file mode 100644 index 6efb2cc9..00000000 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_linux.go +++ /dev/null @@ -1,58 +0,0 @@ -//go:build linux - -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package create_soname_symlinks - -import ( - "fmt" - "os" - "strconv" - "syscall" - - "github.com/opencontainers/runc/libcontainer/exeseal" -) - -// SafeExec attempts to clone the specified binary (as an memfd, for example) before executing it. 
-func SafeExec(path string, args []string, envv []string) error { - safeExe, err := cloneBinary(path) - if err != nil { - //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection - return syscall.Exec(path, args, envv) - } - defer safeExe.Close() - - exePath := "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd())) - //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection - return syscall.Exec(exePath, args, envv) -} - -func cloneBinary(path string) (*os.File, error) { - exe, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("opening current binary: %w", err) - } - defer exe.Close() - - stat, err := exe.Stat() - if err != nil { - return nil, fmt.Errorf("checking %v size: %w", path, err) - } - size := stat.Size() - - return exeseal.CloneBinary(exe, size, path, os.TempDir()) -} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go deleted file mode 100644 index d997fd8f..00000000 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/safe-exec_other.go +++ /dev/null @@ -1,28 +0,0 @@ -//go:build !linux - -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package create_soname_symlinks - -import "syscall" - -// SafeExec is not implemented on non-linux systems and forwards directly to the -// Exec syscall. 
-func SafeExec(path string, args []string, envv []string) error { - //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection - return syscall.Exec(path, args, envv) -} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go index 524a5536..7f1da580 100644 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go +++ b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go @@ -22,12 +22,11 @@ import ( "fmt" "log" "os" - "strings" "github.com/moby/sys/reexec" "github.com/urfave/cli/v2" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" ) @@ -117,14 +116,15 @@ func (m command) run(c *cli.Context, cfg *options) error { return fmt.Errorf("failed to determined container root: %v", err) } - args := []string{ + cmd, err := ldconfig.NewRunner( reexecUpdateLdCacheCommandName, - strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"), + cfg.ldconfigPath, containerRootDir, + cfg.folders.Value()..., + ) + if err != nil { + return err } - args = append(args, cfg.folders.Value()...) - - cmd := createReexecCommand(args) return cmd.Run() } @@ -137,15 +137,16 @@ func createSonameSymlinksHandler() { } } -// createSonameSymlinks is invoked from a reexec'd handler and provides namespace -// isolation for the operations performed by this hook. -// At the point where this is invoked, we are in a new mount namespace that is -// cloned from the parent. +// createSonameSymlinks ensures that soname symlinks are created in the +// specified directories. +// It is invoked from a reexec'd handler and provides namespace isolation for +// the operations performed by this hook. 
At the point where this is invoked, +// we are in a new mount namespace that is cloned from the parent. // // args[0] is the reexec initializer function name // args[1] is the path of the ldconfig binary on the host // args[2] is the container root directory -// The remaining args are directories that need to be added to the ldcache. +// The remaining args are directories where soname symlinks need to be created. func createSonameSymlinks(args []string) error { if len(args) < 3 { return fmt.Errorf("incorrect arguments: %v", args) @@ -153,39 +154,13 @@ func createSonameSymlinks(args []string) error { hostLdconfigPath := args[1] containerRootDirPath := args[2] - // To prevent leaking the parent proc filesystem, we create a new proc mount - // in the container root. - if err := mountProc(containerRootDirPath); err != nil { - return fmt.Errorf("error mounting /proc: %w", err) - } - - // We mount the host ldconfig before we pivot root since host paths are not - // visible after the pivot root operation. - ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath) + ldconfig, err := ldconfig.New( + hostLdconfigPath, + containerRootDirPath, + ) if err != nil { - return fmt.Errorf("error mounting host ldconfig: %w", err) + return fmt.Errorf("failed to construct ldconfig runner: %w", err) } - // We pivot to the container root for the new process, this further limits - // access to the host. - if err := pivotRoot(containerRootDirPath); err != nil { - return fmt.Errorf("error running pivot_root: %w", err) - } - - return runLdconfig(ldconfigPath, args[3:]...) -} - -// runLdconfig runs the ldconfig binary and ensures that soname symlinks are -// created in the specified directories. -func runLdconfig(ldconfigPath string, directories ...string) error { - args := []string{ - "ldconfig", - // Explicitly disable updating the LDCache. - "-N", - // Specify -n to only process the specified directories. - "-n", - } - args = append(args, directories...) 
- - return SafeExec(ldconfigPath, args, nil) + return ldconfig.CreateSonameSymlinks(args[3:]...) } diff --git a/cmd/nvidia-cdi-hook/update-ldcache/container-root.go b/cmd/nvidia-cdi-hook/update-ldcache/container-root.go deleted file mode 100644 index 71a49469..00000000 --- a/cmd/nvidia-cdi-hook/update-ldcache/container-root.go +++ /dev/null @@ -1,46 +0,0 @@ -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package ldcache - -import ( - "os" - "path/filepath" - - "github.com/moby/sys/symlink" -) - -// A containerRoot represents the root filesystem of a container. -type containerRoot string - -// hasPath checks whether the specified path exists in the root. -func (r containerRoot) hasPath(path string) bool { - resolved, err := r.resolve(path) - if err != nil { - return false - } - if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) { - return false - } - return true -} - -// resolve returns the absolute path including root path. -// Symlinks are resolved, but are guaranteed to resolve in the root. 
-func (r containerRoot) resolve(path string) (string, error) { - absolute := filepath.Clean(filepath.Join(string(r), path)) - return symlink.FollowSymlinkInScope(absolute, string(r)) -} diff --git a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go b/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go deleted file mode 100644 index c472d14f..00000000 --- a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_linux.go +++ /dev/null @@ -1,200 +0,0 @@ -//go:build linux - -/** -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package ldcache - -import ( - "errors" - "fmt" - "os" - "os/exec" - "path/filepath" - "strconv" - "syscall" - - securejoin "github.com/cyphar/filepath-securejoin" - - "github.com/moby/sys/reexec" - "github.com/opencontainers/runc/libcontainer/utils" - "golang.org/x/sys/unix" -) - -// pivotRoot will call pivot_root such that rootfs becomes the new root -// filesystem, and everything else is cleaned up. -// This is adapted from the implementation here: -// -// https://github.com/opencontainers/runc/blob/e89a29929c775025419ab0d218a43588b4c12b9a/libcontainer/rootfs_linux.go#L1056-L1113 -// -// With the `mount` and `unmount` calls changed to direct unix.Mount and unix.Unmount calls. -func pivotRoot(rootfs string) error { - // While the documentation may claim otherwise, pivot_root(".", ".") is - // actually valid. 
What this results in is / being the new root but - // /proc/self/cwd being the old root. Since we can play around with the cwd - // with pivot_root this allows us to pivot without creating directories in - // the rootfs. Shout-outs to the LXC developers for giving us this idea. - - oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0) - if err != nil { - return &os.PathError{Op: "open", Path: "/", Err: err} - } - defer unix.Close(oldroot) //nolint: errcheck - - newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0) - if err != nil { - return &os.PathError{Op: "open", Path: rootfs, Err: err} - } - defer unix.Close(newroot) //nolint: errcheck - - // Change to the new root so that the pivot_root actually acts on it. - if err := unix.Fchdir(newroot); err != nil { - return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(newroot), Err: err} - } - - if err := unix.PivotRoot(".", "."); err != nil { - return &os.PathError{Op: "pivot_root", Path: ".", Err: err} - } - - // Currently our "." is oldroot (according to the current kernel code). - // However, purely for safety, we will fchdir(oldroot) since there isn't - // really any guarantee from the kernel what /proc/self/cwd will be after a - // pivot_root(2). - - if err := unix.Fchdir(oldroot); err != nil { - return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(oldroot), Err: err} - } - - // Make oldroot rslave to make sure our unmounts don't propagate to the - // host (and thus bork the machine). We don't use rprivate because this is - // known to cause issues due to races where we still have a reference to a - // mount while a process in the host namespace are trying to operate on - // something they think has no mounts (devicemapper in particular). - if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { - return err - } - // Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd. 
- if err := unix.Unmount(".", unix.MNT_DETACH); err != nil { - return err - } - - // Switch back to our shiny new root. - if err := unix.Chdir("/"); err != nil { - return &os.PathError{Op: "chdir", Path: "/", Err: err} - } - return nil -} - -// mountLdConfig mounts the host ldconfig to the mount namespace of the hook. -// We use WithProcfd to perform the mount operations to ensure that the changes -// are persisted across the pivot root. -func mountLdConfig(hostLdconfigPath string, containerRootDirPath string) (string, error) { - hostLdconfigInfo, err := os.Stat(hostLdconfigPath) - if err != nil { - return "", fmt.Errorf("error reading host ldconfig: %w", err) - } - - hookScratchDirPath := "/var/run/nvidia-ctk-hook" - ldconfigPath := filepath.Join(hookScratchDirPath, "ldconfig") - if err := utils.MkdirAllInRoot(containerRootDirPath, hookScratchDirPath, 0755); err != nil { - return "", fmt.Errorf("error creating hook scratch folder: %w", err) - } - - err = utils.WithProcfd(containerRootDirPath, hookScratchDirPath, func(hookScratchDirFdPath string) error { - return createTmpFs(hookScratchDirFdPath, int(hostLdconfigInfo.Size())) - - }) - if err != nil { - return "", fmt.Errorf("error creating tmpfs: %w", err) - } - - if _, err := createFileInRoot(containerRootDirPath, ldconfigPath, hostLdconfigInfo.Mode()); err != nil { - return "", fmt.Errorf("error creating ldconfig: %w", err) - } - - err = utils.WithProcfd(containerRootDirPath, ldconfigPath, func(ldconfigFdPath string) error { - return unix.Mount(hostLdconfigPath, ldconfigFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "") - }) - if err != nil { - return "", fmt.Errorf("error bind mounting host ldconfig: %w", err) - } - - return ldconfigPath, nil -} - -func createFileInRoot(containerRootDirPath string, destinationPath string, mode os.FileMode) (string, error) { - dest, err := securejoin.SecureJoin(containerRootDirPath, destinationPath) - if err != nil { - return "", err - 
} - // Make the parent directory. - destDir, destBase := filepath.Split(dest) - destDirFd, err := utils.MkdirAllInRootOpen(containerRootDirPath, destDir, 0755) - if err != nil { - return "", fmt.Errorf("error creating parent dir: %w", err) - } - defer destDirFd.Close() - // Make the target file. We want to avoid opening any file that is - // already there because it could be a "bad" file like an invalid - // device or hung tty that might cause a DoS, so we use mknodat. - // destBase does not contain any "/" components, and mknodat does - // not follow trailing symlinks, so we can safely just call mknodat - // here. - if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|uint32(mode), 0); err != nil { - // If we get EEXIST, there was already an inode there and - // we can consider that a success. - if !errors.Is(err, unix.EEXIST) { - return "", fmt.Errorf("error creating empty file: %w", err) - } - } - return dest, nil -} - -// mountProc mounts a clean proc filesystem in the new root. -func mountProc(newroot string) error { - target := filepath.Join(newroot, "/proc") - - if err := os.MkdirAll(target, 0755); err != nil { - return fmt.Errorf("error creating directory: %w", err) - } - return unix.Mount("proc", target, "proc", 0, "") -} - -// createTmpFs creates a tmpfs at the specified location with the specified size. -func createTmpFs(target string, size int) error { - return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size)) -} - -// createReexecCommand creates a command that can be used to trigger the reexec -// initializer. -// On linux this command runs in new namespaces. -func createReexecCommand(args []string) *exec.Cmd { - cmd := reexec.Command(args...) 
- cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - cmd.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: syscall.CLONE_NEWNS | - syscall.CLONE_NEWUTS | - syscall.CLONE_NEWIPC | - syscall.CLONE_NEWPID | - syscall.CLONE_NEWNET, - } - - return cmd -} diff --git a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go index 49b73371..ef614709 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go +++ b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go @@ -21,24 +21,16 @@ import ( "fmt" "log" "os" - "strings" "github.com/moby/sys/reexec" "github.com/urfave/cli/v2" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" ) const ( - // ldsoconfdFilenamePattern specifies the pattern for the filename - // in ld.so.conf.d that includes references to the specified directories. - // The 00-nvcr prefix is chosen to ensure that these libraries have a - // higher precedence than other libraries on the system, but lower than - // the 00-cuda-compat that is included in some containers. - ldsoconfdFilenamePattern = "00-nvcr-*.conf" - reexecUpdateLdCacheCommandName = "reexec-update-ldcache" ) @@ -123,15 +115,15 @@ func (m command) run(c *cli.Context, cfg *options) error { return fmt.Errorf("failed to determined container root: %v", err) } - args := []string{ + cmd, err := ldconfig.NewRunner( reexecUpdateLdCacheCommandName, - strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"), + cfg.ldconfigPath, containerRootDir, + cfg.folders.Value()..., + ) + if err != nil { + return err } - args = append(args, cfg.folders.Value()...) 
- - cmd := createReexecCommand(args) - return cmd.Run() } @@ -143,15 +135,16 @@ func updateLdCacheHandler() { } } -// updateLdCache is invoked from a reexec'd handler and provides namespace -// isolation for the operations performed by this hook. -// At the point where this is invoked, we are in a new mount namespace that is -// cloned from the parent. +// updateLdCache ensures that the ldcache in the container is updated to include +// libraries that are mounted from the host. +// It is invoked from a reexec'd handler and provides namespace isolation for +// the operations performed by this hook. At the point where this is invoked, +// we are in a new mount namespace that is cloned from the parent. // // args[0] is the reexec initializer function name // args[1] is the path of the ldconfig binary on the host // args[2] is the container root directory -// The remaining args are folders that need to be added to the ldcache. +// The remaining args are folders containing libraries to be added to the ldcache. func updateLdCache(args []string) error { if len(args) < 3 { return fmt.Errorf("incorrect arguments: %v", args) @@ -159,97 +152,13 @@ func updateLdCache(args []string) error { hostLdconfigPath := args[1] containerRootDirPath := args[2] - // To prevent leaking the parent proc filesystem, we create a new proc mount - // in the container root. - if err := mountProc(containerRootDirPath); err != nil { - return fmt.Errorf("error mounting /proc: %w", err) - } - - // We mount the host ldconfig before we pivot root since host paths are not - // visible after the pivot root operation. 
- ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath) + ldconfig, err := ldconfig.New( + hostLdconfigPath, + containerRootDirPath, + ) if err != nil { - return fmt.Errorf("error mounting host ldconfig: %w", err) + return fmt.Errorf("failed to construct ldconfig runner: %w", err) } - // We pivot to the container root for the new process, this further limits - // access to the host. - if err := pivotRoot(containerRootDirPath); err != nil { - return fmt.Errorf("error running pivot_root: %w", err) - } - - return runLdconfig(ldconfigPath, args[3:]...) -} - -// runLdconfig runs the ldconfig binary and ensures that the specified directories -// are processed for the ldcache. -func runLdconfig(ldconfigPath string, directories ...string) error { - args := []string{ - "ldconfig", - // Explicitly specify using /etc/ld.so.conf since the host's ldconfig may - // be configured to use a different config file by default. - // Note that since we apply the `-r {{ .containerRootDir }}` argument, /etc/ld.so.conf is - // in the container. - "-f", "/etc/ld.so.conf", - } - - containerRoot := containerRoot("/") - - if containerRoot.hasPath("/etc/ld.so.cache") { - args = append(args, "-C", "/etc/ld.so.cache") - } else { - args = append(args, "-N") - } - - if containerRoot.hasPath("/etc/ld.so.conf.d") { - err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...) - if err != nil { - return fmt.Errorf("failed to update ld.so.conf.d: %w", err) - } - } else { - args = append(args, directories...) - } - - return SafeExec(ldconfigPath, args, nil) -} - -// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/. -// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and -// contains the specified directories on each line. 
-func createLdsoconfdFile(pattern string, dirs ...string) error { - if len(dirs) == 0 { - return nil - } - - ldsoconfdDir := "/etc/ld.so.conf.d" - if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil { - return fmt.Errorf("failed to create ld.so.conf.d: %w", err) - } - - configFile, err := os.CreateTemp(ldsoconfdDir, pattern) - if err != nil { - return fmt.Errorf("failed to create config file: %w", err) - } - defer func() { - _ = configFile.Close() - }() - - added := make(map[string]bool) - for _, dir := range dirs { - if added[dir] { - continue - } - _, err = fmt.Fprintf(configFile, "%s\n", dir) - if err != nil { - return fmt.Errorf("failed to update config file: %w", err) - } - added[dir] = true - } - - // The created file needs to be world readable for the cases where the container is run as a non-root user. - if err := configFile.Chmod(0644); err != nil { - return fmt.Errorf("failed to chmod config file: %w", err) - } - - return nil + return ldconfig.UpdateLDCache(args[3:]...) } diff --git a/internal/ldconfig/ldconfig.go b/internal/ldconfig/ldconfig.go new file mode 100644 index 00000000..f3db1a77 --- /dev/null +++ b/internal/ldconfig/ldconfig.go @@ -0,0 +1,206 @@ +/** +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package ldconfig + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" +) + +const ( + // ldsoconfdFilenamePattern specifies the pattern for the filename + // in ld.so.conf.d that includes references to the specified directories. + // The 00-nvcr prefix is chosen to ensure that these libraries have a + // higher precedence than other libraries on the system, but lower than + // the 00-cuda-compat that is included in some containers. + ldsoconfdFilenamePattern = "00-nvcr-*.conf" +) + +type Ldconfig struct { + ldconfigPath string + inRoot string +} + +// NewRunner creates an exec.Cmd that can be used to run ldconfig. +func NewRunner(id string, ldconfigPath string, containerRoot string, additionalargs ...string) (*exec.Cmd, error) { + args := []string{ + id, + strings.TrimPrefix(config.NormalizeLDConfigPath("@"+ldconfigPath), "@"), + containerRoot, + } + args = append(args, additionalargs...) + + return createReexecCommand(args) +} + +// New creates an Ldconfig struct that is used to perform operations on the +// ldcache and libraries in a particular root (e.g. a container). +func New(ldconfigPath string, inRoot string) (*Ldconfig, error) { + l := &Ldconfig{ + ldconfigPath: ldconfigPath, + inRoot: inRoot, + } + if ldconfigPath == "" { + return nil, fmt.Errorf("an ldconfig path must be specified") + } + if inRoot == "" || inRoot == "/" { + return nil, fmt.Errorf("ldconfig must be run in the non-system root") + } + return l, nil +} + +// CreateSonameSymlinks uses ldconfig to create the soname symlinks in the +// specified directories. +func (l *Ldconfig) CreateSonameSymlinks(directories ...string) error { + if len(directories) == 0 { + return nil + } + ldconfigPath, err := l.prepareRoot() + if err != nil { + return err + } + + args := []string{ + filepath.Base(ldconfigPath), + // Explicitly disable updating the LDCache. 
+ "-N", + // Specify -n to only process the specified directories. + "-n", + } + args = append(args, directories...) + + return SafeExec(ldconfigPath, args, nil) +} + +func (l *Ldconfig) UpdateLDCache(directories ...string) error { + ldconfigPath, err := l.prepareRoot() + if err != nil { + return err + } + + args := []string{ + filepath.Base(ldconfigPath), + // Explicitly specify using /etc/ld.so.conf since the host's ldconfig may + // be configured to use a different config file by default. + "-f", "/etc/ld.so.conf", + } + + if l.ldcacheExists() { + args = append(args, "-C", "/etc/ld.so.cache") + } else { + args = append(args, "-N") + } + + // If the ld.so.conf.d directory exists, we create a config file there + // containing the required directories, otherwise we add the specified + // directories to the ldconfig command directly. + if l.ldsoconfdDirectoryExists() { + err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...) + if err != nil { + return fmt.Errorf("failed to update ld.so.conf.d: %w", err) + } + } else { + args = append(args, directories...) + } + + return SafeExec(ldconfigPath, args, nil) +} + +func (l *Ldconfig) prepareRoot() (string, error) { + // To prevent leaking the parent proc filesystem, we create a new proc mount + // in the specified root. + if err := mountProc(l.inRoot); err != nil { + return "", fmt.Errorf("error mounting /proc: %w", err) + } + + // We mount the host ldconfig before we pivot root since host paths are not + // visible after the pivot root operation. + ldconfigPath, err := mountLdConfig(l.ldconfigPath, l.inRoot) + if err != nil { + return "", fmt.Errorf("error mounting host ldconfig: %w", err) + } + + // We pivot to the container root for the new process, this further limits + // access to the host. 
+ if err := pivotRoot(l.inRoot); err != nil { + return "", fmt.Errorf("error running pivot_root: %w", err) + } + + return ldconfigPath, nil +} + +func (l *Ldconfig) ldcacheExists() bool { + if _, err := os.Stat("/etc/ld.so.cache"); err != nil && os.IsNotExist(err) { + return false + } + return true +} + +func (l *Ldconfig) ldsoconfdDirectoryExists() bool { + info, err := os.Stat("/etc/ld.so.conf.d") + if err != nil { // info is nil on any Stat error, not only ENOENT; treat all as absent + return false + } + return info.IsDir() +} + +// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/. +// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and +// contains the specified directories on each line. +func createLdsoconfdFile(pattern string, dirs ...string) error { + if len(dirs) == 0 { + return nil + } + + ldsoconfdDir := "/etc/ld.so.conf.d" + if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil { + return fmt.Errorf("failed to create ld.so.conf.d: %w", err) + } + + configFile, err := os.CreateTemp(ldsoconfdDir, pattern) + if err != nil { + return fmt.Errorf("failed to create config file: %w", err) + } + defer func() { + _ = configFile.Close() + }() + + added := make(map[string]bool) + for _, dir := range dirs { + if added[dir] { + continue + } + _, err = fmt.Fprintf(configFile, "%s\n", dir) + if err != nil { + return fmt.Errorf("failed to update config file: %w", err) + } + added[dir] = true + } + + // The created file needs to be world readable for the cases where the container is run as a non-root user. 
+ if err := configFile.Chmod(0644); err != nil { + return fmt.Errorf("failed to chmod config file: %w", err) + } + + return nil +} diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_linux.go b/internal/ldconfig/ldconfig_linux.go similarity index 98% rename from cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_linux.go rename to internal/ldconfig/ldconfig_linux.go index ffa88b41..79702b11 100644 --- a/cmd/nvidia-cdi-hook/create-soname-symlinks/ldconfig_linux.go +++ b/internal/ldconfig/ldconfig_linux.go @@ -17,7 +17,7 @@ # limitations under the License. **/ -package create_soname_symlinks +package ldconfig import ( "errors" @@ -29,8 +29,8 @@ import ( "syscall" securejoin "github.com/cyphar/filepath-securejoin" - "github.com/moby/sys/reexec" + "github.com/opencontainers/runc/libcontainer/utils" "golang.org/x/sys/unix" ) @@ -182,7 +182,7 @@ func createTmpFs(target string, size int) error { // createReexecCommand creates a command that can be used to trigger the reexec // initializer. // On linux this command runs in new namespaces. -func createReexecCommand(args []string) *exec.Cmd { +func createReexecCommand(args []string) (*exec.Cmd, error) { cmd := reexec.Command(args...) cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout @@ -196,5 +196,5 @@ func createReexecCommand(args []string) *exec.Cmd { syscall.CLONE_NEWNET, } - return cmd + return cmd, nil } diff --git a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go b/internal/ldconfig/ldconfig_other.go similarity index 76% rename from cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go rename to internal/ldconfig/ldconfig_other.go index a6c35261..c5d452a2 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/ldconfig_other.go +++ b/internal/ldconfig/ldconfig_other.go @@ -17,14 +17,11 @@ # limitations under the License. 
**/ -package ldcache +package ldconfig import ( "fmt" - "os" "os/exec" - - "github.com/moby/sys/reexec" ) func pivotRoot(newroot string) error { @@ -39,13 +36,6 @@ func mountProc(newroot string) error { return fmt.Errorf("not supported") } -// createReexecCommand creates a command that can be used ot trigger the reexec -// initializer. -func createReexecCommand(args []string) *exec.Cmd { - cmd := reexec.Command(args...) - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - return cmd +func createReexecCommand(args []string) (*exec.Cmd, error) { + return nil, fmt.Errorf("not supported") } diff --git a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go b/internal/ldconfig/safe-exec_linux.go similarity index 98% rename from cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go rename to internal/ldconfig/safe-exec_linux.go index 790ac6a8..09b6cc22 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go +++ b/internal/ldconfig/safe-exec_linux.go @@ -16,7 +16,7 @@ # limitations under the License. **/ -package ldcache +package ldconfig import ( "fmt" diff --git a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go b/internal/ldconfig/safe-exec_other.go similarity index 98% rename from cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go rename to internal/ldconfig/safe-exec_other.go index 71f9ddb5..3d017645 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go +++ b/internal/ldconfig/safe-exec_other.go @@ -16,7 +16,7 @@ # limitations under the License. **/ -package ldcache +package ldconfig import "syscall"