Use libcontainer execseal to run ldconfig

This change copies ldconfig into a memfd before executing it from the createContainer hook. Signed-off-by: Evan Lezar <elezar@nvidia.com>
2025-05-02 03:04:25 +00:00 · 2025-02-25 16:58:30 +02:00 · 2025-02-25 16:58:30 +02:00 · 52b9631333
commit 52b9631333
parent 9429fbac5f
34 changed files with 3939 additions and 6 deletions
--- a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go
+++ b/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go
@ -0,0 +1,57 @@
+/**
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package ldcache
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+	"syscall"
+
+	"github.com/opencontainers/runc/libcontainer/dmz"
+)
+
+// SafeExec attempts to clone the specified binary (as an memfd, for example) before executing it.
+func (m command) SafeExec(path string, args []string, envv []string) error {
+	safeExe, err := cloneBinary(path)
+	if err != nil {
+		m.logger.Warningf("Failed to clone binary %q: %v; falling back to Exec", path, err)
+		//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
+		return syscall.Exec(path, args, envv)
+	}
+	defer safeExe.Close()
+
+	exePath := "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
+	//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
+	return syscall.Exec(exePath, args, envv)
+}
+
+func cloneBinary(path string) (*os.File, error) {
+	exe, err := os.Open(path)
+	if err != nil {
+		return nil, fmt.Errorf("opening current binary: %w", err)
+	}
+	defer exe.Close()
+
+	stat, err := exe.Stat()
+	if err != nil {
+		return nil, fmt.Errorf("checking %v size: %w", path, err)
+	}
+	size := stat.Size()
+
+	return dmz.CloneBinary(exe, size, path, os.TempDir())
+}
--- a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go
+++ b/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go
@ -0,0 +1,29 @@
+//go:build !linux
+// +build !linux
+
+/**
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package ldcache
+
+import "syscall"
+
+// SafeExec is not implemented on non-linux systems and forwards directly to the
+// Exec syscall.
+func (m *command) SafeExec(path string, args []string, envv []string) error {
+	//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
+	return syscall.Exec(path, args, envv)
+}
--- a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go
+++ b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go
@ -22,7 +22,6 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
-	"syscall"

 	"github.com/urfave/cli/v2"

@ -143,8 +142,7 @@ func (m command) run(c *cli.Context, cfg *options) error {
 	// be configured to use a different config file by default.
 	args = append(args, "-f", "/etc/ld.so.conf")

-	//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
-	return syscall.Exec(ldconfigPath, args, nil)
+	return m.SafeExec(ldconfigPath, args, nil)
 }

 // resolveLDConfigPath determines the LDConfig path to use for the system.
--- a/go.mod
+++ b/go.mod
@ -6,6 +6,7 @@ require (
 	github.com/NVIDIA/go-nvlib v0.7.1
 	github.com/NVIDIA/go-nvml v0.12.4-1
 	github.com/moby/sys/symlink v0.3.0
+	github.com/opencontainers/runc v1.2.5
 	github.com/opencontainers/runtime-spec v1.2.0
 	github.com/pelletier/go-toml v1.9.5
 	github.com/sirupsen/logrus v1.9.3
@ -19,13 +20,13 @@ require (

 require (
 	github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
+	github.com/cyphar/filepath-securejoin v0.4.1 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/fsnotify/fsnotify v1.7.0 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/hashicorp/errwrap v1.1.0 // indirect
 	github.com/kr/pretty v0.3.1 // indirect
 	github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
-	github.com/opencontainers/selinux v1.11.0 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
--- a/go.sum
+++ b/go.sum
@ -7,6 +7,8 @@ github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2y
 github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
 github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
+github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@ -31,6 +33,8 @@ github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34
 github.com/moby/sys/symlink v0.3.0 h1:GZX89mEZ9u53f97npBy4Rc3vJKj7JBDj/PN2I22GrNU=
 github.com/moby/sys/symlink v0.3.0/go.mod h1:3eNdhduHmYPcgsJtZXW1W4XUJdZGBIkttZ8xKqPUJq0=
 github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
+github.com/opencontainers/runc v1.2.5 h1:8KAkq3Wrem8bApgOHyhRI/8IeLXIfmZ6Qaw6DNSLnA4=
+github.com/opencontainers/runc v1.2.5/go.mod h1:dOQeFo29xZKBNeRBI0B19mJtfHv68YgCTh1X+YphA+4=
 github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
 github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
 github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
--- a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md
+++ b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md
@ -0,0 +1,256 @@
+# Changelog #
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/)
+and this project adheres to [Semantic Versioning](http://semver.org/).
+
+## [Unreleased] ##
+
+## [0.4.1] - 2025-01-28 ##
+
+### Fixed ###
+- The restrictions added for `root` paths passed to `SecureJoin` in 0.4.0 was
+  found to be too strict and caused some regressions when folks tried to
+  update, so this restriction has been relaxed to only return an error if the
+  path contains a `..` component. We still recommend users use `filepath.Clean`
+  (and even `filepath.EvalSymlinks`) on the `root` path they are using, but at
+  least you will no longer be punished for "trivial" unclean paths.
+
+## [0.4.0] - 2025-01-13 ##
+
+### Breaking ####
+- `SecureJoin(VFS)` will now return an error if the provided `root` is not a
+  `filepath.Clean`'d path.
+
+  While it is ultimately the responsibility of the caller to ensure the root is
+  a safe path to use, passing a path like `/symlink/..` as a root would result
+  in the `SecureJoin`'d path being placed in `/` even though `/symlink/..`
+  might be a different directory, and so we should more strongly discourage
+  such usage.
+
+  All major users of `securejoin.SecureJoin` already ensure that the paths they
+  provide are safe (and this is ultimately a question of user error), but
+  removing this foot-gun is probably a good idea. Of course, this is
+  necessarily a breaking API change (though we expect no real users to be
+  affected by it).
+
+  Thanks to [Erik Sjölund](https://github.com/eriksjolund), who initially
+  reported this issue as a possible security issue.
+
+- `MkdirAll` and `MkdirHandle` now take an `os.FileMode`-style mode argument
+  instead of a raw `unix.S_*`-style mode argument, which may cause compile-time
+  type errors depending on how you use `filepath-securejoin`. For most users,
+  there will be no change in behaviour aside from the type change (as the
+  bottom `0o777` bits are the same in both formats, and most users are probably
+  only using those bits).
+
+  However, if you were using `unix.S_ISVTX` to set the sticky bit with
+  `MkdirAll(Handle)` you will need to switch to `os.ModeSticky` otherwise you
+  will get a runtime error with this update. In addition, the error message you
+  will get from passing `unix.S_ISUID` and `unix.S_ISGID` will be different as
+  they are treated as invalid bits now (note that previously passing said bits
+  was also an error).
+
+## [0.3.6] - 2024-12-17 ##
+
+### Compatibility ###
+- The minimum Go version requirement for `filepath-securejoin` is now Go 1.18
+  (we use generics internally).
+
+  For reference, `filepath-securejoin@v0.3.0` somewhat-arbitrarily bumped the
+  Go version requirement to 1.21.
+
+  While we did make some use of Go 1.21 stdlib features (and in principle Go
+  versions <= 1.21 are no longer even supported by upstream anymore), some
+  downstreams have complained that the version bump has meant that they have to
+  do workarounds when backporting fixes that use the new `filepath-securejoin`
+  API onto old branches. This is not an ideal situation, but since using this
+  library is probably better for most downstreams than a hand-rolled
+  workaround, we now have compatibility shims that allow us to build on older
+  Go versions.
+- Lower minimum version requirement for `golang.org/x/sys` to `v0.18.0` (we
+  need the wrappers for `fsconfig(2)`), which should also make backporting
+  patches to older branches easier.
+
+## [0.3.5] - 2024-12-06 ##
+
+### Fixed ###
+- `MkdirAll` will now no longer return an `EEXIST` error if two racing
+  processes are creating the same directory. We will still verify that the path
+  is a directory, but this will avoid spurious errors when multiple threads or
+  programs are trying to `MkdirAll` the same path. opencontainers/runc#4543
+
+## [0.3.4] - 2024-10-09 ##
+
+### Fixed ###
+- Previously, some testing mocks we had resulted in us doing `import "testing"`
+  in non-`_test.go` code, which made some downstreams like Kubernetes unhappy.
+  This has been fixed. (#32)
+
+## [0.3.3] - 2024-09-30 ##
+
+### Fixed ###
+- The mode and owner verification logic in `MkdirAll` has been removed. This
+  was originally intended to protect against some theoretical attacks but upon
+  further consideration these protections don't actually buy us anything and
+  they were causing spurious errors with more complicated filesystem setups.
+- The "is the created directory empty" logic in `MkdirAll` has also been
+  removed. This was not causing us issues yet, but some pseudofilesystems (such
+  as `cgroup`) create non-empty directories and so this logic would've been
+  wrong for such cases.
+
+## [0.3.2] - 2024-09-13 ##
+
+### Changed ###
+- Passing the `S_ISUID` or `S_ISGID` modes to `MkdirAllInRoot` will now return
+  an explicit error saying that those bits are ignored by `mkdirat(2)`. In the
+  past a different error was returned, but since the silent ignoring behaviour
+  is codified in the man pages a more explicit error seems apt. While silently
+  ignoring these bits would be the most compatible option, it could lead to
+  users thinking their code sets these bits when it doesn't. Programs that need
+  to deal with compatibility can mask the bits themselves. (#23, #25)
+
+### Fixed ###
+- If a directory has `S_ISGID` set, then all child directories will have
+  `S_ISGID` set when created and a different gid will be used for any inode
+  created under the directory. Previously, the "expected owner and mode"
+  validation in `securejoin.MkdirAll` did not correctly handle this. We now
+  correctly handle this case. (#24, #25)
+
+## [0.3.1] - 2024-07-23 ##
+
+### Changed ###
+- By allowing `Open(at)InRoot` to opt-out of the extra work done by `MkdirAll`
+  to do the necessary "partial lookups", `Open(at)InRoot` now does less work
+  for both implementations (resulting in a many-fold decrease in the number of
+  operations for `openat2`, and a modest improvement for non-`openat2`) and is
+  far more guaranteed to match the correct `openat2(RESOLVE_IN_ROOT)`
+  behaviour.
+- We now use `readlinkat(fd, "")` where possible. For `Open(at)InRoot` this
+  effectively just means that we no longer risk getting spurious errors during
+  rename races. However, for our hardened procfs handler, this in theory should
+  prevent mount attacks from tricking us when doing magic-link readlinks (even
+  when using the unsafe host `/proc` handle). Unfortunately `Reopen` is still
+  potentially vulnerable to those kinds of somewhat-esoteric attacks.
+
+  Technically this [will only work on post-2.6.39 kernels][linux-readlinkat-emptypath]
+  but it seems incredibly unlikely anyone is using `filepath-securejoin` on a
+  pre-2011 kernel.
+
+### Fixed ###
+- Several improvements were made to the errors returned by `Open(at)InRoot` and
+  `MkdirAll` when dealing with invalid paths under the emulated (ie.
+  non-`openat2`) implementation. Previously, some paths would return the wrong
+  error (`ENOENT` when the last component was a non-directory), and other paths
+  would be returned as though they were acceptable (trailing-slash components
+  after a non-directory would be ignored by `Open(at)InRoot`).
+
+  These changes were done to match `openat2`'s behaviour and purely is a
+  consistency fix (most users are going to be using `openat2` anyway).
+
+[linux-readlinkat-emptypath]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=65cfc6722361570bfe255698d9cd4dccaf47570d
+
+## [0.3.0] - 2024-07-11 ##
+
+### Added ###
+- A new set of `*os.File`-based APIs have been added. These are adapted from
+  [libpathrs][] and we strongly suggest using them if possible (as they provide
+  far more protection against attacks than `SecureJoin`):
+
+   - `Open(at)InRoot` resolves a path inside a rootfs and returns an `*os.File`
+     handle to the path. Note that the handle returned is an `O_PATH` handle,
+     which cannot be used for reading or writing (as well as some other
+     operations -- [see open(2) for more details][open.2])
+
+   - `Reopen` takes an `O_PATH` file handle and safely re-opens it to upgrade
+     it to a regular handle. This can also be used with non-`O_PATH` handles,
+     but `O_PATH` is the most obvious application.
+
+   - `MkdirAll` is an implementation of `os.MkdirAll` that is safe to use to
+     create a directory tree within a rootfs.
+
+  As these are new APIs, they may change in the future. However, they should be
+  safe to start migrating to as we have extensive tests ensuring they behave
+  correctly and are safe against various races and other attacks.
+
+[libpathrs]: https://github.com/openSUSE/libpathrs
+[open.2]: https://www.man7.org/linux/man-pages/man2/open.2.html
+
+## [0.2.5] - 2024-05-03 ##
+
+### Changed ###
+- Some minor changes were made to how lexical components (like `..` and `.`)
+  are handled during path generation in `SecureJoin`. There is no behaviour
+  change as a result of this fix (the resulting paths are the same).
+
+### Fixed ###
+- The error returned when we hit a symlink loop now references the correct
+  path. (#10)
+
+## [0.2.4] - 2023-09-06 ##
+
+### Security ###
+- This release fixes a potential security issue in filepath-securejoin when
+  used on Windows ([GHSA-6xv5-86q9-7xr8][], which could be used to generate
+  paths outside of the provided rootfs in certain cases), as well as improving
+  the overall behaviour of filepath-securejoin when dealing with Windows paths
+  that contain volume names. Thanks to Paulo Gomes for discovering and fixing
+  these issues.
+
+### Fixed ###
+- Switch to GitHub Actions for CI so we can test on Windows as well as Linux
+  and MacOS.
+
+[GHSA-6xv5-86q9-7xr8]: https://github.com/advisories/GHSA-6xv5-86q9-7xr8
+
+## [0.2.3] - 2021-06-04 ##
+
+### Changed ###
+- Switch to Go 1.13-style `%w` error wrapping, letting us drop the dependency
+  on `github.com/pkg/errors`.
+
+## [0.2.2] - 2018-09-05 ##
+
+### Changed ###
+- Use `syscall.ELOOP` as the base error for symlink loops, rather than our own
+  (internal) error. This allows callers to more easily use `errors.Is` to check
+  for this case.
+
+## [0.2.1] - 2018-09-05 ##
+
+### Fixed ###
+- Use our own `IsNotExist` implementation, which lets us handle `ENOTDIR`
+  properly within `SecureJoin`.
+
+## [0.2.0] - 2017-07-19 ##
+
+We now have 100% test coverage!
+
+### Added ###
+- Add a `SecureJoinVFS` API that can be used for mocking (as we do in our new
+  tests) or for implementing custom handling of lookup operations (such as for
+  rootless containers, where work is necessary to access directories with weird
+  modes because we don't have `CAP_DAC_READ_SEARCH` or `CAP_DAC_OVERRIDE`).
+
+## 0.1.0 - 2017-07-19
+
+This is our first release of `github.com/cyphar/filepath-securejoin`,
+containing a full implementation with a coverage of 93.5% (the only missing
+cases are the error cases, which are hard to mocktest at the moment).
+
+[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.1...HEAD
+[0.4.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.0...v0.4.1
+[0.4.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.6...v0.4.0
+[0.3.6]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.5...v0.3.6
+[0.3.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.4...v0.3.5
+[0.3.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.3...v0.3.4
+[0.3.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.2...v0.3.3
+[0.3.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.1...v0.3.2
+[0.3.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.0...v0.3.1
+[0.3.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.5...v0.3.0
+[0.2.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.4...v0.2.5
+[0.2.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.3...v0.2.4
+[0.2.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.2...v0.2.3
+[0.2.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.1...v0.2.2
+[0.2.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.0...v0.2.1
+[0.2.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.1.0...v0.2.0
--- a/vendor/github.com/cyphar/filepath-securejoin/LICENSE
+++ b/vendor/github.com/cyphar/filepath-securejoin/LICENSE
@ -0,0 +1,28 @@
+Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
+Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/vendor/github.com/cyphar/filepath-securejoin/README.md
+++ b/vendor/github.com/cyphar/filepath-securejoin/README.md
@ -0,0 +1,169 @@
+## `filepath-securejoin` ##
+
+[![Go Documentation](https://pkg.go.dev/badge/github.com/cyphar/filepath-securejoin.svg)](https://pkg.go.dev/github.com/cyphar/filepath-securejoin)
+[![Build Status](https://github.com/cyphar/filepath-securejoin/actions/workflows/ci.yml/badge.svg)](https://github.com/cyphar/filepath-securejoin/actions/workflows/ci.yml)
+
+### Old API ###
+
+This library was originally just an implementation of `SecureJoin` which was
+[intended to be included in the Go standard library][go#20126] as a safer
+`filepath.Join` that would restrict the path lookup to be inside a root
+directory.
+
+The implementation was based on code that existed in several container
+runtimes. Unfortunately, this API is **fundamentally unsafe** against attackers
+that can modify path components after `SecureJoin` returns and before the
+caller uses the path, allowing for some fairly trivial TOCTOU attacks.
+
+`SecureJoin` (and `SecureJoinVFS`) are still provided by this library to
+support legacy users, but new users are strongly suggested to avoid using
+`SecureJoin` and instead use the [new api](#new-api) or switch to
+[libpathrs][libpathrs].
+
+With the above limitations in mind, this library guarantees the following:
+
+* If no error is set, the resulting string **must** be a child path of
+  `root` and will not contain any symlink path components (they will all be
+  expanded).
+
+* When expanding symlinks, all symlink path components **must** be resolved
+  relative to the provided root. In particular, this can be considered a
+  userspace implementation of how `chroot(2)` operates on file paths. Note that
+  these symlinks will **not** be expanded lexically (`filepath.Clean` is not
+  called on the input before processing).
+
+* Non-existent path components are unaffected by `SecureJoin` (similar to
+  `filepath.EvalSymlinks`'s semantics).
+
+* The returned path will always be `filepath.Clean`ed and thus not contain any
+  `..` components.
+
+A (trivial) implementation of this function on GNU/Linux systems could be done
+with the following (note that this requires root privileges and is far more
+opaque than the implementation in this library, and also requires that
+`readlink` is inside the `root` path and is trustworthy):
+
+```go
+package securejoin
+
+import (
+	"os/exec"
+	"path/filepath"
+)
+
+func SecureJoin(root, unsafePath string) (string, error) {
+	unsafePath = string(filepath.Separator) + unsafePath
+	cmd := exec.Command("chroot", root,
+		"readlink", "--canonicalize-missing", "--no-newline", unsafePath)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", err
+	}
+	expanded := string(output)
+	return filepath.Join(root, expanded), nil
+}
+```
+
+[libpathrs]: https://github.com/openSUSE/libpathrs
+[go#20126]: https://github.com/golang/go/issues/20126
+
+### New API ###
+
+While we recommend users switch to [libpathrs][libpathrs] as soon as it has a
+stable release, some methods implemented by libpathrs have been ported to this
+library to ease the transition. These APIs are only supported on Linux.
+
+These APIs are implemented such that `filepath-securejoin` will
+opportunistically use certain newer kernel APIs that make these operations far
+more secure. In particular:
+
+* All of the lookup operations will use [`openat2`][openat2.2] on new enough
+  kernels (Linux 5.6 or later) to restrict lookups through magic-links and
+  bind-mounts (for certain operations) and to make use of `RESOLVE_IN_ROOT` to
+  efficiently resolve symlinks within a rootfs.
+
+* The APIs provide hardening against a malicious `/proc` mount to either detect
+  or avoid being tricked by a `/proc` that is not legitimate. This is done
+  using [`openat2`][openat2.2] for all users, and privileged users will also be
+  further protected by using [`fsopen`][fsopen.2] and [`open_tree`][open_tree.2]
+  (Linux 5.2 or later).
+
+[openat2.2]: https://www.man7.org/linux/man-pages/man2/openat2.2.html
+[fsopen.2]: https://github.com/brauner/man-pages-md/blob/main/fsopen.md
+[open_tree.2]: https://github.com/brauner/man-pages-md/blob/main/open_tree.md
+
+#### `OpenInRoot` ####
+
+```go
+func OpenInRoot(root, unsafePath string) (*os.File, error)
+func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error)
+func Reopen(handle *os.File, flags int) (*os.File, error)
+```
+
+`OpenInRoot` is a much safer version of
+
+```go
+path, err := securejoin.SecureJoin(root, unsafePath)
+file, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC)
+```
+
+that protects against various race attacks that could lead to serious security
+issues, depending on the application. Note that the returned `*os.File` is an
+`O_PATH` file descriptor, which is quite restricted. Callers will probably need
+to use `Reopen` to get a more usable handle (this split is done to provide
+useful features like PTY spawning and to avoid users accidentally opening bad
+inodes that could cause a DoS).
+
+Callers need to be careful in how they use the returned `*os.File`. Usually it
+is only safe to operate on the handle directly, and it is very easy to create a
+security issue. [libpathrs][libpathrs] provides far more helpers to make using
+these handles safer -- there is currently no plan to port them to
+`filepath-securejoin`.
+
+`OpenatInRoot` is like `OpenInRoot` except that the root is provided using an
+`*os.File`. This allows you to ensure that multiple `OpenatInRoot` (or
+`MkdirAllHandle`) calls are operating on the same rootfs.
+
+> **NOTE**: Unlike `SecureJoin`, `OpenInRoot` will error out as soon as it hits
+> a dangling symlink or non-existent path. This is in contrast to `SecureJoin`
+> which treated non-existent components as though they were real directories,
+> and would allow for partial resolution of dangling symlinks. These behaviours
+> are at odds with how Linux treats non-existent paths and dangling symlinks,
+> and so these are no longer allowed.
+
+#### `MkdirAll` ####
+
+```go
+func MkdirAll(root, unsafePath string, mode int) error
+func MkdirAllHandle(root *os.File, unsafePath string, mode int) (*os.File, error)
+```
+
+`MkdirAll` is a much safer version of
+
+```go
+path, err := securejoin.SecureJoin(root, unsafePath)
+err = os.MkdirAll(path, mode)
+```
+
+that protects against the same kinds of races that `OpenInRoot` protects
+against.
+
+`MkdirAllHandle` is like `MkdirAll` except that the root is provided using an
+`*os.File` (the reason for this is the same as with `OpenatInRoot`) and an
+`*os.File` of the final created directory is returned (this directory is
+guaranteed to be effectively identical to the directory created by
+`MkdirAllHandle`, which is not possible to ensure by just using `OpenatInRoot`
+after `MkdirAll`).
+
+> **NOTE**: Unlike `SecureJoin`, `MkdirAll` will error out as soon as it hits
+> a dangling symlink or non-existent path. This is in contrast to `SecureJoin`
+> which treated non-existent components as though they were real directories,
+> and would allow for partial resolution of dangling symlinks. These behaviours
+> are at odds with how Linux treats non-existent paths and dangling symlinks,
+> and so these are no longer allowed. This means that `MkdirAll` will not
+> create non-existent directories referenced by a dangling symlink.
+
+### License ###
+
+The license of this project is the same as Go, which is a BSD 3-clause license
+available in the `LICENSE` file.
--- a/vendor/github.com/cyphar/filepath-securejoin/VERSION
+++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION
@ -0,0 +1 @@
+0.4.1
--- a/vendor/github.com/cyphar/filepath-securejoin/doc.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/doc.go
@ -0,0 +1,39 @@
+// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
+// Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package securejoin implements a set of helpers to make it easier to write Go
+// code that is safe against symlink-related escape attacks. The primary idea
+// is to let you resolve a path within a rootfs directory as if the rootfs was
+// a chroot.
+//
+// securejoin has two APIs, a "legacy" API and a "modern" API.
+//
+// The legacy API is [SecureJoin] and [SecureJoinVFS]. These methods are
+// **not** safe against race conditions where an attacker changes the
+// filesystem after (or during) the [SecureJoin] operation.
+//
+// The new API is made up of [OpenInRoot] and [MkdirAll] (and derived
+// functions). These are safe against racing attackers and have several other
+// protections that are not provided by the legacy API. There are many more
+// operations that most programs expect to be able to do safely, but we do not
+// provide explicit support for them because we want to encourage users to
+// switch to [libpathrs](https://github.com/openSUSE/libpathrs) which is a
+// cross-language next-generation library that is entirely designed around
+// operating on paths safely.
+//
+// securejoin has been used by several container runtimes (Docker, runc,
+// Kubernetes, etc) for quite a few years as a de-facto standard for operating
+// on container filesystem paths "safely". However, most users still use the
+// legacy API which is unsafe against various attacks (there is a fairly long
+// history of CVEs in dependent as a result). Users should switch to the modern
+// API as soon as possible (or even better, switch to libpathrs).
+//
+// This project was initially intended to be included in the Go standard
+// library, but [it was rejected](https://go.dev/issue/20126). There is now a
+// [new Go proposal](https://go.dev/issue/67002) for a safe path resolution API
+// that shares some of the goals of filepath-securejoin. However, that design
+// is intended to work like `openat2(RESOLVE_BENEATH)` which does not fit the
+// usecase of container runtimes and most system tools.
+package securejoin
--- a/vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_go120.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_go120.go
@ -0,0 +1,18 @@
+//go:build linux && go1.20
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"fmt"
+)
+
+// wrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except
+// that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap)
+// is only guaranteed to give you baseErr.
+func wrapBaseError(baseErr, extraErr error) error {
+	return fmt.Errorf("%w: %w", extraErr, baseErr)
+}
--- a/vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_unsupported.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_unsupported.go
@ -0,0 +1,38 @@
+//go:build linux && !go1.20
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"fmt"
+)
+
+type wrappedError struct {
+	inner   error
+	isError error
+}
+
+func (err wrappedError) Is(target error) bool {
+	return err.isError == target
+}
+
+func (err wrappedError) Unwrap() error {
+	return err.inner
+}
+
+func (err wrappedError) Error() string {
+	return fmt.Sprintf("%v: %v", err.isError, err.inner)
+}
+
+// wrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except
+// that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap)
+// is only guaranteed to give you baseErr.
+func wrapBaseError(baseErr, extraErr error) error {
+	return wrappedError{
+		inner:   baseErr,
+		isError: extraErr,
+	}
+}
--- a/vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_go121.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_go121.go
@ -0,0 +1,32 @@
+//go:build linux && go1.21
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"slices"
+	"sync"
+)
+
+func slices_DeleteFunc[S ~[]E, E any](slice S, delFn func(E) bool) S {
+	return slices.DeleteFunc(slice, delFn)
+}
+
+func slices_Contains[S ~[]E, E comparable](slice S, val E) bool {
+	return slices.Contains(slice, val)
+}
+
+func slices_Clone[S ~[]E, E any](slice S) S {
+	return slices.Clone(slice)
+}
+
+func sync_OnceValue[T any](f func() T) func() T {
+	return sync.OnceValue(f)
+}
+
+func sync_OnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) {
+	return sync.OnceValues(f)
+}
--- a/vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_unsupported.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_unsupported.go
@ -0,0 +1,124 @@
+//go:build linux && !go1.21
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"sync"
+)
+
+// These are very minimal implementations of functions that appear in Go 1.21's
+// stdlib, included so that we can build on older Go versions. Most are
+// borrowed directly from the stdlib, and a few are modified to be "obviously
+// correct" without needing to copy too many other helpers.
+
+// clearSlice is equivalent to the builtin clear from Go 1.21.
+// Copied from the Go 1.24 stdlib implementation.
+func clearSlice[S ~[]E, E any](slice S) {
+	var zero E
+	for i := range slice {
+		slice[i] = zero
+	}
+}
+
+// Copied from the Go 1.24 stdlib implementation.
+func slices_IndexFunc[S ~[]E, E any](s S, f func(E) bool) int {
+	for i := range s {
+		if f(s[i]) {
+			return i
+		}
+	}
+	return -1
+}
+
+// Copied from the Go 1.24 stdlib implementation.
+func slices_DeleteFunc[S ~[]E, E any](s S, del func(E) bool) S {
+	i := slices_IndexFunc(s, del)
+	if i == -1 {
+		return s
+	}
+	// Don't start copying elements until we find one to delete.
+	for j := i + 1; j < len(s); j++ {
+		if v := s[j]; !del(v) {
+			s[i] = v
+			i++
+		}
+	}
+	clearSlice(s[i:]) // zero/nil out the obsolete elements, for GC
+	return s[:i]
+}
+
+// Similar to the stdlib slices.Contains, except that we don't have
+// slices.Index so we need to use slices.IndexFunc for this non-Func helper.
+func slices_Contains[S ~[]E, E comparable](s S, v E) bool {
+	return slices_IndexFunc(s, func(e E) bool { return e == v }) >= 0
+}
+
+// Copied from the Go 1.24 stdlib implementation.
+func slices_Clone[S ~[]E, E any](s S) S {
+	// Preserve nil in case it matters.
+	if s == nil {
+		return nil
+	}
+	return append(S([]E{}), s...)
+}
+
+// Copied from the Go 1.24 stdlib implementation.
+func sync_OnceValue[T any](f func() T) func() T {
+	var (
+		once   sync.Once
+		valid  bool
+		p      any
+		result T
+	)
+	g := func() {
+		defer func() {
+			p = recover()
+			if !valid {
+				panic(p)
+			}
+		}()
+		result = f()
+		f = nil
+		valid = true
+	}
+	return func() T {
+		once.Do(g)
+		if !valid {
+			panic(p)
+		}
+		return result
+	}
+}
+
+// Copied from the Go 1.24 stdlib implementation.
+func sync_OnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) {
+	var (
+		once  sync.Once
+		valid bool
+		p     any
+		r1    T1
+		r2    T2
+	)
+	g := func() {
+		defer func() {
+			p = recover()
+			if !valid {
+				panic(p)
+			}
+		}()
+		r1, r2 = f()
+		f = nil
+		valid = true
+	}
+	return func() (T1, T2) {
+		once.Do(g)
+		if !valid {
+			panic(p)
+		}
+		return r1, r2
+	}
+}
--- a/vendor/github.com/cyphar/filepath-securejoin/join.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/join.go
@ -0,0 +1,166 @@
+// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
+// Copyright (C) 2017-2025 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"errors"
+	"os"
+	"path/filepath"
+	"strings"
+	"syscall"
+)
+
+const maxSymlinkLimit = 255
+
+// IsNotExist tells you if err is an error that implies that either the path
+// accessed does not exist (or path components don't exist). This is
+// effectively a more broad version of [os.IsNotExist].
+func IsNotExist(err error) bool {
+	// Check that it's not actually an ENOTDIR, which in some cases is a more
+	// convoluted case of ENOENT (usually involving weird paths).
+	return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) || errors.Is(err, syscall.ENOENT)
+}
+
+// errUnsafeRoot is returned if the user provides SecureJoinVFS with a path
+// that contains ".." components.
+var errUnsafeRoot = errors.New("root path provided to SecureJoin contains '..' components")
+
+// stripVolume just gets rid of the Windows volume included in a path. Based on
+// some godbolt tests, the Go compiler is smart enough to make this a no-op on
+// Linux.
+func stripVolume(path string) string {
+	return path[len(filepath.VolumeName(path)):]
+}
+
+// hasDotDot checks if the path contains ".." components in a platform-agnostic
+// way.
+func hasDotDot(path string) bool {
+	// If we are on Windows, strip any volume letters. It turns out that
+	// C:..\foo may (or may not) be a valid pathname and we need to handle that
+	// leading "..".
+	path = stripVolume(path)
+	// Look for "/../" in the path, but we need to handle leading and trailing
+	// ".."s by adding separators. Doing this with filepath.Separator is ugly
+	// so just convert to Unix-style "/" first.
+	path = filepath.ToSlash(path)
+	return strings.Contains("/"+path+"/", "/../")
+}
+
+// SecureJoinVFS joins the two given path components (similar to [filepath.Join]) except
+// that the returned path is guaranteed to be scoped inside the provided root
+// path (when evaluated). Any symbolic links in the path are evaluated with the
+// given root treated as the root of the filesystem, similar to a chroot. The
+// filesystem state is evaluated through the given [VFS] interface (if nil, the
+// standard [os].* family of functions are used).
+//
+// Note that the guarantees provided by this function only apply if the path
+// components in the returned string are not modified (in other words are not
+// replaced with symlinks on the filesystem) after this function has returned.
+// Such a symlink race is necessarily out-of-scope of SecureJoinVFS.
+//
+// NOTE: Due to the above limitation, Linux users are strongly encouraged to
+// use [OpenInRoot] instead, which does safely protect against these kinds of
+// attacks. There is no way to solve this problem with SecureJoinVFS because
+// the API is fundamentally wrong (you cannot return a "safe" path string and
+// guarantee it won't be modified afterwards).
+//
+// Volume names in unsafePath are always discarded, regardless if they are
+// provided via direct input or when evaluating symlinks. Therefore:
+//
+// "C:\Temp" + "D:\path\to\file.txt" results in "C:\Temp\path\to\file.txt"
+//
+// If the provided root is not [filepath.Clean] then an error will be returned,
+// as such root paths are bordering on somewhat unsafe and using such paths is
+// not best practice. We also strongly suggest that any root path is first
+// fully resolved using [filepath.EvalSymlinks] or otherwise constructed to
+// avoid containing symlink components. Of course, the root also *must not* be
+// attacker-controlled.
+func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) {
+	// The root path must not contain ".." components, otherwise when we join
+	// the subpath we will end up with a weird path. We could work around this
+	// in other ways but users shouldn't be giving us non-lexical root paths in
+	// the first place.
+	if hasDotDot(root) {
+		return "", errUnsafeRoot
+	}
+
+	// Use the os.* VFS implementation if none was specified.
+	if vfs == nil {
+		vfs = osVFS{}
+	}
+
+	unsafePath = filepath.FromSlash(unsafePath)
+	var (
+		currentPath   string
+		remainingPath = unsafePath
+		linksWalked   int
+	)
+	for remainingPath != "" {
+		// On Windows, if we managed to end up at a path referencing a volume,
+		// drop the volume to make sure we don't end up with broken paths or
+		// escaping the root volume.
+		remainingPath = stripVolume(remainingPath)
+
+		// Get the next path component.
+		var part string
+		if i := strings.IndexRune(remainingPath, filepath.Separator); i == -1 {
+			part, remainingPath = remainingPath, ""
+		} else {
+			part, remainingPath = remainingPath[:i], remainingPath[i+1:]
+		}
+
+		// Apply the component lexically to the path we are building.
+		// currentPath does not contain any symlinks, and we are lexically
+		// dealing with a single component, so it's okay to do a filepath.Clean
+		// here.
+		nextPath := filepath.Join(string(filepath.Separator), currentPath, part)
+		if nextPath == string(filepath.Separator) {
+			currentPath = ""
+			continue
+		}
+		fullPath := root + string(filepath.Separator) + nextPath
+
+		// Figure out whether the path is a symlink.
+		fi, err := vfs.Lstat(fullPath)
+		if err != nil && !IsNotExist(err) {
+			return "", err
+		}
+		// Treat non-existent path components the same as non-symlinks (we
+		// can't do any better here).
+		if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 {
+			currentPath = nextPath
+			continue
+		}
+
+		// It's a symlink, so get its contents and expand it by prepending it
+		// to the yet-unparsed path.
+		linksWalked++
+		if linksWalked > maxSymlinkLimit {
+			return "", &os.PathError{Op: "SecureJoin", Path: root + string(filepath.Separator) + unsafePath, Err: syscall.ELOOP}
+		}
+
+		dest, err := vfs.Readlink(fullPath)
+		if err != nil {
+			return "", err
+		}
+		remainingPath = dest + string(filepath.Separator) + remainingPath
+		// Absolute symlinks reset any work we've already done.
+		if filepath.IsAbs(dest) {
+			currentPath = ""
+		}
+	}
+
+	// There should be no lexical components like ".." left in the path here,
+	// but for safety clean up the path before joining it to the root.
+	finalPath := filepath.Join(string(filepath.Separator), currentPath)
+	return filepath.Join(root, finalPath), nil
+}
+
+// SecureJoin is a wrapper around [SecureJoinVFS] that just uses the [os].* library
+// of functions as the [VFS]. If in doubt, use this function over [SecureJoinVFS].
+func SecureJoin(root, unsafePath string) (string, error) {
+	return SecureJoinVFS(root, unsafePath, nil)
+}
--- a/vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go
@ -0,0 +1,388 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"golang.org/x/sys/unix"
+)
+
+type symlinkStackEntry struct {
+	// (dir, remainingPath) is what we would've returned if the link didn't
+	// exist. This matches what openat2(RESOLVE_IN_ROOT) would return in
+	// this case.
+	dir           *os.File
+	remainingPath string
+	// linkUnwalked is the remaining path components from the original
+	// Readlink which we have yet to walk. When this slice is empty, we
+	// drop the link from the stack.
+	linkUnwalked []string
+}
+
+func (se symlinkStackEntry) String() string {
+	return fmt.Sprintf("<%s>/%s [->%s]", se.dir.Name(), se.remainingPath, strings.Join(se.linkUnwalked, "/"))
+}
+
+func (se symlinkStackEntry) Close() {
+	_ = se.dir.Close()
+}
+
+type symlinkStack []*symlinkStackEntry
+
+func (s *symlinkStack) IsEmpty() bool {
+	return s == nil || len(*s) == 0
+}
+
+func (s *symlinkStack) Close() {
+	if s != nil {
+		for _, link := range *s {
+			link.Close()
+		}
+		// TODO: Switch to clear once we switch to Go 1.21.
+		*s = nil
+	}
+}
+
+var (
+	errEmptyStack         = errors.New("[internal] stack is empty")
+	errBrokenSymlinkStack = errors.New("[internal error] broken symlink stack")
+)
+
+func (s *symlinkStack) popPart(part string) error {
+	if s == nil || s.IsEmpty() {
+		// If there is nothing in the symlink stack, then the part was from the
+		// real path provided by the user, and this is a no-op.
+		return errEmptyStack
+	}
+	if part == "." {
+		// "." components are no-ops -- we drop them when doing SwapLink.
+		return nil
+	}
+
+	tailEntry := (*s)[len(*s)-1]
+
+	// Double-check that we are popping the component we expect.
+	if len(tailEntry.linkUnwalked) == 0 {
+		return fmt.Errorf("%w: trying to pop component %q of empty stack entry %s", errBrokenSymlinkStack, part, tailEntry)
+	}
+	headPart := tailEntry.linkUnwalked[0]
+	if headPart != part {
+		return fmt.Errorf("%w: trying to pop component %q but the last stack entry is %s (%q)", errBrokenSymlinkStack, part, tailEntry, headPart)
+	}
+
+	// Drop the component, but keep the entry around in case we are dealing
+	// with a "tail-chained" symlink.
+	tailEntry.linkUnwalked = tailEntry.linkUnwalked[1:]
+	return nil
+}
+
+func (s *symlinkStack) PopPart(part string) error {
+	if err := s.popPart(part); err != nil {
+		if errors.Is(err, errEmptyStack) {
+			// Skip empty stacks.
+			err = nil
+		}
+		return err
+	}
+
+	// Clean up any of the trailing stack entries that are empty.
+	for lastGood := len(*s) - 1; lastGood >= 0; lastGood-- {
+		entry := (*s)[lastGood]
+		if len(entry.linkUnwalked) > 0 {
+			break
+		}
+		entry.Close()
+		(*s) = (*s)[:lastGood]
+	}
+	return nil
+}
+
+func (s *symlinkStack) push(dir *os.File, remainingPath, linkTarget string) error {
+	if s == nil {
+		return nil
+	}
+	// Split the link target and clean up any "" parts.
+	linkTargetParts := slices_DeleteFunc(
+		strings.Split(linkTarget, "/"),
+		func(part string) bool { return part == "" || part == "." })
+
+	// Copy the directory so the caller doesn't close our copy.
+	dirCopy, err := dupFile(dir)
+	if err != nil {
+		return err
+	}
+
+	// Add to the stack.
+	*s = append(*s, &symlinkStackEntry{
+		dir:           dirCopy,
+		remainingPath: remainingPath,
+		linkUnwalked:  linkTargetParts,
+	})
+	return nil
+}
+
+func (s *symlinkStack) SwapLink(linkPart string, dir *os.File, remainingPath, linkTarget string) error {
+	// If we are currently inside a symlink resolution, remove the symlink
+	// component from the last symlink entry, but don't remove the entry even
+	// if it's empty. If we are a "tail-chained" symlink (a trailing symlink we
+	// hit during a symlink resolution) we need to keep the old symlink until
+	// we finish the resolution.
+	if err := s.popPart(linkPart); err != nil {
+		if !errors.Is(err, errEmptyStack) {
+			return err
+		}
+		// Push the component regardless of whether the stack was empty.
+	}
+	return s.push(dir, remainingPath, linkTarget)
+}
+
+func (s *symlinkStack) PopTopSymlink() (*os.File, string, bool) {
+	if s == nil || s.IsEmpty() {
+		return nil, "", false
+	}
+	tailEntry := (*s)[0]
+	*s = (*s)[1:]
+	return tailEntry.dir, tailEntry.remainingPath, true
+}
+
+// partialLookupInRoot tries to lookup as much of the request path as possible
+// within the provided root (a-la RESOLVE_IN_ROOT) and opens the final existing
+// component of the requested path, returning a file handle to the final
+// existing component and a string containing the remaining path components.
+func partialLookupInRoot(root *os.File, unsafePath string) (*os.File, string, error) {
+	return lookupInRoot(root, unsafePath, true)
+}
+
+func completeLookupInRoot(root *os.File, unsafePath string) (*os.File, error) {
+	handle, remainingPath, err := lookupInRoot(root, unsafePath, false)
+	if remainingPath != "" && err == nil {
+		// should never happen
+		err = fmt.Errorf("[bug] non-empty remaining path when doing a non-partial lookup: %q", remainingPath)
+	}
+	// lookupInRoot(partial=false) will always close the handle if an error is
+	// returned, so no need to double-check here.
+	return handle, err
+}
+
+func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.File, _ string, _ error) {
+	unsafePath = filepath.ToSlash(unsafePath) // noop
+
+	// This is very similar to SecureJoin, except that we operate on the
+	// components using file descriptors. We then return the last component we
+	// managed open, along with the remaining path components not opened.
+
+	// Try to use openat2 if possible.
+	if hasOpenat2() {
+		return lookupOpenat2(root, unsafePath, partial)
+	}
+
+	// Get the "actual" root path from /proc/self/fd. This is necessary if the
+	// root is some magic-link like /proc/$pid/root, in which case we want to
+	// make sure when we do checkProcSelfFdPath that we are using the correct
+	// root path.
+	logicalRootPath, err := procSelfFdReadlink(root)
+	if err != nil {
+		return nil, "", fmt.Errorf("get real root path: %w", err)
+	}
+
+	currentDir, err := dupFile(root)
+	if err != nil {
+		return nil, "", fmt.Errorf("clone root fd: %w", err)
+	}
+	defer func() {
+		// If a handle is not returned, close the internal handle.
+		if Handle == nil {
+			_ = currentDir.Close()
+		}
+	}()
+
+	// symlinkStack is used to emulate how openat2(RESOLVE_IN_ROOT) treats
+	// dangling symlinks. If we hit a non-existent path while resolving a
+	// symlink, we need to return the (dir, remainingPath) that we had when we
+	// hit the symlink (treating the symlink as though it were a regular file).
+	// The set of (dir, remainingPath) sets is stored within the symlinkStack
+	// and we add and remove parts when we hit symlink and non-symlink
+	// components respectively. We need a stack because of recursive symlinks
+	// (symlinks that contain symlink components in their target).
+	//
+	// Note that the stack is ONLY used for book-keeping. All of the actual
+	// path walking logic is still based on currentPath/remainingPath and
+	// currentDir (as in SecureJoin).
+	var symStack *symlinkStack
+	if partial {
+		symStack = new(symlinkStack)
+		defer symStack.Close()
+	}
+
+	var (
+		linksWalked   int
+		currentPath   string
+		remainingPath = unsafePath
+	)
+	for remainingPath != "" {
+		// Save the current remaining path so if the part is not real we can
+		// return the path including the component.
+		oldRemainingPath := remainingPath
+
+		// Get the next path component.
+		var part string
+		if i := strings.IndexByte(remainingPath, '/'); i == -1 {
+			part, remainingPath = remainingPath, ""
+		} else {
+			part, remainingPath = remainingPath[:i], remainingPath[i+1:]
+		}
+		// If we hit an empty component, we need to treat it as though it is
+		// "." so that trailing "/" and "//" components on a non-directory
+		// correctly return the right error code.
+		if part == "" {
+			part = "."
+		}
+
+		// Apply the component lexically to the path we are building.
+		// currentPath does not contain any symlinks, and we are lexically
+		// dealing with a single component, so it's okay to do a filepath.Clean
+		// here.
+		nextPath := path.Join("/", currentPath, part)
+		// If we logically hit the root, just clone the root rather than
+		// opening the part and doing all of the other checks.
+		if nextPath == "/" {
+			if err := symStack.PopPart(part); err != nil {
+				return nil, "", fmt.Errorf("walking into root with part %q failed: %w", part, err)
+			}
+			// Jump to root.
+			rootClone, err := dupFile(root)
+			if err != nil {
+				return nil, "", fmt.Errorf("clone root fd: %w", err)
+			}
+			_ = currentDir.Close()
+			currentDir = rootClone
+			currentPath = nextPath
+			continue
+		}
+
+		// Try to open the next component.
+		nextDir, err := openatFile(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
+		switch {
+		case err == nil:
+			st, err := nextDir.Stat()
+			if err != nil {
+				_ = nextDir.Close()
+				return nil, "", fmt.Errorf("stat component %q: %w", part, err)
+			}
+
+			switch st.Mode() & os.ModeType {
+			case os.ModeSymlink:
+				// readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See
+				// Linux commit 65cfc6722361 ("readlinkat(), fchownat() and
+				// fstatat() with empty relative pathnames").
+				linkDest, err := readlinkatFile(nextDir, "")
+				// We don't need the handle anymore.
+				_ = nextDir.Close()
+				if err != nil {
+					return nil, "", err
+				}
+
+				linksWalked++
+				if linksWalked > maxSymlinkLimit {
+					return nil, "", &os.PathError{Op: "securejoin.lookupInRoot", Path: logicalRootPath + "/" + unsafePath, Err: unix.ELOOP}
+				}
+
+				// Swap out the symlink's component for the link entry itself.
+				if err := symStack.SwapLink(part, currentDir, oldRemainingPath, linkDest); err != nil {
+					return nil, "", fmt.Errorf("walking into symlink %q failed: push symlink: %w", part, err)
+				}
+
+				// Update our logical remaining path.
+				remainingPath = linkDest + "/" + remainingPath
+				// Absolute symlinks reset any work we've already done.
+				if path.IsAbs(linkDest) {
+					// Jump to root.
+					rootClone, err := dupFile(root)
+					if err != nil {
+						return nil, "", fmt.Errorf("clone root fd: %w", err)
+					}
+					_ = currentDir.Close()
+					currentDir = rootClone
+					currentPath = "/"
+				}
+
+			default:
+				// If we are dealing with a directory, simply walk into it.
+				_ = currentDir.Close()
+				currentDir = nextDir
+				currentPath = nextPath
+
+				// The part was real, so drop it from the symlink stack.
+				if err := symStack.PopPart(part); err != nil {
+					return nil, "", fmt.Errorf("walking into directory %q failed: %w", part, err)
+				}
+
+				// If we are operating on a .., make sure we haven't escaped.
+				// We only have to check for ".." here because walking down
+				// into a regular component component cannot cause you to
+				// escape. This mirrors the logic in RESOLVE_IN_ROOT, except we
+				// have to check every ".." rather than only checking after a
+				// rename or mount on the system.
+				if part == ".." {
+					// Make sure the root hasn't moved.
+					if err := checkProcSelfFdPath(logicalRootPath, root); err != nil {
+						return nil, "", fmt.Errorf("root path moved during lookup: %w", err)
+					}
+					// Make sure the path is what we expect.
+					fullPath := logicalRootPath + nextPath
+					if err := checkProcSelfFdPath(fullPath, currentDir); err != nil {
+						return nil, "", fmt.Errorf("walking into %q had unexpected result: %w", part, err)
+					}
+				}
+			}
+
+		default:
+			if !partial {
+				return nil, "", err
+			}
+			// If there are any remaining components in the symlink stack, we
+			// are still within a symlink resolution and thus we hit a dangling
+			// symlink. So pretend that the first symlink in the stack we hit
+			// was an ENOENT (to match openat2).
+			if oldDir, remainingPath, ok := symStack.PopTopSymlink(); ok {
+				_ = currentDir.Close()
+				return oldDir, remainingPath, err
+			}
+			// We have hit a final component that doesn't exist, so we have our
+			// partial open result. Note that we have to use the OLD remaining
+			// path, since the lookup failed.
+			return currentDir, oldRemainingPath, err
+		}
+	}
+
+	// If the unsafePath had a trailing slash, we need to make sure we try to
+	// do a relative "." open so that we will correctly return an error when
+	// the final component is a non-directory (to match openat2). In the
+	// context of openat2, a trailing slash and a trailing "/." are completely
+	// equivalent.
+	if strings.HasSuffix(unsafePath, "/") {
+		nextDir, err := openatFile(currentDir, ".", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
+		if err != nil {
+			if !partial {
+				_ = currentDir.Close()
+				currentDir = nil
+			}
+			return currentDir, "", err
+		}
+		_ = currentDir.Close()
+		currentDir = nextDir
+	}
+
+	// All of the components existed!
+	return currentDir, "", nil
+}
--- a/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go
@ -0,0 +1,236 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"golang.org/x/sys/unix"
+)
+
+var (
+	errInvalidMode    = errors.New("invalid permission mode")
+	errPossibleAttack = errors.New("possible attack detected")
+)
+
+// modePermExt is like os.ModePerm except that it also includes the set[ug]id
+// and sticky bits.
+const modePermExt = os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky
+
+//nolint:cyclop // this function needs to handle a lot of cases
+func toUnixMode(mode os.FileMode) (uint32, error) {
+	sysMode := uint32(mode.Perm())
+	if mode&os.ModeSetuid != 0 {
+		sysMode |= unix.S_ISUID
+	}
+	if mode&os.ModeSetgid != 0 {
+		sysMode |= unix.S_ISGID
+	}
+	if mode&os.ModeSticky != 0 {
+		sysMode |= unix.S_ISVTX
+	}
+	// We don't allow file type bits.
+	if mode&os.ModeType != 0 {
+		return 0, fmt.Errorf("%w %+.3o (%s): type bits not permitted", errInvalidMode, mode, mode)
+	}
+	// We don't allow other unknown modes.
+	if mode&^modePermExt != 0 || sysMode&unix.S_IFMT != 0 {
+		return 0, fmt.Errorf("%w %+.3o (%s): unknown mode bits", errInvalidMode, mode, mode)
+	}
+	return sysMode, nil
+}
+
+// MkdirAllHandle is equivalent to [MkdirAll], except that it is safer to use
+// in two respects:
+//
+//   - The caller provides the root directory as an *[os.File] (preferably O_PATH)
+//     handle. This means that the caller can be sure which root directory is
+//     being used. Note that this can be emulated by using /proc/self/fd/... as
+//     the root path with [os.MkdirAll].
+//
+//   - Once all of the directories have been created, an *[os.File] O_PATH handle
+//     to the directory at unsafePath is returned to the caller. This is done in
+//     an effectively-race-free way (an attacker would only be able to swap the
+//     final directory component), which is not possible to emulate with
+//     [MkdirAll].
+//
+// In addition, the returned handle is obtained far more efficiently than doing
+// a brand new lookup of unsafePath (such as with [SecureJoin] or openat2) after
+// doing [MkdirAll]. If you intend to open the directory after creating it, you
+// should use MkdirAllHandle.
+func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.File, Err error) {
+	unixMode, err := toUnixMode(mode)
+	if err != nil {
+		return nil, err
+	}
+	// On Linux, mkdirat(2) (and os.Mkdir) silently ignore the suid and sgid
+	// bits. We could also silently ignore them but since we have very few
+	// users it seems more prudent to return an error so users notice that
+	// these bits will not be set.
+	if unixMode&^0o1777 != 0 {
+		return nil, fmt.Errorf("%w for mkdir %+.3o: suid and sgid are ignored by mkdir", errInvalidMode, mode)
+	}
+
+	// Try to open as much of the path as possible.
+	currentDir, remainingPath, err := partialLookupInRoot(root, unsafePath)
+	defer func() {
+		if Err != nil {
+			_ = currentDir.Close()
+		}
+	}()
+	if err != nil && !errors.Is(err, unix.ENOENT) {
+		return nil, fmt.Errorf("find existing subpath of %q: %w", unsafePath, err)
+	}
+
+	// If there is an attacker deleting directories as we walk into them,
+	// detect this proactively. Note this is guaranteed to detect if the
+	// attacker deleted any part of the tree up to currentDir.
+	//
+	// Once we walk into a dead directory, partialLookupInRoot would not be
+	// able to walk further down the tree (directories must be empty before
+	// they are deleted), and if the attacker has removed the entire tree we
+	// can be sure that anything that was originally inside a dead directory
+	// must also be deleted and thus is a dead directory in its own right.
+	//
+	// This is mostly a quality-of-life check, because mkdir will simply fail
+	// later if the attacker deletes the tree after this check.
+	if err := isDeadInode(currentDir); err != nil {
+		return nil, fmt.Errorf("finding existing subpath of %q: %w", unsafePath, err)
+	}
+
+	// Re-open the path to match the O_DIRECTORY reopen loop later (so that we
+	// always return a non-O_PATH handle). We also check that we actually got a
+	// directory.
+	if reopenDir, err := Reopen(currentDir, unix.O_DIRECTORY|unix.O_CLOEXEC); errors.Is(err, unix.ENOTDIR) {
+		return nil, fmt.Errorf("cannot create subdirectories in %q: %w", currentDir.Name(), unix.ENOTDIR)
+	} else if err != nil {
+		return nil, fmt.Errorf("re-opening handle to %q: %w", currentDir.Name(), err)
+	} else {
+		_ = currentDir.Close()
+		currentDir = reopenDir
+	}
+
+	remainingParts := strings.Split(remainingPath, string(filepath.Separator))
+	if slices_Contains(remainingParts, "..") {
+		// The path contained ".." components after the end of the "real"
+		// components. We could try to safely resolve ".." here but that would
+		// add a bunch of extra logic for something that it's not clear even
+		// needs to be supported. So just return an error.
+		//
+		// If we do filepath.Clean(remainingPath) then we end up with the
+		// problem that ".." can erase a trailing dangling symlink and produce
+		// a path that doesn't quite match what the user asked for.
+		return nil, fmt.Errorf("%w: yet-to-be-created path %q contains '..' components", unix.ENOENT, remainingPath)
+	}
+
+	// Create the remaining components.
+	for _, part := range remainingParts {
+		switch part {
+		case "", ".":
+			// Skip over no-op paths.
+			continue
+		}
+
+		// NOTE: mkdir(2) will not follow trailing symlinks, so we can safely
+		// create the final component without worrying about symlink-exchange
+		// attacks.
+		//
+		// If we get -EEXIST, it's possible that another program created the
+		// directory at the same time as us. In that case, just continue on as
+		// if we created it (if the created inode is not a directory, the
+		// following open call will fail).
+		if err := unix.Mkdirat(int(currentDir.Fd()), part, unixMode); err != nil && !errors.Is(err, unix.EEXIST) {
+			err = &os.PathError{Op: "mkdirat", Path: currentDir.Name() + "/" + part, Err: err}
+			// Make the error a bit nicer if the directory is dead.
+			if deadErr := isDeadInode(currentDir); deadErr != nil {
+				// TODO: Once we bump the minimum Go version to 1.20, we can use
+				// multiple %w verbs for this wrapping. For now we need to use a
+				// compatibility shim for older Go versions.
+				//err = fmt.Errorf("%w (%w)", err, deadErr)
+				err = wrapBaseError(err, deadErr)
+			}
+			return nil, err
+		}
+
+		// Get a handle to the next component. O_DIRECTORY means we don't need
+		// to use O_PATH.
+		var nextDir *os.File
+		if hasOpenat2() {
+			nextDir, err = openat2File(currentDir, part, &unix.OpenHow{
+				Flags:   unix.O_NOFOLLOW | unix.O_DIRECTORY | unix.O_CLOEXEC,
+				Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_NO_XDEV,
+			})
+		} else {
+			nextDir, err = openatFile(currentDir, part, unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
+		}
+		if err != nil {
+			return nil, err
+		}
+		_ = currentDir.Close()
+		currentDir = nextDir
+
+		// It's possible that the directory we just opened was swapped by an
+		// attacker. Unfortunately there isn't much we can do to protect
+		// against this, and MkdirAll's behaviour is that we will reuse
+		// existing directories anyway so the need to protect against this is
+		// incredibly limited (and arguably doesn't even deserve mention here).
+		//
+		// Ideally we might want to check that the owner and mode match what we
+		// would've created -- unfortunately, it is non-trivial to verify that
+		// the owner and mode of the created directory match. While plain Unix
+		// DAC rules seem simple enough to emulate, there are a bunch of other
+		// factors that can change the mode or owner of created directories
+		// (default POSIX ACLs, mount options like uid=1,gid=2,umask=0 on
+		// filesystems like vfat, etc etc). We used to try to verify this but
+		// it just lead to a series of spurious errors.
+		//
+		// We could also check that the directory is non-empty, but
+		// unfortunately some pseduofilesystems (like cgroupfs) create
+		// non-empty directories, which would result in different spurious
+		// errors.
+	}
+	return currentDir, nil
+}
+
+// MkdirAll is a race-safe alternative to the [os.MkdirAll] function,
+// where the new directory is guaranteed to be within the root directory (if an
+// attacker can move directories from inside the root to outside the root, the
+// created directory tree might be outside of the root but the key constraint
+// is that at no point will we walk outside of the directory tree we are
+// creating).
+//
+// Effectively, MkdirAll(root, unsafePath, mode) is equivalent to
+//
+//	path, _ := securejoin.SecureJoin(root, unsafePath)
+//	err := os.MkdirAll(path, mode)
+//
+// But is much safer. The above implementation is unsafe because if an attacker
+// can modify the filesystem tree between [SecureJoin] and [os.MkdirAll], it is
+// possible for MkdirAll to resolve unsafe symlink components and create
+// directories outside of the root.
+//
+// If you plan to open the directory after you have created it or want to use
+// an open directory handle as the root, you should use [MkdirAllHandle] instead.
+// This function is a wrapper around [MkdirAllHandle].
+func MkdirAll(root, unsafePath string, mode os.FileMode) error {
+	rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return err
+	}
+	defer rootDir.Close()
+
+	f, err := MkdirAllHandle(rootDir, unsafePath, mode)
+	if err != nil {
+		return err
+	}
+	_ = f.Close()
+	return nil
+}
--- a/vendor/github.com/cyphar/filepath-securejoin/open_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/open_linux.go
@ -0,0 +1,103 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+
+	"golang.org/x/sys/unix"
+)
+
+// OpenatInRoot is equivalent to [OpenInRoot], except that the root is provided
+// using an *[os.File] handle, to ensure that the correct root directory is used.
+func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) {
+	handle, err := completeLookupInRoot(root, unsafePath)
+	if err != nil {
+		return nil, &os.PathError{Op: "securejoin.OpenInRoot", Path: unsafePath, Err: err}
+	}
+	return handle, nil
+}
+
+// OpenInRoot safely opens the provided unsafePath within the root.
+// Effectively, OpenInRoot(root, unsafePath) is equivalent to
+//
+//	path, _ := securejoin.SecureJoin(root, unsafePath)
+//	handle, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC)
+//
+// But is much safer. The above implementation is unsafe because if an attacker
+// can modify the filesystem tree between [SecureJoin] and [os.OpenFile], it is
+// possible for the returned file to be outside of the root.
+//
+// Note that the returned handle is an O_PATH handle, meaning that only a very
+// limited set of operations will work on the handle. This is done to avoid
+// accidentally opening an untrusted file that could cause issues (such as a
+// disconnected TTY that could cause a DoS, or some other issue). In order to
+// use the returned handle, you can "upgrade" it to a proper handle using
+// [Reopen].
+func OpenInRoot(root, unsafePath string) (*os.File, error) {
+	rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return nil, err
+	}
+	defer rootDir.Close()
+	return OpenatInRoot(rootDir, unsafePath)
+}
+
+// Reopen takes an *[os.File] handle and re-opens it through /proc/self/fd.
+// Reopen(file, flags) is effectively equivalent to
+//
+//	fdPath := fmt.Sprintf("/proc/self/fd/%d", file.Fd())
+//	os.OpenFile(fdPath, flags|unix.O_CLOEXEC)
+//
+// But with some extra hardenings to ensure that we are not tricked by a
+// maliciously-configured /proc mount. While this attack scenario is not
+// common, in container runtimes it is possible for higher-level runtimes to be
+// tricked into configuring an unsafe /proc that can be used to attack file
+// operations. See [CVE-2019-19921] for more details.
+//
+// [CVE-2019-19921]: https://github.com/advisories/GHSA-fh74-hm69-rqjw
+func Reopen(handle *os.File, flags int) (*os.File, error) {
+	procRoot, err := getProcRoot()
+	if err != nil {
+		return nil, err
+	}
+
+	// We can't operate on /proc/thread-self/fd/$n directly when doing a
+	// re-open, so we need to open /proc/thread-self/fd and then open a single
+	// final component.
+	procFdDir, closer, err := procThreadSelf(procRoot, "fd/")
+	if err != nil {
+		return nil, fmt.Errorf("get safe /proc/thread-self/fd handle: %w", err)
+	}
+	defer procFdDir.Close()
+	defer closer()
+
+	// Try to detect if there is a mount on top of the magic-link we are about
+	// to open. If we are using unsafeHostProcRoot(), this could change after
+	// we check it (and there's nothing we can do about that) but for
+	// privateProcRoot() this should be guaranteed to be safe (at least since
+	// Linux 5.12[1], when anonymous mount namespaces were completely isolated
+	// from external mounts including mount propagation events).
+	//
+	// [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts
+	// onto targets that reside on shared mounts").
+	fdStr := strconv.Itoa(int(handle.Fd()))
+	if err := checkSymlinkOvermount(procRoot, procFdDir, fdStr); err != nil {
+		return nil, fmt.Errorf("check safety of /proc/thread-self/fd/%s magiclink: %w", fdStr, err)
+	}
+
+	flags |= unix.O_CLOEXEC
+	// Rather than just wrapping openatFile, open-code it so we can copy
+	// handle.Name().
+	reopenFd, err := unix.Openat(int(procFdDir.Fd()), fdStr, flags, 0)
+	if err != nil {
+		return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err)
+	}
+	return os.NewFile(uintptr(reopenFd), handle.Name()), nil
+}
--- a/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go
@ -0,0 +1,127 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"golang.org/x/sys/unix"
+)
+
+var hasOpenat2 = sync_OnceValue(func() bool {
+	fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{
+		Flags:   unix.O_PATH | unix.O_CLOEXEC,
+		Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT,
+	})
+	if err != nil {
+		return false
+	}
+	_ = unix.Close(fd)
+	return true
+})
+
+func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool {
+	// RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve
+	// ".." while a mount or rename occurs anywhere on the system. This could
+	// happen spuriously, or as the result of an attacker trying to mess with
+	// us during lookup.
+	//
+	// In addition, scoped lookups have a "safety check" at the end of
+	// complete_walk which will return -EXDEV if the final path is not in the
+	// root.
+	return how.Resolve&(unix.RESOLVE_IN_ROOT|unix.RESOLVE_BENEATH) != 0 &&
+		(errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV))
+}
+
+const scopedLookupMaxRetries = 10
+
+func openat2File(dir *os.File, path string, how *unix.OpenHow) (*os.File, error) {
+	fullPath := dir.Name() + "/" + path
+	// Make sure we always set O_CLOEXEC.
+	how.Flags |= unix.O_CLOEXEC
+	var tries int
+	for tries < scopedLookupMaxRetries {
+		fd, err := unix.Openat2(int(dir.Fd()), path, how)
+		if err != nil {
+			if scopedLookupShouldRetry(how, err) {
+				// We retry a couple of times to avoid the spurious errors, and
+				// if we are being attacked then returning -EAGAIN is the best
+				// we can do.
+				tries++
+				continue
+			}
+			return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: err}
+		}
+		// If we are using RESOLVE_IN_ROOT, the name we generated may be wrong.
+		// NOTE: The procRoot code MUST NOT use RESOLVE_IN_ROOT, otherwise
+		//       you'll get infinite recursion here.
+		if how.Resolve&unix.RESOLVE_IN_ROOT == unix.RESOLVE_IN_ROOT {
+			if actualPath, err := rawProcSelfFdReadlink(fd); err == nil {
+				fullPath = actualPath
+			}
+		}
+		return os.NewFile(uintptr(fd), fullPath), nil
+	}
+	return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: errPossibleAttack}
+}
+
+func lookupOpenat2(root *os.File, unsafePath string, partial bool) (*os.File, string, error) {
+	if !partial {
+		file, err := openat2File(root, unsafePath, &unix.OpenHow{
+			Flags:   unix.O_PATH | unix.O_CLOEXEC,
+			Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS,
+		})
+		return file, "", err
+	}
+	return partialLookupOpenat2(root, unsafePath)
+}
+
+// partialLookupOpenat2 is an alternative implementation of
+// partialLookupInRoot, using openat2(RESOLVE_IN_ROOT) to more safely get a
+// handle to the deepest existing child of the requested path within the root.
+func partialLookupOpenat2(root *os.File, unsafePath string) (*os.File, string, error) {
+	// TODO: Implement this as a git-bisect-like binary search.
+
+	unsafePath = filepath.ToSlash(unsafePath) // noop
+	endIdx := len(unsafePath)
+	var lastError error
+	for endIdx > 0 {
+		subpath := unsafePath[:endIdx]
+
+		handle, err := openat2File(root, subpath, &unix.OpenHow{
+			Flags:   unix.O_PATH | unix.O_CLOEXEC,
+			Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS,
+		})
+		if err == nil {
+			// Jump over the slash if we have a non-"" remainingPath.
+			if endIdx < len(unsafePath) {
+				endIdx += 1
+			}
+			// We found a subpath!
+			return handle, unsafePath[endIdx:], lastError
+		}
+		if errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENOTDIR) {
+			// That path doesn't exist, let's try the next directory up.
+			endIdx = strings.LastIndexByte(subpath, '/')
+			lastError = err
+			continue
+		}
+		return nil, "", fmt.Errorf("open subpath: %w", err)
+	}
+	// If we couldn't open anything, the whole subpath is missing. Return a
+	// copy of the root fd so that the caller doesn't close this one by
+	// accident.
+	rootClone, err := dupFile(root)
+	if err != nil {
+		return nil, "", err
+	}
+	return rootClone, unsafePath, lastError
+}
--- a/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go
@ -0,0 +1,59 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"os"
+	"path/filepath"
+
+	"golang.org/x/sys/unix"
+)
+
+func dupFile(f *os.File) (*os.File, error) {
+	fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
+	if err != nil {
+		return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err)
+	}
+	return os.NewFile(uintptr(fd), f.Name()), nil
+}
+
+func openatFile(dir *os.File, path string, flags int, mode int) (*os.File, error) {
+	// Make sure we always set O_CLOEXEC.
+	flags |= unix.O_CLOEXEC
+	fd, err := unix.Openat(int(dir.Fd()), path, flags, uint32(mode))
+	if err != nil {
+		return nil, &os.PathError{Op: "openat", Path: dir.Name() + "/" + path, Err: err}
+	}
+	// All of the paths we use with openatFile(2) are guaranteed to be
+	// lexically safe, so we can use path.Join here.
+	fullPath := filepath.Join(dir.Name(), path)
+	return os.NewFile(uintptr(fd), fullPath), nil
+}
+
+func fstatatFile(dir *os.File, path string, flags int) (unix.Stat_t, error) {
+	var stat unix.Stat_t
+	if err := unix.Fstatat(int(dir.Fd()), path, &stat, flags); err != nil {
+		return stat, &os.PathError{Op: "fstatat", Path: dir.Name() + "/" + path, Err: err}
+	}
+	return stat, nil
+}
+
+func readlinkatFile(dir *os.File, path string) (string, error) {
+	size := 4096
+	for {
+		linkBuf := make([]byte, size)
+		n, err := unix.Readlinkat(int(dir.Fd()), path, linkBuf)
+		if err != nil {
+			return "", &os.PathError{Op: "readlinkat", Path: dir.Name() + "/" + path, Err: err}
+		}
+		if n != size {
+			return string(linkBuf[:n]), nil
+		}
+		// Possible truncation, resize the buffer.
+		size *= 2
+	}
+}
--- a/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go
@ -0,0 +1,452 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"runtime"
+	"strconv"
+
+	"golang.org/x/sys/unix"
+)
+
+func fstat(f *os.File) (unix.Stat_t, error) {
+	var stat unix.Stat_t
+	if err := unix.Fstat(int(f.Fd()), &stat); err != nil {
+		return stat, &os.PathError{Op: "fstat", Path: f.Name(), Err: err}
+	}
+	return stat, nil
+}
+
+func fstatfs(f *os.File) (unix.Statfs_t, error) {
+	var statfs unix.Statfs_t
+	if err := unix.Fstatfs(int(f.Fd()), &statfs); err != nil {
+		return statfs, &os.PathError{Op: "fstatfs", Path: f.Name(), Err: err}
+	}
+	return statfs, nil
+}
+
+// The kernel guarantees that the root inode of a procfs mount has an
+// f_type of PROC_SUPER_MAGIC and st_ino of PROC_ROOT_INO.
+const (
+	procSuperMagic = 0x9fa0 // PROC_SUPER_MAGIC
+	procRootIno    = 1      // PROC_ROOT_INO
+)
+
+func verifyProcRoot(procRoot *os.File) error {
+	if statfs, err := fstatfs(procRoot); err != nil {
+		return err
+	} else if statfs.Type != procSuperMagic {
+		return fmt.Errorf("%w: incorrect procfs root filesystem type 0x%x", errUnsafeProcfs, statfs.Type)
+	}
+	if stat, err := fstat(procRoot); err != nil {
+		return err
+	} else if stat.Ino != procRootIno {
+		return fmt.Errorf("%w: incorrect procfs root inode number %d", errUnsafeProcfs, stat.Ino)
+	}
+	return nil
+}
+
+var hasNewMountApi = sync_OnceValue(func() bool {
+	// All of the pieces of the new mount API we use (fsopen, fsconfig,
+	// fsmount, open_tree) were added together in Linux 5.1[1,2], so we can
+	// just check for one of the syscalls and the others should also be
+	// available.
+	//
+	// Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE.
+	// This is equivalent to openat(2), but tells us if open_tree is
+	// available (and thus all of the other basic new mount API syscalls).
+	// open_tree(2) is most light-weight syscall to test here.
+	//
+	// [1]: merge commit 400913252d09
+	// [2]: <https://lore.kernel.org/lkml/153754740781.17872.7869536526927736855.stgit@warthog.procyon.org.uk/>
+	fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC)
+	if err != nil {
+		return false
+	}
+	_ = unix.Close(fd)
+	return true
+})
+
+func fsopen(fsName string, flags int) (*os.File, error) {
+	// Make sure we always set O_CLOEXEC.
+	flags |= unix.FSOPEN_CLOEXEC
+	fd, err := unix.Fsopen(fsName, flags)
+	if err != nil {
+		return nil, os.NewSyscallError("fsopen "+fsName, err)
+	}
+	return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil
+}
+
+func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) {
+	// Make sure we always set O_CLOEXEC.
+	flags |= unix.FSMOUNT_CLOEXEC
+	fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs)
+	if err != nil {
+		return nil, os.NewSyscallError("fsmount "+ctx.Name(), err)
+	}
+	return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil
+}
+
+func newPrivateProcMount() (*os.File, error) {
+	procfsCtx, err := fsopen("proc", unix.FSOPEN_CLOEXEC)
+	if err != nil {
+		return nil, err
+	}
+	defer procfsCtx.Close()
+
+	// Try to configure hidepid=ptraceable,subset=pid if possible, but ignore errors.
+	_ = unix.FsconfigSetString(int(procfsCtx.Fd()), "hidepid", "ptraceable")
+	_ = unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid")
+
+	// Get an actual handle.
+	if err := unix.FsconfigCreate(int(procfsCtx.Fd())); err != nil {
+		return nil, os.NewSyscallError("fsconfig create procfs", err)
+	}
+	return fsmount(procfsCtx, unix.FSMOUNT_CLOEXEC, unix.MS_RDONLY|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_NOSUID)
+}
+
+func openTree(dir *os.File, path string, flags uint) (*os.File, error) {
+	dirFd := -int(unix.EBADF)
+	dirName := "."
+	if dir != nil {
+		dirFd = int(dir.Fd())
+		dirName = dir.Name()
+	}
+	// Make sure we always set O_CLOEXEC.
+	flags |= unix.OPEN_TREE_CLOEXEC
+	fd, err := unix.OpenTree(dirFd, path, flags)
+	if err != nil {
+		return nil, &os.PathError{Op: "open_tree", Path: path, Err: err}
+	}
+	return os.NewFile(uintptr(fd), dirName+"/"+path), nil
+}
+
+func clonePrivateProcMount() (_ *os.File, Err error) {
+	// Try to make a clone without using AT_RECURSIVE if we can. If this works,
+	// we can be sure there are no over-mounts and so if the root is valid then
+	// we're golden. Otherwise, we have to deal with over-mounts.
+	procfsHandle, err := openTree(nil, "/proc", unix.OPEN_TREE_CLONE)
+	if err != nil || hookForcePrivateProcRootOpenTreeAtRecursive(procfsHandle) {
+		procfsHandle, err = openTree(nil, "/proc", unix.OPEN_TREE_CLONE|unix.AT_RECURSIVE)
+	}
+	if err != nil {
+		return nil, fmt.Errorf("creating a detached procfs clone: %w", err)
+	}
+	defer func() {
+		if Err != nil {
+			_ = procfsHandle.Close()
+		}
+	}()
+	if err := verifyProcRoot(procfsHandle); err != nil {
+		return nil, err
+	}
+	return procfsHandle, nil
+}
+
+func privateProcRoot() (*os.File, error) {
+	if !hasNewMountApi() || hookForceGetProcRootUnsafe() {
+		return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP)
+	}
+	// Try to create a new procfs mount from scratch if we can. This ensures we
+	// can get a procfs mount even if /proc is fake (for whatever reason).
+	procRoot, err := newPrivateProcMount()
+	if err != nil || hookForcePrivateProcRootOpenTree(procRoot) {
+		// Try to clone /proc then...
+		procRoot, err = clonePrivateProcMount()
+	}
+	return procRoot, err
+}
+
+func unsafeHostProcRoot() (_ *os.File, Err error) {
+	procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return nil, err
+	}
+	defer func() {
+		if Err != nil {
+			_ = procRoot.Close()
+		}
+	}()
+	if err := verifyProcRoot(procRoot); err != nil {
+		return nil, err
+	}
+	return procRoot, nil
+}
+
+func doGetProcRoot() (*os.File, error) {
+	procRoot, err := privateProcRoot()
+	if err != nil {
+		// Fall back to using a /proc handle if making a private mount failed.
+		// If we have openat2, at least we can avoid some kinds of over-mount
+		// attacks, but without openat2 there's not much we can do.
+		procRoot, err = unsafeHostProcRoot()
+	}
+	return procRoot, err
+}
+
+var getProcRoot = sync_OnceValues(func() (*os.File, error) {
+	return doGetProcRoot()
+})
+
+var hasProcThreadSelf = sync_OnceValue(func() bool {
+	return unix.Access("/proc/thread-self/", unix.F_OK) == nil
+})
+
+var errUnsafeProcfs = errors.New("unsafe procfs detected")
+
+type procThreadSelfCloser func()
+
+// procThreadSelf returns a handle to /proc/thread-self/<subpath> (or an
+// equivalent handle on older kernels where /proc/thread-self doesn't exist).
+// Once finished with the handle, you must call the returned closer function
+// (runtime.UnlockOSThread). You must not pass the returned *os.File to other
+// Go threads or use the handle after calling the closer.
+//
+// This is similar to ProcThreadSelf from runc, but with extra hardening
+// applied and using *os.File.
+func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThreadSelfCloser, Err error) {
+	// We need to lock our thread until the caller is done with the handle
+	// because between getting the handle and using it we could get interrupted
+	// by the Go runtime and hit the case where the underlying thread is
+	// swapped out and the original thread is killed, resulting in
+	// pull-your-hair-out-hard-to-debug issues in the caller.
+	runtime.LockOSThread()
+	defer func() {
+		if Err != nil {
+			runtime.UnlockOSThread()
+		}
+	}()
+
+	// Figure out what prefix we want to use.
+	threadSelf := "thread-self/"
+	if !hasProcThreadSelf() || hookForceProcSelfTask() {
+		/// Pre-3.17 kernels don't have /proc/thread-self, so do it manually.
+		threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) + "/"
+		if _, err := fstatatFile(procRoot, threadSelf, unix.AT_SYMLINK_NOFOLLOW); err != nil || hookForceProcSelf() {
+			// In this case, we running in a pid namespace that doesn't match
+			// the /proc mount we have. This can happen inside runc.
+			//
+			// Unfortunately, there is no nice way to get the correct TID to
+			// use here because of the age of the kernel, so we have to just
+			// use /proc/self and hope that it works.
+			threadSelf = "self/"
+		}
+	}
+
+	// Grab the handle.
+	var (
+		handle *os.File
+		err    error
+	)
+	if hasOpenat2() {
+		// We prefer being able to use RESOLVE_NO_XDEV if we can, to be
+		// absolutely sure we are operating on a clean /proc handle that
+		// doesn't have any cheeky overmounts that could trick us (including
+		// symlink mounts on top of /proc/thread-self). RESOLVE_BENEATH isn't
+		// strictly needed, but just use it since we have it.
+		//
+		// NOTE: /proc/self is technically a magic-link (the contents of the
+		//       symlink are generated dynamically), but it doesn't use
+		//       nd_jump_link() so RESOLVE_NO_MAGICLINKS allows it.
+		//
+		// NOTE: We MUST NOT use RESOLVE_IN_ROOT here, as openat2File uses
+		//       procSelfFdReadlink to clean up the returned f.Name() if we use
+		//       RESOLVE_IN_ROOT (which would lead to an infinite recursion).
+		handle, err = openat2File(procRoot, threadSelf+subpath, &unix.OpenHow{
+			Flags:   unix.O_PATH | unix.O_NOFOLLOW | unix.O_CLOEXEC,
+			Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_MAGICLINKS,
+		})
+		if err != nil {
+			// TODO: Once we bump the minimum Go version to 1.20, we can use
+			// multiple %w verbs for this wrapping. For now we need to use a
+			// compatibility shim for older Go versions.
+			//err = fmt.Errorf("%w: %w", errUnsafeProcfs, err)
+			return nil, nil, wrapBaseError(err, errUnsafeProcfs)
+		}
+	} else {
+		handle, err = openatFile(procRoot, threadSelf+subpath, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
+		if err != nil {
+			// TODO: Once we bump the minimum Go version to 1.20, we can use
+			// multiple %w verbs for this wrapping. For now we need to use a
+			// compatibility shim for older Go versions.
+			//err = fmt.Errorf("%w: %w", errUnsafeProcfs, err)
+			return nil, nil, wrapBaseError(err, errUnsafeProcfs)
+		}
+		defer func() {
+			if Err != nil {
+				_ = handle.Close()
+			}
+		}()
+		// We can't detect bind-mounts of different parts of procfs on top of
+		// /proc (a-la RESOLVE_NO_XDEV), but we can at least be sure that we
+		// aren't on the wrong filesystem here.
+		if statfs, err := fstatfs(handle); err != nil {
+			return nil, nil, err
+		} else if statfs.Type != procSuperMagic {
+			return nil, nil, fmt.Errorf("%w: incorrect /proc/self/fd filesystem type 0x%x", errUnsafeProcfs, statfs.Type)
+		}
+	}
+	return handle, runtime.UnlockOSThread, nil
+}
+
+// STATX_MNT_ID_UNIQUE is provided in golang.org/x/sys@v0.20.0, but in order to
+// avoid bumping the requirement for a single constant we can just define it
+// ourselves.
+const STATX_MNT_ID_UNIQUE = 0x4000
+
+var hasStatxMountId = sync_OnceValue(func() bool {
+	var (
+		stx unix.Statx_t
+		// We don't care which mount ID we get. The kernel will give us the
+		// unique one if it is supported.
+		wantStxMask uint32 = STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
+	)
+	err := unix.Statx(-int(unix.EBADF), "/", 0, int(wantStxMask), &stx)
+	return err == nil && stx.Mask&wantStxMask != 0
+})
+
+func getMountId(dir *os.File, path string) (uint64, error) {
+	// If we don't have statx(STATX_MNT_ID*) support, we can't do anything.
+	if !hasStatxMountId() {
+		return 0, nil
+	}
+
+	var (
+		stx unix.Statx_t
+		// We don't care which mount ID we get. The kernel will give us the
+		// unique one if it is supported.
+		wantStxMask uint32 = STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
+	)
+
+	err := unix.Statx(int(dir.Fd()), path, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW, int(wantStxMask), &stx)
+	if stx.Mask&wantStxMask == 0 {
+		// It's not a kernel limitation, for some reason we couldn't get a
+		// mount ID. Assume it's some kind of attack.
+		err = fmt.Errorf("%w: could not get mount id", errUnsafeProcfs)
+	}
+	if err != nil {
+		return 0, &os.PathError{Op: "statx(STATX_MNT_ID_...)", Path: dir.Name() + "/" + path, Err: err}
+	}
+	return stx.Mnt_id, nil
+}
+
+func checkSymlinkOvermount(procRoot *os.File, dir *os.File, path string) error {
+	// Get the mntId of our procfs handle.
+	expectedMountId, err := getMountId(procRoot, "")
+	if err != nil {
+		return err
+	}
+	// Get the mntId of the target magic-link.
+	gotMountId, err := getMountId(dir, path)
+	if err != nil {
+		return err
+	}
+	// As long as the directory mount is alive, even with wrapping mount IDs,
+	// we would expect to see a different mount ID here. (Of course, if we're
+	// using unsafeHostProcRoot() then an attaker could change this after we
+	// did this check.)
+	if expectedMountId != gotMountId {
+		return fmt.Errorf("%w: symlink %s/%s has an overmount obscuring the real link (mount ids do not match %d != %d)", errUnsafeProcfs, dir.Name(), path, expectedMountId, gotMountId)
+	}
+	return nil
+}
+
+func doRawProcSelfFdReadlink(procRoot *os.File, fd int) (string, error) {
+	fdPath := fmt.Sprintf("fd/%d", fd)
+	procFdLink, closer, err := procThreadSelf(procRoot, fdPath)
+	if err != nil {
+		return "", fmt.Errorf("get safe /proc/thread-self/%s handle: %w", fdPath, err)
+	}
+	defer procFdLink.Close()
+	defer closer()
+
+	// Try to detect if there is a mount on top of the magic-link. Since we use the handle directly
+	// provide to the closure. If the closure uses the handle directly, this
+	// should be safe in general (a mount on top of the path afterwards would
+	// not affect the handle itself) and will definitely be safe if we are
+	// using privateProcRoot() (at least since Linux 5.12[1], when anonymous
+	// mount namespaces were completely isolated from external mounts including
+	// mount propagation events).
+	//
+	// [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts
+	// onto targets that reside on shared mounts").
+	if err := checkSymlinkOvermount(procRoot, procFdLink, ""); err != nil {
+		return "", fmt.Errorf("check safety of /proc/thread-self/fd/%d magiclink: %w", fd, err)
+	}
+
+	// readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See Linux commit
+	// 65cfc6722361 ("readlinkat(), fchownat() and fstatat() with empty
+	// relative pathnames").
+	return readlinkatFile(procFdLink, "")
+}
+
+func rawProcSelfFdReadlink(fd int) (string, error) {
+	procRoot, err := getProcRoot()
+	if err != nil {
+		return "", err
+	}
+	return doRawProcSelfFdReadlink(procRoot, fd)
+}
+
+func procSelfFdReadlink(f *os.File) (string, error) {
+	return rawProcSelfFdReadlink(int(f.Fd()))
+}
+
+var (
+	errPossibleBreakout = errors.New("possible breakout detected")
+	errInvalidDirectory = errors.New("wandered into deleted directory")
+	errDeletedInode     = errors.New("cannot verify path of deleted inode")
+)
+
+func isDeadInode(file *os.File) error {
+	// If the nlink of a file drops to 0, there is an attacker deleting
+	// directories during our walk, which could result in weird /proc values.
+	// It's better to error out in this case.
+	stat, err := fstat(file)
+	if err != nil {
+		return fmt.Errorf("check for dead inode: %w", err)
+	}
+	if stat.Nlink == 0 {
+		err := errDeletedInode
+		if stat.Mode&unix.S_IFMT == unix.S_IFDIR {
+			err = errInvalidDirectory
+		}
+		return fmt.Errorf("%w %q", err, file.Name())
+	}
+	return nil
+}
+
+func checkProcSelfFdPath(path string, file *os.File) error {
+	if err := isDeadInode(file); err != nil {
+		return err
+	}
+	actualPath, err := procSelfFdReadlink(file)
+	if err != nil {
+		return fmt.Errorf("get path of handle: %w", err)
+	}
+	if actualPath != path {
+		return fmt.Errorf("%w: handle path %q doesn't match expected path %q", errPossibleBreakout, actualPath, path)
+	}
+	return nil
+}
+
+// Test hooks used in the procfs tests to verify that the fallback logic works.
+// See testing_mocks_linux_test.go and procfs_linux_test.go for more details.
+var (
+	hookForcePrivateProcRootOpenTree            = hookDummyFile
+	hookForcePrivateProcRootOpenTreeAtRecursive = hookDummyFile
+	hookForceGetProcRootUnsafe                  = hookDummy
+
+	hookForceProcSelfTask = hookDummy
+	hookForceProcSelf     = hookDummy
+)
+
+func hookDummy() bool               { return false }
+func hookDummyFile(_ *os.File) bool { return false }
--- a/vendor/github.com/cyphar/filepath-securejoin/vfs.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/vfs.go
@ -0,0 +1,35 @@
+// Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import "os"
+
+// In future this should be moved into a separate package, because now there
+// are several projects (umoci and go-mtree) that are using this sort of
+// interface.
+
+// VFS is the minimal interface necessary to use [SecureJoinVFS]. A nil VFS is
+// equivalent to using the standard [os].* family of functions. This is mainly
+// used for the purposes of mock testing, but also can be used to otherwise use
+// [SecureJoinVFS] with VFS-like system.
+type VFS interface {
+	// Lstat returns an [os.FileInfo] describing the named file. If the
+	// file is a symbolic link, the returned [os.FileInfo] describes the
+	// symbolic link. Lstat makes no attempt to follow the link.
+	// The semantics are identical to [os.Lstat].
+	Lstat(name string) (os.FileInfo, error)
+
+	// Readlink returns the destination of the named symbolic link.
+	// The semantics are identical to [os.Readlink].
+	Readlink(name string) (string, error)
+}
+
+// osVFS is the "nil" VFS, in that it just passes everything through to the os
+// module.
+type osVFS struct{}
+
+func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) }
+
+func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) }
--- a/vendor/github.com/opencontainers/runc/LICENSE
+++ b/vendor/github.com/opencontainers/runc/LICENSE
@ -0,0 +1,191 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright 2014 Docker, Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/vendor/github.com/opencontainers/runc/NOTICE
+++ b/vendor/github.com/opencontainers/runc/NOTICE
@ -0,0 +1,17 @@
+runc
+
+Copyright 2012-2015 Docker, Inc.
+
+This product includes software developed at Docker, Inc. (http://www.docker.com).
+
+The following is courtesy of our legal counsel:
+
+
+Use and transfer of Docker may be subject to certain restrictions by the
+United States and other governments.
+It is your responsibility to ensure that your use and/or transfer does not
+violate applicable laws.
+
+For more information, please see http://www.bis.doc.gov
+
+See also http://www.apache.org/dev/crypto.html and/or seek legal counsel.
--- a/vendor/github.com/opencontainers/runc/libcontainer/dmz/cloned_binary_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/dmz/cloned_binary_linux.go
@ -0,0 +1,258 @@
+package dmz
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+
+	"github.com/opencontainers/runc/libcontainer/system"
+)
+
+type SealFunc func(**os.File) error
+
+var (
+	_ SealFunc = sealMemfd
+	_ SealFunc = sealFile
+)
+
+func isExecutable(f *os.File) bool {
+	if err := unix.Faccessat(int(f.Fd()), "", unix.X_OK, unix.AT_EACCESS|unix.AT_EMPTY_PATH); err == nil {
+		return true
+	} else if err == unix.EACCES {
+		return false
+	}
+	path := "/proc/self/fd/" + strconv.Itoa(int(f.Fd()))
+	if err := unix.Access(path, unix.X_OK); err == nil {
+		return true
+	} else if err == unix.EACCES {
+		return false
+	}
+	// Cannot check -- assume it's executable (if not, exec will fail).
+	logrus.Debugf("cannot do X_OK check on binary %s -- assuming it's executable", f.Name())
+	return true
+}
+
+const baseMemfdSeals = unix.F_SEAL_SEAL | unix.F_SEAL_SHRINK | unix.F_SEAL_GROW | unix.F_SEAL_WRITE
+
+func sealMemfd(f **os.File) error {
+	if err := (*f).Chmod(0o511); err != nil {
+		return err
+	}
+	// Try to set the newer memfd sealing flags, but we ignore
+	// errors because they are not needed and we want to continue
+	// to work on older kernels.
+	fd := (*f).Fd()
+	// F_SEAL_FUTURE_WRITE -- Linux 5.1
+	_, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, unix.F_SEAL_FUTURE_WRITE)
+	// F_SEAL_EXEC -- Linux 6.3
+	const F_SEAL_EXEC = 0x20 //nolint:revive // this matches the unix.* name
+	_, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, F_SEAL_EXEC)
+	// Apply all original memfd seals.
+	_, err := unix.FcntlInt(fd, unix.F_ADD_SEALS, baseMemfdSeals)
+	return os.NewSyscallError("fcntl(F_ADD_SEALS)", err)
+}
+
+// Memfd creates a sealable executable memfd (supported since Linux 3.17).
+func Memfd(comment string) (*os.File, SealFunc, error) {
+	file, err := system.ExecutableMemfd("runc_cloned:"+comment, unix.MFD_ALLOW_SEALING|unix.MFD_CLOEXEC)
+	return file, sealMemfd, err
+}
+
+func sealFile(f **os.File) error {
+	// When sealing an O_TMPFILE-style descriptor we need to
+	// re-open the path as O_PATH to clear the existing write
+	// handle we have.
+	opath, err := os.OpenFile(fmt.Sprintf("/proc/self/fd/%d", (*f).Fd()), unix.O_PATH|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return fmt.Errorf("reopen tmpfile: %w", err)
+	}
+	_ = (*f).Close()
+	*f = opath
+	return nil
+}
+
+// otmpfile creates an open(O_TMPFILE) file in the given directory (supported
+// since Linux 3.11).
+func otmpfile(dir string) (*os.File, SealFunc, error) {
+	file, err := os.OpenFile(dir, unix.O_TMPFILE|unix.O_RDWR|unix.O_EXCL|unix.O_CLOEXEC, 0o700)
+	if err != nil {
+		return nil, nil, fmt.Errorf("O_TMPFILE creation failed: %w", err)
+	}
+	// Make sure we actually got an unlinked O_TMPFILE descriptor.
+	var stat unix.Stat_t
+	if err := unix.Fstat(int(file.Fd()), &stat); err != nil {
+		file.Close()
+		return nil, nil, fmt.Errorf("cannot fstat O_TMPFILE fd: %w", err)
+	} else if stat.Nlink != 0 {
+		file.Close()
+		return nil, nil, errors.New("O_TMPFILE has non-zero nlink")
+	}
+	return file, sealFile, err
+}
+
+// mktemp creates a classic unlinked file in the given directory.
+func mktemp(dir string) (*os.File, SealFunc, error) {
+	file, err := os.CreateTemp(dir, "runc.")
+	if err != nil {
+		return nil, nil, err
+	}
+	// Unlink the file and verify it was unlinked.
+	if err := os.Remove(file.Name()); err != nil {
+		return nil, nil, fmt.Errorf("unlinking classic tmpfile: %w", err)
+	}
+	if err := file.Chmod(0o511); err != nil {
+		return nil, nil, fmt.Errorf("chmod classic tmpfile: %w", err)
+	}
+	var stat unix.Stat_t
+	if err := unix.Fstat(int(file.Fd()), &stat); err != nil {
+		return nil, nil, fmt.Errorf("cannot fstat classic tmpfile: %w", err)
+	} else if stat.Nlink != 0 {
+		return nil, nil, fmt.Errorf("classic tmpfile %s has non-zero nlink after unlink", file.Name())
+	}
+	return file, sealFile, err
+}
+
+func getSealableFile(comment, tmpDir string) (file *os.File, sealFn SealFunc, err error) {
+	// First, try an executable memfd (supported since Linux 3.17).
+	file, sealFn, err = Memfd(comment)
+	if err == nil {
+		return
+	}
+	logrus.Debugf("memfd cloned binary failed, falling back to O_TMPFILE: %v", err)
+
+	// The tmpDir here (c.root) might be mounted noexec, so we need a couple of
+	// fallbacks to try. It's possible that none of these are writable and
+	// executable, in which case there's nothing we can practically do (other
+	// than mounting our own executable tmpfs, which would have its own
+	// issues).
+	tmpDirs := []string{
+		tmpDir,
+		os.TempDir(),
+		"/tmp",
+		".",
+		"/bin",
+		"/",
+	}
+
+	// Try to fallback to O_TMPFILE (supported since Linux 3.11).
+	for _, dir := range tmpDirs {
+		file, sealFn, err = otmpfile(dir)
+		if err != nil {
+			continue
+		}
+		if !isExecutable(file) {
+			logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir)
+			file.Close()
+			continue
+		}
+		return
+	}
+	logrus.Debugf("O_TMPFILE cloned binary failed, falling back to mktemp(): %v", err)
+	// Finally, try a classic unlinked temporary file.
+	for _, dir := range tmpDirs {
+		file, sealFn, err = mktemp(dir)
+		if err != nil {
+			continue
+		}
+		if !isExecutable(file) {
+			logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir)
+			file.Close()
+			continue
+		}
+		return
+	}
+	return nil, nil, fmt.Errorf("could not create sealable file for cloned binary: %w", err)
+}
+
+// CloneBinary creates a "sealed" clone of a given binary, which can be used to
+// thwart attempts by the container process to gain access to host binaries
+// through procfs magic-link shenanigans. For more details on why this is
+// necessary, see CVE-2019-5736.
+func CloneBinary(src io.Reader, size int64, name, tmpDir string) (*os.File, error) {
+	logrus.Debugf("cloning %s binary (%d bytes)", name, size)
+	file, sealFn, err := getSealableFile(name, tmpDir)
+	if err != nil {
+		return nil, err
+	}
+	copied, err := system.Copy(file, src)
+	if err != nil {
+		file.Close()
+		return nil, fmt.Errorf("copy binary: %w", err)
+	} else if copied != size {
+		file.Close()
+		return nil, fmt.Errorf("copied binary size mismatch: %d != %d", copied, size)
+	}
+	if err := sealFn(&file); err != nil {
+		file.Close()
+		return nil, fmt.Errorf("could not seal fd: %w", err)
+	}
+	return file, nil
+}
+
+// IsCloned returns whether the given file can be guaranteed to be a safe exe.
+func IsCloned(exe *os.File) bool {
+	seals, err := unix.FcntlInt(exe.Fd(), unix.F_GET_SEALS, 0)
+	if err != nil {
+		// /proc/self/exe is probably not a memfd
+		logrus.Debugf("F_GET_SEALS on %s failed: %v", exe.Name(), err)
+		return false
+	}
+	// The memfd must have all of the base seals applied.
+	logrus.Debugf("checking %s memfd seals: 0x%x", exe.Name(), seals)
+	return seals&baseMemfdSeals == baseMemfdSeals
+}
+
+// CloneSelfExe makes a clone of the current process's binary (through
+// /proc/self/exe). This binary can then be used for "runc init" in order to
+// make sure the container process can never resolve the original runc binary.
+// For more details on why this is necessary, see CVE-2019-5736.
+func CloneSelfExe(tmpDir string) (*os.File, error) {
+	// Try to create a temporary overlayfs to produce a readonly version of
+	// /proc/self/exe that cannot be "unwrapped" by the container. In contrast
+	// to CloneBinary, this technique does not require any extra memory usage
+	// and does not have the (fairly noticeable) performance impact of copying
+	// a large binary file into a memfd.
+	//
+	// Based on some basic performance testing, the overlayfs approach has
+	// effectively no performance overhead (it is on par with both
+	// MS_BIND+MS_RDONLY and no binary cloning at all) while memfd copying adds
+	// around ~60% overhead during container startup.
+	overlayFile, err := sealedOverlayfs("/proc/self/exe", tmpDir)
+	if err == nil {
+		logrus.Debug("runc-dmz: using overlayfs for sealed /proc/self/exe") // used for tests
+		return overlayFile, nil
+	}
+	logrus.WithError(err).Debugf("could not use overlayfs for /proc/self/exe sealing -- falling back to making a temporary copy")
+
+	selfExe, err := os.Open("/proc/self/exe")
+	if err != nil {
+		return nil, fmt.Errorf("opening current binary: %w", err)
+	}
+	defer selfExe.Close()
+
+	stat, err := selfExe.Stat()
+	if err != nil {
+		return nil, fmt.Errorf("checking /proc/self/exe size: %w", err)
+	}
+	size := stat.Size()
+
+	return CloneBinary(selfExe, size, "/proc/self/exe", tmpDir)
+}
+
+// IsSelfExeCloned returns whether /proc/self/exe is a cloned binary that can
+// be guaranteed to be safe. This means that it must be a sealed memfd. Other
+// types of clones cannot be completely verified as safe.
+func IsSelfExeCloned() bool {
+	selfExe, err := os.Open("/proc/self/exe")
+	if err != nil {
+		logrus.Debugf("open /proc/self/exe failed: %v", err)
+		return false
+	}
+	defer selfExe.Close()
+	return IsCloned(selfExe)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/dmz/overlayfs_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/dmz/overlayfs_linux.go
@ -0,0 +1,122 @@
+package dmz
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+
+	"golang.org/x/sys/unix"
+
+	"github.com/opencontainers/runc/libcontainer/utils"
+)
+
+func fsopen(fsName string, flags int) (*os.File, error) {
+	// Make sure we always set O_CLOEXEC.
+	flags |= unix.FSOPEN_CLOEXEC
+	fd, err := unix.Fsopen(fsName, flags)
+	if err != nil {
+		return nil, os.NewSyscallError("fsopen "+fsName, err)
+	}
+	return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil
+}
+
+func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) {
+	// Make sure we always set O_CLOEXEC.
+	flags |= unix.FSMOUNT_CLOEXEC
+	fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs)
+	if err != nil {
+		return nil, os.NewSyscallError("fsmount "+ctx.Name(), err)
+	}
+	runtime.KeepAlive(ctx) // make sure fd is kept alive while it's used
+	return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil
+}
+
+func escapeOverlayLowerDir(path string) string {
+	// If the lowerdir path contains ":" we need to escape them, and if there
+	// were any escape characters already (\) we need to escape those first.
+	return strings.ReplaceAll(strings.ReplaceAll(path, `\`, `\\`), `:`, `\:`)
+}
+
+// sealedOverlayfs will create an internal overlayfs mount using fsopen() that
+// uses the directory containing the binary as a lowerdir and a temporary tmpfs
+// as an upperdir. There is no way to "unwrap" this (unlike MS_BIND+MS_RDONLY)
+// and so we can create a safe zero-copy sealed version of /proc/self/exe.
+// This only works for privileged users and on kernels with overlayfs and
+// fsopen() enabled.
+//
+// TODO: Since Linux 5.11, overlayfs can be created inside user namespaces so
+// it is technically possible to create an overlayfs even for rootless
+// containers. Unfortunately, this would require some ugly manual CGo+fork
+// magic so we can do this later if we feel it's really needed.
+func sealedOverlayfs(binPath, tmpDir string) (_ *os.File, Err error) {
+	// Try to do the superblock creation first to bail out early if we can't
+	// use this method.
+	overlayCtx, err := fsopen("overlay", unix.FSOPEN_CLOEXEC)
+	if err != nil {
+		return nil, err
+	}
+	defer overlayCtx.Close()
+
+	// binPath is going to be /proc/self/exe, so do a readlink to get the real
+	// path. overlayfs needs the real underlying directory for this protection
+	// mode to work properly.
+	if realPath, err := os.Readlink(binPath); err == nil {
+		binPath = realPath
+	}
+	binLowerDirPath, binName := filepath.Split(binPath)
+	// Escape any ":"s or "\"s in the path.
+	binLowerDirPath = escapeOverlayLowerDir(binLowerDirPath)
+
+	// Overlayfs requires two lowerdirs in order to run in "lower-only" mode,
+	// where writes are completely blocked. Ideally we would create a dummy
+	// tmpfs for this, but it turns out that overlayfs doesn't allow for
+	// anonymous mountns paths.
+	// NOTE: I'm working on a patch to fix this but it won't be backported.
+	dummyLowerDirPath := escapeOverlayLowerDir(tmpDir)
+
+	// Configure the lowerdirs. The binary lowerdir needs to be on the top to
+	// ensure that a file called "runc" (binName) in the dummy lowerdir doesn't
+	// mask the binary.
+	lowerDirStr := binLowerDirPath + ":" + dummyLowerDirPath
+	if err := unix.FsconfigSetString(int(overlayCtx.Fd()), "lowerdir", lowerDirStr); err != nil {
+		return nil, fmt.Errorf("fsconfig set overlayfs lowerdir=%s: %w", lowerDirStr, err)
+	}
+
+	// We don't care about xino (Linux 4.17) but it will be auto-enabled on
+	// some systems (if /run/runc and /usr/bin are on different filesystems)
+	// and this produces spurious dmesg log entries. We can safely ignore
+	// errors when disabling this because we don't actually care about the
+	// setting and we're just opportunistically disabling it.
+	_ = unix.FsconfigSetString(int(overlayCtx.Fd()), "xino", "off")
+
+	// Get an actual handle to the overlayfs.
+	if err := unix.FsconfigCreate(int(overlayCtx.Fd())); err != nil {
+		return nil, os.NewSyscallError("fsconfig create overlayfs", err)
+	}
+	overlayFd, err := fsmount(overlayCtx, unix.FSMOUNT_CLOEXEC, unix.MS_RDONLY|unix.MS_NODEV|unix.MS_NOSUID)
+	if err != nil {
+		return nil, err
+	}
+	defer overlayFd.Close()
+
+	// Grab a handle to the binary through overlayfs.
+	exeFile, err := utils.Openat(overlayFd, binName, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return nil, fmt.Errorf("open %s from overlayfs (lowerdir=%s): %w", binName, lowerDirStr, err)
+	}
+	// NOTE: We would like to check that exeFile is the same as /proc/self/exe,
+	// except this is a little difficult. Depending on what filesystems the
+	// layers are on, overlayfs can remap the inode numbers (and it always
+	// creates its own device numbers -- see ovl_map_dev_ino) so we can't do a
+	// basic stat-based check. The only reasonable option would be to hash both
+	// files and compare them, but this would require fully reading both files
+	// which would produce a similar performance overhead to memfd cloning.
+	//
+	// Ultimately, there isn't a real attack to be worried about here. An
+	// attacker would need to be able to modify files in /usr/sbin (or wherever
+	// runc lives), at which point they could just replace the runc binary with
+	// something malicious anyway.
+	return exeFile, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
@ -0,0 +1,216 @@
+//go:build linux
+
+package system
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+	"syscall"
+	"unsafe"
+
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+)
+
+type ParentDeathSignal int
+
+func (p ParentDeathSignal) Restore() error {
+	if p == 0 {
+		return nil
+	}
+	current, err := GetParentDeathSignal()
+	if err != nil {
+		return err
+	}
+	if p == current {
+		return nil
+	}
+	return p.Set()
+}
+
+func (p ParentDeathSignal) Set() error {
+	return SetParentDeathSignal(uintptr(p))
+}
+
+func Exec(cmd string, args []string, env []string) error {
+	for {
+		err := unix.Exec(cmd, args, env)
+		if err != unix.EINTR {
+			return &os.PathError{Op: "exec", Path: cmd, Err: err}
+		}
+	}
+}
+
+func execveat(fd uintptr, pathname string, args []string, env []string, flags int) error {
+	pathnamep, err := syscall.BytePtrFromString(pathname)
+	if err != nil {
+		return err
+	}
+
+	argvp, err := syscall.SlicePtrFromStrings(args)
+	if err != nil {
+		return err
+	}
+
+	envp, err := syscall.SlicePtrFromStrings(env)
+	if err != nil {
+		return err
+	}
+
+	_, _, errno := syscall.Syscall6(
+		unix.SYS_EXECVEAT,
+		fd,
+		uintptr(unsafe.Pointer(pathnamep)),
+		uintptr(unsafe.Pointer(&argvp[0])),
+		uintptr(unsafe.Pointer(&envp[0])),
+		uintptr(flags),
+		0,
+	)
+	return errno
+}
+
+func Fexecve(fd uintptr, args []string, env []string) error {
+	var err error
+	for {
+		err = execveat(fd, "", args, env, unix.AT_EMPTY_PATH)
+		if err != unix.EINTR { // nolint:errorlint // unix errors are bare
+			break
+		}
+	}
+	if err == unix.ENOSYS { // nolint:errorlint // unix errors are bare
+		// Fallback to classic /proc/self/fd/... exec.
+		return Exec("/proc/self/fd/"+strconv.Itoa(int(fd)), args, env)
+	}
+	return os.NewSyscallError("execveat", err)
+}
+
+func SetParentDeathSignal(sig uintptr) error {
+	if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
+		return err
+	}
+	return nil
+}
+
+func GetParentDeathSignal() (ParentDeathSignal, error) {
+	var sig int
+	if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
+		return -1, err
+	}
+	return ParentDeathSignal(sig), nil
+}
+
+func SetKeepCaps() error {
+	if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func ClearKeepCaps() error {
+	if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func Setctty() error {
+	if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
+		return err
+	}
+	return nil
+}
+
+// SetSubreaper sets the value i as the subreaper setting for the calling process
+func SetSubreaper(i int) error {
+	return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
+}
+
+// GetSubreaper returns the subreaper setting for the calling process
+func GetSubreaper() (int, error) {
+	var i uintptr
+
+	if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
+		return -1, err
+	}
+
+	return int(i), nil
+}
+
+func ExecutableMemfd(comment string, flags int) (*os.File, error) {
+	// Try to use MFD_EXEC first. On pre-6.3 kernels we get -EINVAL for this
+	// flag. On post-6.3 kernels, with vm.memfd_noexec=1 this ensures we get an
+	// executable memfd. For vm.memfd_noexec=2 this is a bit more complicated.
+	// The original vm.memfd_noexec=2 implementation incorrectly silently
+	// allowed MFD_EXEC[1] -- this should be fixed in 6.6. On 6.6 and newer
+	// kernels, we will get -EACCES if we try to use MFD_EXEC with
+	// vm.memfd_noexec=2 (for 6.3-6.5, -EINVAL was the intended return value).
+	//
+	// The upshot is we only need to retry without MFD_EXEC on -EINVAL because
+	// it just so happens that passing MFD_EXEC bypasses vm.memfd_noexec=2 on
+	// kernels where -EINVAL is actually a security denial.
+	memfd, err := unix.MemfdCreate(comment, flags|unix.MFD_EXEC)
+	if err == unix.EINVAL {
+		memfd, err = unix.MemfdCreate(comment, flags)
+	}
+	if err != nil {
+		if err == unix.EACCES {
+			logrus.Info("memfd_create(MFD_EXEC) failed, possibly due to vm.memfd_noexec=2 -- falling back to less secure O_TMPFILE")
+		}
+		err := os.NewSyscallError("memfd_create", err)
+		return nil, fmt.Errorf("failed to create executable memfd: %w", err)
+	}
+	return os.NewFile(uintptr(memfd), "/memfd:"+comment), nil
+}
+
+// Copy is like io.Copy except it uses sendfile(2) if the source and sink are
+// both (*os.File) as an optimisation to make copies faster.
+func Copy(dst io.Writer, src io.Reader) (copied int64, err error) {
+	dstFile, _ := dst.(*os.File)
+	srcFile, _ := src.(*os.File)
+
+	if dstFile != nil && srcFile != nil {
+		fi, err := srcFile.Stat()
+		if err != nil {
+			goto fallback
+		}
+		size := fi.Size()
+		for size > 0 {
+			n, err := unix.Sendfile(int(dstFile.Fd()), int(srcFile.Fd()), nil, int(size))
+			if n > 0 {
+				size -= int64(n)
+				copied += int64(n)
+			}
+			if err == unix.EINTR {
+				continue
+			}
+			if err != nil {
+				if copied == 0 {
+					// If we haven't copied anything so far, we can safely just
+					// fallback to io.Copy. We could always do the fallback but
+					// it's safer to error out in the case of a partial copy
+					// followed by an error (which should never happen).
+					goto fallback
+				}
+				return copied, fmt.Errorf("partial sendfile copy: %w", err)
+			}
+		}
+		return copied, nil
+	}
+
+fallback:
+	return io.Copy(dst, src)
+}
+
+// SetLinuxPersonality sets the Linux execution personality. For more information see the personality syscall documentation.
+// checkout getLinuxPersonalityFromStr() from libcontainer/specconv/spec_linux.go for type conversion.
+func SetLinuxPersonality(personality int) error {
+	_, _, errno := unix.Syscall(unix.SYS_PERSONALITY, uintptr(personality), 0, 0)
+	if errno != 0 {
+		return &os.SyscallError{Syscall: "set_personality", Err: errno}
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
@ -0,0 +1,127 @@
+package system
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+)
+
+// State is the status of a process.
+type State rune
+
+const ( // Only values for Linux 3.14 and later are listed here
+	Dead        State = 'X'
+	DiskSleep   State = 'D'
+	Running     State = 'R'
+	Sleeping    State = 'S'
+	Stopped     State = 'T'
+	TracingStop State = 't'
+	Zombie      State = 'Z'
+	Parked      State = 'P'
+	Idle        State = 'I'
+)
+
+// String forms of the state from proc(5)'s documentation for
+// /proc/[pid]/status' "State" field.
+func (s State) String() string {
+	switch s {
+	case Dead:
+		return "dead"
+	case DiskSleep:
+		return "disk sleep"
+	case Running:
+		return "running"
+	case Sleeping:
+		return "sleeping"
+	case Stopped:
+		return "stopped"
+	case TracingStop:
+		return "tracing stop"
+	case Zombie:
+		return "zombie"
+	case Parked:
+		return "parked"
+	case Idle:
+		return "idle" // kernel thread
+	default:
+		return fmt.Sprintf("unknown (%c)", s)
+	}
+}
+
+// Stat_t represents the information from /proc/[pid]/stat, as
+// described in proc(5) with names based on the /proc/[pid]/status
+// fields.
+type Stat_t struct {
+	// Name is the command run by the process.
+	Name string
+
+	// State is the state of the process.
+	State State
+
+	// StartTime is the number of clock ticks after system boot (since
+	// Linux 2.6).
+	StartTime uint64
+}
+
+// Stat returns a Stat_t instance for the specified process.
+func Stat(pid int) (stat Stat_t, err error) {
+	bytes, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
+	if err != nil {
+		return stat, err
+	}
+	return parseStat(string(bytes))
+}
+
+func parseStat(data string) (stat Stat_t, err error) {
+	// Example:
+	// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
+	// The fields are space-separated, see full description in proc(5).
+	//
+	// We are only interested in:
+	//  * field 2: process name. It is the only field enclosed into
+	//    parenthesis, as it can contain spaces (and parenthesis) inside.
+	//  * field 3: process state, a single character (%c)
+	//  * field 22: process start time, a long unsigned integer (%llu).
+
+	// 1. Look for the first '(' and the last ')' first, what's in between is Name.
+	//    We expect at least 20 fields and a space after the last one.
+
+	const minAfterName = 20*2 + 1 // the min field is '0 '.
+
+	first := strings.IndexByte(data, '(')
+	if first < 0 || first+minAfterName >= len(data) {
+		return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data)
+	}
+
+	last := strings.LastIndexByte(data, ')')
+	if last <= first || last+minAfterName >= len(data) {
+		return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data)
+	}
+
+	stat.Name = data[first+1 : last]
+
+	// 2. Remove fields 1 and 2 and a space after. State is right after.
+	data = data[last+2:]
+	stat.State = State(data[0])
+
+	// 3. StartTime is field 22, data is at field 3 now, so we need to skip 19 spaces.
+	skipSpaces := 22 - 3
+	for first = 0; skipSpaces > 0 && first < len(data); first++ {
+		if data[first] == ' ' {
+			skipSpaces--
+		}
+	}
+	// Now first points to StartTime; look for space right after.
+	i := strings.IndexByte(data[first:], ' ')
+	if i < 0 {
+		return stat, fmt.Errorf("invalid stat data (too short): %q", data)
+	}
+	stat.StartTime, err = strconv.ParseUint(data[first:first+i], 10, 64)
+	if err != nil {
+		return stat, fmt.Errorf("invalid stat data (bad start time): %w", err)
+	}
+
+	return stat, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux.go
@ -0,0 +1,15 @@
+//go:build go1.23
+
+package system
+
+import (
+	"syscall"
+)
+
+// ClearRlimitNofileCache clears go runtime's nofile rlimit cache. The argument
+// is process RLIMIT_NOFILE values. Relies on go.dev/cl/588076.
+func ClearRlimitNofileCache(lim *syscall.Rlimit) {
+	// Ignore the return values since we only need to clean the cache,
+	// the limit is going to be set via unix.Prlimit elsewhere.
+	_ = syscall.Setrlimit(syscall.RLIMIT_NOFILE, lim)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux_go122.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux_go122.go
@ -0,0 +1,27 @@
+//go:build !go1.23
+
+// TODO: remove this file once go 1.22 is no longer supported.
+
+package system
+
+import (
+	"sync/atomic"
+	"syscall"
+	_ "unsafe" // Needed for go:linkname to work.
+)
+
+//go:linkname syscallOrigRlimitNofile syscall.origRlimitNofile
+var syscallOrigRlimitNofile atomic.Pointer[syscall.Rlimit]
+
+// ClearRlimitNofileCache clears go runtime's nofile rlimit cache.
+// The argument is process RLIMIT_NOFILE values.
+func ClearRlimitNofileCache(_ *syscall.Rlimit) {
+	// As reported in issue #4195, the new version of go runtime(since 1.19)
+	// will cache rlimit-nofile. Before executing execve, the rlimit-nofile
+	// of the process will be restored with the cache. In runc, this will
+	// cause the rlimit-nofile setting by the parent process for the container
+	// to become invalid. It can be solved by clearing this cache. But
+	// unfortunately, go stdlib doesn't provide such function, so we need to
+	// link to the private var `origRlimitNofile` in package syscall to hack.
+	syscallOrigRlimitNofile.Store(nil)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
@ -0,0 +1,119 @@
+package utils
+
+/*
+ * Copyright 2016, 2017 SUSE LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import (
+	"fmt"
+	"os"
+	"runtime"
+
+	"golang.org/x/sys/unix"
+)
+
+// MaxNameLen is the maximum length of the name of a file descriptor being sent
+// using SendFile. The name of the file handle returned by RecvFile will never be
+// larger than this value.
+const MaxNameLen = 4096
+
+// oobSpace is the size of the oob slice required to store a single FD. Note
+// that unix.UnixRights appears to make the assumption that fd is always int32,
+// so sizeof(fd) = 4.
+var oobSpace = unix.CmsgSpace(4)
+
+// RecvFile waits for a file descriptor to be sent over the given AF_UNIX
+// socket. The file name of the remote file descriptor will be recreated
+// locally (it is sent as non-auxiliary data in the same payload).
+func RecvFile(socket *os.File) (_ *os.File, Err error) {
+	name := make([]byte, MaxNameLen)
+	oob := make([]byte, oobSpace)
+
+	sockfd := socket.Fd()
+	n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC)
+	if err != nil {
+		return nil, err
+	}
+	if n >= MaxNameLen || oobn != oobSpace {
+		return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
+	}
+	// Truncate.
+	name = name[:n]
+	oob = oob[:oobn]
+
+	scms, err := unix.ParseSocketControlMessage(oob)
+	if err != nil {
+		return nil, err
+	}
+
+	// We cannot control how many SCM_RIGHTS we receive, and upon receiving
+	// them all of the descriptors are installed in our fd table, so we need to
+	// parse all of the SCM_RIGHTS we received in order to close all of the
+	// descriptors on error.
+	var fds []int
+	defer func() {
+		for i, fd := range fds {
+			if i == 0 && Err == nil {
+				// Only close the first one on error.
+				continue
+			}
+			// Always close extra ones.
+			_ = unix.Close(fd)
+		}
+	}()
+	var lastErr error
+	for _, scm := range scms {
+		if scm.Header.Type == unix.SCM_RIGHTS {
+			scmFds, err := unix.ParseUnixRights(&scm)
+			if err != nil {
+				lastErr = err
+			} else {
+				fds = append(fds, scmFds...)
+			}
+		}
+	}
+	if lastErr != nil {
+		return nil, lastErr
+	}
+
+	// We do this after collecting the fds to make sure we close them all when
+	// returning an error here.
+	if len(scms) != 1 {
+		return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
+	}
+	if len(fds) != 1 {
+		return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
+	}
+	return os.NewFile(uintptr(fds[0]), string(name)), nil
+}
+
+// SendFile sends a file over the given AF_UNIX socket. file.Name() is also
+// included so that if the other end uses RecvFile, the file will have the same
+// name information.
+func SendFile(socket *os.File, file *os.File) error {
+	name := file.Name()
+	if len(name) >= MaxNameLen {
+		return fmt.Errorf("sendfd: filename too long: %s", name)
+	}
+	err := SendRawFd(socket, name, file.Fd())
+	runtime.KeepAlive(file)
+	return err
+}
+
+// SendRawFd sends a specific file descriptor over the given AF_UNIX socket.
+func SendRawFd(socket *os.File, msg string, fd uintptr) error {
+	oob := unix.UnixRights(int(fd))
+	return unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
@ -0,0 +1,115 @@
+package utils
+
+import (
+	"encoding/json"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"golang.org/x/sys/unix"
+)
+
+const (
+	exitSignalOffset = 128
+)
+
+// ExitStatus returns the correct exit status for a process based on if it
+// was signaled or exited cleanly
+func ExitStatus(status unix.WaitStatus) int {
+	if status.Signaled() {
+		return exitSignalOffset + int(status.Signal())
+	}
+	return status.ExitStatus()
+}
+
+// WriteJSON writes the provided struct v to w using standard json marshaling
+// without a trailing newline. This is used instead of json.Encoder because
+// there might be a problem in json decoder in some cases, see:
+// https://github.com/docker/docker/issues/14203#issuecomment-174177790
+func WriteJSON(w io.Writer, v interface{}) error {
+	data, err := json.Marshal(v)
+	if err != nil {
+		return err
+	}
+	_, err = w.Write(data)
+	return err
+}
+
+// CleanPath makes a path safe for use with filepath.Join. This is done by not
+// only cleaning the path, but also (if the path is relative) adding a leading
+// '/' and cleaning it (then removing the leading '/'). This ensures that a
+// path resulting from prepending another path will always resolve to lexically
+// be a subdirectory of the prefixed path. This is all done lexically, so paths
+// that include symlinks won't be safe as a result of using CleanPath.
+func CleanPath(path string) string {
+	// Deal with empty strings nicely.
+	if path == "" {
+		return ""
+	}
+
+	// Ensure that all paths are cleaned (especially problematic ones like
+	// "/../../../../../" which can cause lots of issues).
+	path = filepath.Clean(path)
+
+	// If the path isn't absolute, we need to do more processing to fix paths
+	// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
+	// paths to relative ones.
+	if !filepath.IsAbs(path) {
+		path = filepath.Clean(string(os.PathSeparator) + path)
+		// This can't fail, as (by definition) all paths are relative to root.
+		path, _ = filepath.Rel(string(os.PathSeparator), path)
+	}
+
+	// Clean the path again for good measure.
+	return filepath.Clean(path)
+}
+
+// stripRoot returns the passed path, stripping the root path if it was
+// (lexicially) inside it. Note that both passed paths will always be treated
+// as absolute, and the returned path will also always be absolute. In
+// addition, the paths are cleaned before stripping the root.
+func stripRoot(root, path string) string {
+	// Make the paths clean and absolute.
+	root, path = CleanPath("/"+root), CleanPath("/"+path)
+	switch {
+	case path == root:
+		path = "/"
+	case root == "/":
+		// do nothing
+	case strings.HasPrefix(path, root+"/"):
+		path = strings.TrimPrefix(path, root+"/")
+	}
+	return CleanPath("/" + path)
+}
+
+// SearchLabels searches through a list of key=value pairs for a given key,
+// returning its value, and the binary flag telling whether the key exist.
+func SearchLabels(labels []string, key string) (string, bool) {
+	key += "="
+	for _, s := range labels {
+		if strings.HasPrefix(s, key) {
+			return s[len(key):], true
+		}
+	}
+	return "", false
+}
+
+// Annotations returns the bundle path and user defined annotations from the
+// libcontainer state.  We need to remove the bundle because that is a label
+// added by libcontainer.
+func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
+	userAnnotations = make(map[string]string)
+	for _, l := range labels {
+		name, value, ok := strings.Cut(l, "=")
+		if !ok {
+			continue
+		}
+		if name == "bundle" {
+			bundle = value
+		} else {
+			userAnnotations[name] = value
+		}
+	}
+	return
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
@ -0,0 +1,360 @@
+//go:build !windows
+
+package utils
+
+import (
+	"fmt"
+	"math"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	_ "unsafe" // for go:linkname
+
+	securejoin "github.com/cyphar/filepath-securejoin"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+)
+
+// EnsureProcHandle returns whether or not the given file handle is on procfs.
+func EnsureProcHandle(fh *os.File) error {
+	var buf unix.Statfs_t
+	if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
+		return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
+	}
+	if buf.Type != unix.PROC_SUPER_MAGIC {
+		return fmt.Errorf("%s is not on procfs", fh.Name())
+	}
+	return nil
+}
+
+var (
+	haveCloseRangeCloexecBool bool
+	haveCloseRangeCloexecOnce sync.Once
+)
+
+func haveCloseRangeCloexec() bool {
+	haveCloseRangeCloexecOnce.Do(func() {
+		// Make sure we're not closing a random file descriptor.
+		tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0)
+		if err != nil {
+			return
+		}
+		defer unix.Close(tmpFd)
+
+		err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC)
+		// Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC).
+		// -ENOSYS and -EINVAL ultimately mean we don't have support, but any
+		// other potential error would imply that even the most basic close
+		// operation wouldn't work.
+		haveCloseRangeCloexecBool = err == nil
+	})
+	return haveCloseRangeCloexecBool
+}
+
+type fdFunc func(fd int)
+
+// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
+// the current process.
+func fdRangeFrom(minFd int, fn fdFunc) error {
+	procSelfFd, closer := ProcThreadSelf("fd")
+	defer closer()
+
+	fdDir, err := os.Open(procSelfFd)
+	if err != nil {
+		return err
+	}
+	defer fdDir.Close()
+
+	if err := EnsureProcHandle(fdDir); err != nil {
+		return err
+	}
+
+	fdList, err := fdDir.Readdirnames(-1)
+	if err != nil {
+		return err
+	}
+	for _, fdStr := range fdList {
+		fd, err := strconv.Atoi(fdStr)
+		// Ignore non-numeric file names.
+		if err != nil {
+			continue
+		}
+		// Ignore descriptors lower than our specified minimum.
+		if fd < minFd {
+			continue
+		}
+		// Ignore the file descriptor we used for readdir, as it will be closed
+		// when we return.
+		if uintptr(fd) == fdDir.Fd() {
+			continue
+		}
+		// Run the closure.
+		fn(fd)
+	}
+	return nil
+}
+
+// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
+// equal to minFd in the current process.
+func CloseExecFrom(minFd int) error {
+	// Use close_range(CLOSE_RANGE_CLOEXEC) if possible.
+	if haveCloseRangeCloexec() {
+		err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
+		return os.NewSyscallError("close_range", err)
+	}
+	// Otherwise, fall back to the standard loop.
+	return fdRangeFrom(minFd, unix.CloseOnExec)
+}
+
+//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor
+
+// In order to make sure we do not close the internal epoll descriptors the Go
+// runtime uses, we need to ensure that we skip descriptors that match
+// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing,
+// unfortunately there's no other way to be sure we're only keeping the file
+// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
+func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
+
+// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
+// current process, except for those critical to Go's runtime (such as the
+// netpoll management descriptors).
+//
+// NOTE: That this function is incredibly dangerous to use in most Go code, as
+// closing file descriptors from underneath *os.File handles can lead to very
+// bad behaviour (the closed file descriptor can be re-used and then any
+// *os.File operations would apply to the wrong file). This function is only
+// intended to be called from the last stage of runc init.
+func UnsafeCloseFrom(minFd int) error {
+	// We cannot use close_range(2) even if it is available, because we must
+	// not close some file descriptors.
+	return fdRangeFrom(minFd, func(fd int) {
+		if runtime_IsPollDescriptor(uintptr(fd)) {
+			// These are the Go runtimes internal netpoll file descriptors.
+			// These file descriptors are operated on deep in the Go scheduler,
+			// and closing those files from underneath Go can result in panics.
+			// There is no issue with keeping them because they are not
+			// executable and are not useful to an attacker anyway. Also we
+			// don't have any choice.
+			return
+		}
+		// There's nothing we can do about errors from close(2), and the
+		// only likely error to be seen is EBADF which indicates the fd was
+		// already closed (in which case, we got what we wanted).
+		_ = unix.Close(fd)
+	})
+}
+
+// NewSockPair returns a new SOCK_STREAM unix socket pair.
+func NewSockPair(name string) (parent, child *os.File, err error) {
+	fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
+	if err != nil {
+		return nil, nil, err
+	}
+	return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
+}
+
+// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
+// corresponding to the unsafePath resolved within the root. Before passing the
+// fd, this path is verified to have been inside the root -- so operating on it
+// through the passed fdpath should be safe. Do not access this path through
+// the original path strings, and do not attempt to use the pathname outside of
+// the passed closure (the file handle will be freed once the closure returns).
+func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
+	// Remove the root then forcefully resolve inside the root.
+	unsafePath = stripRoot(root, unsafePath)
+	path, err := securejoin.SecureJoin(root, unsafePath)
+	if err != nil {
+		return fmt.Errorf("resolving path inside rootfs failed: %w", err)
+	}
+
+	procSelfFd, closer := ProcThreadSelf("fd/")
+	defer closer()
+
+	// Open the target path.
+	fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return fmt.Errorf("open o_path procfd: %w", err)
+	}
+	defer fh.Close()
+
+	procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd())))
+	// Double-check the path is the one we expected.
+	if realpath, err := os.Readlink(procfd); err != nil {
+		return fmt.Errorf("procfd verification failed: %w", err)
+	} else if realpath != path {
+		return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
+	}
+
+	return fn(procfd)
+}
+
+type ProcThreadSelfCloser func()
+
+var (
+	haveProcThreadSelf     bool
+	haveProcThreadSelfOnce sync.Once
+)
+
+// ProcThreadSelf returns a string that is equivalent to
+// /proc/thread-self/<subpath>, with a graceful fallback on older kernels where
+// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin,
+// meaning that the passed string needs to be trusted. The caller _must_ call
+// the returned procThreadSelfCloser function (which is runtime.UnlockOSThread)
+// *only once* after it has finished using the returned path string.
+func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
+	haveProcThreadSelfOnce.Do(func() {
+		if _, err := os.Stat("/proc/thread-self/"); err == nil {
+			haveProcThreadSelf = true
+		} else {
+			logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err)
+		}
+	})
+
+	// We need to lock our thread until the caller is done with the path string
+	// because any non-atomic operation on the path (such as opening a file,
+	// then reading it) could be interrupted by the Go runtime where the
+	// underlying thread is swapped out and the original thread is killed,
+	// resulting in pull-your-hair-out-hard-to-debug issues in the caller. In
+	// addition, the pre-3.17 fallback makes everything non-atomic because the
+	// same thing could happen between unix.Gettid() and the path operations.
+	//
+	// In theory, we don't need to lock in the atomic user case when using
+	// /proc/thread-self/, but it's better to be safe than sorry (and there are
+	// only one or two truly atomic users of /proc/thread-self/).
+	runtime.LockOSThread()
+
+	threadSelf := "/proc/thread-self/"
+	if !haveProcThreadSelf {
+		// Pre-3.17 kernels did not have /proc/thread-self, so do it manually.
+		threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/"
+		if _, err := os.Stat(threadSelf); err != nil {
+			// Unfortunately, this code is called from rootfs_linux.go where we
+			// are running inside the pid namespace of the container but /proc
+			// is the host's procfs. Unfortunately there is no real way to get
+			// the correct tid to use here (the kernel age means we cannot do
+			// things like set up a private fsopen("proc") -- even scanning
+			// NSpid in all of the tasks in /proc/self/task/*/status requires
+			// Linux 4.1).
+			//
+			// So, we just have to assume that /proc/self is acceptable in this
+			// one specific case.
+			if os.Getpid() == 1 {
+				logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err)
+			} else {
+				// This should never happen, but the fallback should work in most cases...
+				logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err)
+			}
+			threadSelf = "/proc/self/"
+		}
+	}
+	return threadSelf + subpath, runtime.UnlockOSThread
+}
+
+// ProcThreadSelfFd is small wrapper around ProcThreadSelf to make it easier to
+// create a /proc/thread-self handle for given file descriptor.
+//
+// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but
+// without using fmt.Sprintf to avoid unneeded overhead.
+func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
+	return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
+}
+
+// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"),
+// but properly handling the case where path or root are "/".
+//
+// NOTE: The return value only make sense if the path doesn't contain "..".
+func IsLexicallyInRoot(root, path string) bool {
+	if root != "/" {
+		root += "/"
+	}
+	if path != "/" {
+		path += "/"
+	}
+	return strings.HasPrefix(path, root)
+}
+
+// MkdirAllInRootOpen attempts to make
+//
+//	path, _ := securejoin.SecureJoin(root, unsafePath)
+//	os.MkdirAll(path, mode)
+//	os.Open(path)
+//
+// safer against attacks where components in the path are changed between
+// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
+// try to detect any symlink components in the path while we are doing the
+// MkdirAll.
+//
+// NOTE: If unsafePath is a subpath of root, we assume that you have already
+// called SecureJoin and so we use the provided path verbatim without resolving
+// any symlinks (this is done in a way that avoids symlink-exchange races).
+// This means that the path also must not contain ".." elements, otherwise an
+// error will occur.
+//
+// This uses securejoin.MkdirAllHandle under the hood, but it has special
+// handling if unsafePath has already been scoped within the rootfs (this is
+// needed for a lot of runc callers and fixing this would require reworking a
+// lot of path logic).
+func MkdirAllInRootOpen(root, unsafePath string, mode os.FileMode) (_ *os.File, Err error) {
+	// If the path is already "within" the root, get the path relative to the
+	// root and use that as the unsafe path. This is necessary because a lot of
+	// MkdirAllInRootOpen callers have already done SecureJoin, and refactoring
+	// all of them to stop using these SecureJoin'd paths would require a fair
+	// amount of work.
+	// TODO(cyphar): Do the refactor to libpathrs once it's ready.
+	if IsLexicallyInRoot(root, unsafePath) {
+		subPath, err := filepath.Rel(root, unsafePath)
+		if err != nil {
+			return nil, err
+		}
+		unsafePath = subPath
+	}
+
+	// Check for any silly mode bits.
+	if mode&^0o7777 != 0 {
+		return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
+	}
+	// Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if
+	// passed. While it would make sense to return an error in that case (since
+	// the user has asked for a mode that won't be applied), for compatibility
+	// reasons we have to ignore these bits.
+	if ignoredBits := mode &^ 0o1777; ignoredBits != 0 {
+		logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits)
+		mode &= 0o1777
+	}
+
+	rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return nil, fmt.Errorf("open root handle: %w", err)
+	}
+	defer rootDir.Close()
+
+	return securejoin.MkdirAllHandle(rootDir, unsafePath, mode)
+}
+
+// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
+// returned handle, for callers that don't need to use it.
+func MkdirAllInRoot(root, unsafePath string, mode os.FileMode) error {
+	f, err := MkdirAllInRootOpen(root, unsafePath, mode)
+	if err == nil {
+		_ = f.Close()
+	}
+	return err
+}
+
+// Openat is a Go-friendly openat(2) wrapper.
+func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
+	dirFd := unix.AT_FDCWD
+	if dir != nil {
+		dirFd = int(dir.Fd())
+	}
+	flags |= unix.O_CLOEXEC
+
+	fd, err := unix.Openat(dirFd, path, flags, mode)
+	if err != nil {
+		return nil, &os.PathError{Op: "openat", Path: path, Err: err}
+	}
+	return os.NewFile(uintptr(fd), dir.Name()+"/"+path), nil
+}
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -14,6 +14,9 @@ github.com/NVIDIA/go-nvml/pkg/nvml/mock
 # github.com/cpuguy83/go-md2man/v2 v2.0.5
 ## explicit; go 1.11
 github.com/cpuguy83/go-md2man/v2/md2man
+# github.com/cyphar/filepath-securejoin v0.4.1
+## explicit; go 1.18
+github.com/cyphar/filepath-securejoin
 # github.com/davecgh/go-spew v1.1.1
 ## explicit
 github.com/davecgh/go-spew/spew
@ -30,6 +33,11 @@ github.com/google/uuid
 # github.com/moby/sys/symlink v0.3.0
 ## explicit; go 1.17
 github.com/moby/sys/symlink
+# github.com/opencontainers/runc v1.2.5
+## explicit; go 1.22
+github.com/opencontainers/runc/libcontainer/dmz
+github.com/opencontainers/runc/libcontainer/system
+github.com/opencontainers/runc/libcontainer/utils
 # github.com/opencontainers/runtime-spec v1.2.0
 ## explicit
 github.com/opencontainers/runtime-spec/specs-go
@ -38,8 +46,6 @@ github.com/opencontainers/runtime-spec/specs-go
 github.com/opencontainers/runtime-tools/generate
 github.com/opencontainers/runtime-tools/generate/seccomp
 github.com/opencontainers/runtime-tools/validate/capabilities
-# github.com/opencontainers/selinux v1.11.0
-## explicit; go 1.19
 # github.com/pelletier/go-toml v1.9.5
 ## explicit; go 1.12
 github.com/pelletier/go-toml