Use libcontainer execseal to run ldconfig

This change copies ldconfig into a memfd before executing it from
the createContainer hook.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2025-02-25 16:58:30 +02:00
parent 9429fbac5f
commit 52b9631333
No known key found for this signature in database
34 changed files with 3939 additions and 6 deletions

View File

@ -0,0 +1,57 @@
/**
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"fmt"
"os"
"strconv"
"syscall"
"github.com/opencontainers/runc/libcontainer/dmz"
)
// SafeExec attempts to clone the specified binary (as an memfd, for example) before executing it.
func (m command) SafeExec(path string, args []string, envv []string) error {
safeExe, err := cloneBinary(path)
if err != nil {
m.logger.Warningf("Failed to clone binary %q: %v; falling back to Exec", path, err)
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
return syscall.Exec(path, args, envv)
}
defer safeExe.Close()
exePath := "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
return syscall.Exec(exePath, args, envv)
}
func cloneBinary(path string) (*os.File, error) {
exe, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("opening current binary: %w", err)
}
defer exe.Close()
stat, err := exe.Stat()
if err != nil {
return nil, fmt.Errorf("checking %v size: %w", path, err)
}
size := stat.Size()
return dmz.CloneBinary(exe, size, path, os.TempDir())
}

View File

@ -0,0 +1,29 @@
//go:build !linux
// +build !linux
/**
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import "syscall"
// SafeExec is not implemented on non-linux systems and forwards directly to the
// Exec syscall.
func (m *command) SafeExec(path string, args []string, envv []string) error {
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
return syscall.Exec(path, args, envv)
}

View File

@ -22,7 +22,6 @@ import (
"os"
"path/filepath"
"strings"
"syscall"
"github.com/urfave/cli/v2"
@ -143,8 +142,7 @@ func (m command) run(c *cli.Context, cfg *options) error {
// be configured to use a different config file by default.
args = append(args, "-f", "/etc/ld.so.conf")
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
return syscall.Exec(ldconfigPath, args, nil)
return m.SafeExec(ldconfigPath, args, nil)
}
// resolveLDConfigPath determines the LDConfig path to use for the system.

3
go.mod
View File

@ -6,6 +6,7 @@ require (
github.com/NVIDIA/go-nvlib v0.7.1
github.com/NVIDIA/go-nvml v0.12.4-1
github.com/moby/sys/symlink v0.3.0
github.com/opencontainers/runc v1.2.5
github.com/opencontainers/runtime-spec v1.2.0
github.com/pelletier/go-toml v1.9.5
github.com/sirupsen/logrus v1.9.3
@ -19,13 +20,13 @@ require (
require (
github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
github.com/cyphar/filepath-securejoin v0.4.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect

4
go.sum
View File

@ -7,6 +7,8 @@ github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2y
github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@ -31,6 +33,8 @@ github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34
github.com/moby/sys/symlink v0.3.0 h1:GZX89mEZ9u53f97npBy4Rc3vJKj7JBDj/PN2I22GrNU=
github.com/moby/sys/symlink v0.3.0/go.mod h1:3eNdhduHmYPcgsJtZXW1W4XUJdZGBIkttZ8xKqPUJq0=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runc v1.2.5 h1:8KAkq3Wrem8bApgOHyhRI/8IeLXIfmZ6Qaw6DNSLnA4=
github.com/opencontainers/runc v1.2.5/go.mod h1:dOQeFo29xZKBNeRBI0B19mJtfHv68YgCTh1X+YphA+4=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=

View File

@ -0,0 +1,256 @@
# Changelog #
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [Unreleased] ##
## [0.4.1] - 2025-01-28 ##
### Fixed ###
- The restrictions added for `root` paths passed to `SecureJoin` in 0.4.0 was
found to be too strict and caused some regressions when folks tried to
update, so this restriction has been relaxed to only return an error if the
path contains a `..` component. We still recommend users use `filepath.Clean`
(and even `filepath.EvalSymlinks`) on the `root` path they are using, but at
least you will no longer be punished for "trivial" unclean paths.
## [0.4.0] - 2025-01-13 ##
### Breaking ####
- `SecureJoin(VFS)` will now return an error if the provided `root` is not a
`filepath.Clean`'d path.
While it is ultimately the responsibility of the caller to ensure the root is
a safe path to use, passing a path like `/symlink/..` as a root would result
in the `SecureJoin`'d path being placed in `/` even though `/symlink/..`
might be a different directory, and so we should more strongly discourage
such usage.
All major users of `securejoin.SecureJoin` already ensure that the paths they
provide are safe (and this is ultimately a question of user error), but
removing this foot-gun is probably a good idea. Of course, this is
necessarily a breaking API change (though we expect no real users to be
affected by it).
Thanks to [Erik Sjölund](https://github.com/eriksjolund), who initially
reported this issue as a possible security issue.
- `MkdirAll` and `MkdirHandle` now take an `os.FileMode`-style mode argument
instead of a raw `unix.S_*`-style mode argument, which may cause compile-time
type errors depending on how you use `filepath-securejoin`. For most users,
there will be no change in behaviour aside from the type change (as the
bottom `0o777` bits are the same in both formats, and most users are probably
only using those bits).
However, if you were using `unix.S_ISVTX` to set the sticky bit with
`MkdirAll(Handle)` you will need to switch to `os.ModeSticky` otherwise you
will get a runtime error with this update. In addition, the error message you
will get from passing `unix.S_ISUID` and `unix.S_ISGID` will be different as
they are treated as invalid bits now (note that previously passing said bits
was also an error).
## [0.3.6] - 2024-12-17 ##
### Compatibility ###
- The minimum Go version requirement for `filepath-securejoin` is now Go 1.18
(we use generics internally).
For reference, `filepath-securejoin@v0.3.0` somewhat-arbitrarily bumped the
Go version requirement to 1.21.
While we did make some use of Go 1.21 stdlib features (and in principle Go
versions <= 1.21 are no longer even supported by upstream anymore), some
downstreams have complained that the version bump has meant that they have to
do workarounds when backporting fixes that use the new `filepath-securejoin`
API onto old branches. This is not an ideal situation, but since using this
library is probably better for most downstreams than a hand-rolled
workaround, we now have compatibility shims that allow us to build on older
Go versions.
- Lower minimum version requirement for `golang.org/x/sys` to `v0.18.0` (we
need the wrappers for `fsconfig(2)`), which should also make backporting
patches to older branches easier.
## [0.3.5] - 2024-12-06 ##
### Fixed ###
- `MkdirAll` will now no longer return an `EEXIST` error if two racing
processes are creating the same directory. We will still verify that the path
is a directory, but this will avoid spurious errors when multiple threads or
programs are trying to `MkdirAll` the same path. opencontainers/runc#4543
## [0.3.4] - 2024-10-09 ##
### Fixed ###
- Previously, some testing mocks we had resulted in us doing `import "testing"`
in non-`_test.go` code, which made some downstreams like Kubernetes unhappy.
This has been fixed. (#32)
## [0.3.3] - 2024-09-30 ##
### Fixed ###
- The mode and owner verification logic in `MkdirAll` has been removed. This
was originally intended to protect against some theoretical attacks but upon
further consideration these protections don't actually buy us anything and
they were causing spurious errors with more complicated filesystem setups.
- The "is the created directory empty" logic in `MkdirAll` has also been
removed. This was not causing us issues yet, but some pseudofilesystems (such
as `cgroup`) create non-empty directories and so this logic would've been
wrong for such cases.
## [0.3.2] - 2024-09-13 ##
### Changed ###
- Passing the `S_ISUID` or `S_ISGID` modes to `MkdirAllInRoot` will now return
an explicit error saying that those bits are ignored by `mkdirat(2)`. In the
past a different error was returned, but since the silent ignoring behaviour
is codified in the man pages a more explicit error seems apt. While silently
ignoring these bits would be the most compatible option, it could lead to
users thinking their code sets these bits when it doesn't. Programs that need
to deal with compatibility can mask the bits themselves. (#23, #25)
### Fixed ###
- If a directory has `S_ISGID` set, then all child directories will have
`S_ISGID` set when created and a different gid will be used for any inode
created under the directory. Previously, the "expected owner and mode"
validation in `securejoin.MkdirAll` did not correctly handle this. We now
correctly handle this case. (#24, #25)
## [0.3.1] - 2024-07-23 ##
### Changed ###
- By allowing `Open(at)InRoot` to opt-out of the extra work done by `MkdirAll`
to do the necessary "partial lookups", `Open(at)InRoot` now does less work
for both implementations (resulting in a many-fold decrease in the number of
operations for `openat2`, and a modest improvement for non-`openat2`) and is
far more guaranteed to match the correct `openat2(RESOLVE_IN_ROOT)`
behaviour.
- We now use `readlinkat(fd, "")` where possible. For `Open(at)InRoot` this
effectively just means that we no longer risk getting spurious errors during
rename races. However, for our hardened procfs handler, this in theory should
prevent mount attacks from tricking us when doing magic-link readlinks (even
when using the unsafe host `/proc` handle). Unfortunately `Reopen` is still
potentially vulnerable to those kinds of somewhat-esoteric attacks.
Technically this [will only work on post-2.6.39 kernels][linux-readlinkat-emptypath]
but it seems incredibly unlikely anyone is using `filepath-securejoin` on a
pre-2011 kernel.
### Fixed ###
- Several improvements were made to the errors returned by `Open(at)InRoot` and
`MkdirAll` when dealing with invalid paths under the emulated (ie.
non-`openat2`) implementation. Previously, some paths would return the wrong
error (`ENOENT` when the last component was a non-directory), and other paths
would be returned as though they were acceptable (trailing-slash components
after a non-directory would be ignored by `Open(at)InRoot`).
These changes were done to match `openat2`'s behaviour and purely is a
consistency fix (most users are going to be using `openat2` anyway).
[linux-readlinkat-emptypath]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=65cfc6722361570bfe255698d9cd4dccaf47570d
## [0.3.0] - 2024-07-11 ##
### Added ###
- A new set of `*os.File`-based APIs have been added. These are adapted from
[libpathrs][] and we strongly suggest using them if possible (as they provide
far more protection against attacks than `SecureJoin`):
- `Open(at)InRoot` resolves a path inside a rootfs and returns an `*os.File`
handle to the path. Note that the handle returned is an `O_PATH` handle,
which cannot be used for reading or writing (as well as some other
operations -- [see open(2) for more details][open.2])
- `Reopen` takes an `O_PATH` file handle and safely re-opens it to upgrade
it to a regular handle. This can also be used with non-`O_PATH` handles,
but `O_PATH` is the most obvious application.
- `MkdirAll` is an implementation of `os.MkdirAll` that is safe to use to
create a directory tree within a rootfs.
As these are new APIs, they may change in the future. However, they should be
safe to start migrating to as we have extensive tests ensuring they behave
correctly and are safe against various races and other attacks.
[libpathrs]: https://github.com/openSUSE/libpathrs
[open.2]: https://www.man7.org/linux/man-pages/man2/open.2.html
## [0.2.5] - 2024-05-03 ##
### Changed ###
- Some minor changes were made to how lexical components (like `..` and `.`)
are handled during path generation in `SecureJoin`. There is no behaviour
change as a result of this fix (the resulting paths are the same).
### Fixed ###
- The error returned when we hit a symlink loop now references the correct
path. (#10)
## [0.2.4] - 2023-09-06 ##
### Security ###
- This release fixes a potential security issue in filepath-securejoin when
used on Windows ([GHSA-6xv5-86q9-7xr8][], which could be used to generate
paths outside of the provided rootfs in certain cases), as well as improving
the overall behaviour of filepath-securejoin when dealing with Windows paths
that contain volume names. Thanks to Paulo Gomes for discovering and fixing
these issues.
### Fixed ###
- Switch to GitHub Actions for CI so we can test on Windows as well as Linux
and MacOS.
[GHSA-6xv5-86q9-7xr8]: https://github.com/advisories/GHSA-6xv5-86q9-7xr8
## [0.2.3] - 2021-06-04 ##
### Changed ###
- Switch to Go 1.13-style `%w` error wrapping, letting us drop the dependency
on `github.com/pkg/errors`.
## [0.2.2] - 2018-09-05 ##
### Changed ###
- Use `syscall.ELOOP` as the base error for symlink loops, rather than our own
(internal) error. This allows callers to more easily use `errors.Is` to check
for this case.
## [0.2.1] - 2018-09-05 ##
### Fixed ###
- Use our own `IsNotExist` implementation, which lets us handle `ENOTDIR`
properly within `SecureJoin`.
## [0.2.0] - 2017-07-19 ##
We now have 100% test coverage!
### Added ###
- Add a `SecureJoinVFS` API that can be used for mocking (as we do in our new
tests) or for implementing custom handling of lookup operations (such as for
rootless containers, where work is necessary to access directories with weird
modes because we don't have `CAP_DAC_READ_SEARCH` or `CAP_DAC_OVERRIDE`).
## 0.1.0 - 2017-07-19
This is our first release of `github.com/cyphar/filepath-securejoin`,
containing a full implementation with a coverage of 93.5% (the only missing
cases are the error cases, which are hard to mocktest at the moment).
[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.1...HEAD
[0.4.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.0...v0.4.1
[0.4.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.6...v0.4.0
[0.3.6]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.5...v0.3.6
[0.3.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.4...v0.3.5
[0.3.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.3...v0.3.4
[0.3.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.2...v0.3.3
[0.3.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.1...v0.3.2
[0.3.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.0...v0.3.1
[0.3.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.5...v0.3.0
[0.2.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.4...v0.2.5
[0.2.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.3...v0.2.4
[0.2.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.2...v0.2.3
[0.2.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.1...v0.2.2
[0.2.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.0...v0.2.1
[0.2.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.1.0...v0.2.0

28
vendor/github.com/cyphar/filepath-securejoin/LICENSE generated vendored Normal file
View File

@ -0,0 +1,28 @@
Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

169
vendor/github.com/cyphar/filepath-securejoin/README.md generated vendored Normal file
View File

@ -0,0 +1,169 @@
## `filepath-securejoin` ##
[![Go Documentation](https://pkg.go.dev/badge/github.com/cyphar/filepath-securejoin.svg)](https://pkg.go.dev/github.com/cyphar/filepath-securejoin)
[![Build Status](https://github.com/cyphar/filepath-securejoin/actions/workflows/ci.yml/badge.svg)](https://github.com/cyphar/filepath-securejoin/actions/workflows/ci.yml)
### Old API ###
This library was originally just an implementation of `SecureJoin` which was
[intended to be included in the Go standard library][go#20126] as a safer
`filepath.Join` that would restrict the path lookup to be inside a root
directory.
The implementation was based on code that existed in several container
runtimes. Unfortunately, this API is **fundamentally unsafe** against attackers
that can modify path components after `SecureJoin` returns and before the
caller uses the path, allowing for some fairly trivial TOCTOU attacks.
`SecureJoin` (and `SecureJoinVFS`) are still provided by this library to
support legacy users, but new users are strongly suggested to avoid using
`SecureJoin` and instead use the [new api](#new-api) or switch to
[libpathrs][libpathrs].
With the above limitations in mind, this library guarantees the following:
* If no error is set, the resulting string **must** be a child path of
`root` and will not contain any symlink path components (they will all be
expanded).
* When expanding symlinks, all symlink path components **must** be resolved
relative to the provided root. In particular, this can be considered a
userspace implementation of how `chroot(2)` operates on file paths. Note that
these symlinks will **not** be expanded lexically (`filepath.Clean` is not
called on the input before processing).
* Non-existent path components are unaffected by `SecureJoin` (similar to
`filepath.EvalSymlinks`'s semantics).
* The returned path will always be `filepath.Clean`ed and thus not contain any
`..` components.
A (trivial) implementation of this function on GNU/Linux systems could be done
with the following (note that this requires root privileges and is far more
opaque than the implementation in this library, and also requires that
`readlink` is inside the `root` path and is trustworthy):
```go
package securejoin
import (
"os/exec"
"path/filepath"
)
func SecureJoin(root, unsafePath string) (string, error) {
unsafePath = string(filepath.Separator) + unsafePath
cmd := exec.Command("chroot", root,
"readlink", "--canonicalize-missing", "--no-newline", unsafePath)
output, err := cmd.CombinedOutput()
if err != nil {
return "", err
}
expanded := string(output)
return filepath.Join(root, expanded), nil
}
```
[libpathrs]: https://github.com/openSUSE/libpathrs
[go#20126]: https://github.com/golang/go/issues/20126
### New API ###
While we recommend users switch to [libpathrs][libpathrs] as soon as it has a
stable release, some methods implemented by libpathrs have been ported to this
library to ease the transition. These APIs are only supported on Linux.
These APIs are implemented such that `filepath-securejoin` will
opportunistically use certain newer kernel APIs that make these operations far
more secure. In particular:
* All of the lookup operations will use [`openat2`][openat2.2] on new enough
kernels (Linux 5.6 or later) to restrict lookups through magic-links and
bind-mounts (for certain operations) and to make use of `RESOLVE_IN_ROOT` to
efficiently resolve symlinks within a rootfs.
* The APIs provide hardening against a malicious `/proc` mount to either detect
or avoid being tricked by a `/proc` that is not legitimate. This is done
using [`openat2`][openat2.2] for all users, and privileged users will also be
further protected by using [`fsopen`][fsopen.2] and [`open_tree`][open_tree.2]
(Linux 5.2 or later).
[openat2.2]: https://www.man7.org/linux/man-pages/man2/openat2.2.html
[fsopen.2]: https://github.com/brauner/man-pages-md/blob/main/fsopen.md
[open_tree.2]: https://github.com/brauner/man-pages-md/blob/main/open_tree.md
#### `OpenInRoot` ####
```go
func OpenInRoot(root, unsafePath string) (*os.File, error)
func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error)
func Reopen(handle *os.File, flags int) (*os.File, error)
```
`OpenInRoot` is a much safer version of
```go
path, err := securejoin.SecureJoin(root, unsafePath)
file, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC)
```
that protects against various race attacks that could lead to serious security
issues, depending on the application. Note that the returned `*os.File` is an
`O_PATH` file descriptor, which is quite restricted. Callers will probably need
to use `Reopen` to get a more usable handle (this split is done to provide
useful features like PTY spawning and to avoid users accidentally opening bad
inodes that could cause a DoS).
Callers need to be careful in how they use the returned `*os.File`. Usually it
is only safe to operate on the handle directly, and it is very easy to create a
security issue. [libpathrs][libpathrs] provides far more helpers to make using
these handles safer -- there is currently no plan to port them to
`filepath-securejoin`.
`OpenatInRoot` is like `OpenInRoot` except that the root is provided using an
`*os.File`. This allows you to ensure that multiple `OpenatInRoot` (or
`MkdirAllHandle`) calls are operating on the same rootfs.
> **NOTE**: Unlike `SecureJoin`, `OpenInRoot` will error out as soon as it hits
> a dangling symlink or non-existent path. This is in contrast to `SecureJoin`
> which treated non-existent components as though they were real directories,
> and would allow for partial resolution of dangling symlinks. These behaviours
> are at odds with how Linux treats non-existent paths and dangling symlinks,
> and so these are no longer allowed.
#### `MkdirAll` ####
```go
func MkdirAll(root, unsafePath string, mode int) error
func MkdirAllHandle(root *os.File, unsafePath string, mode int) (*os.File, error)
```
`MkdirAll` is a much safer version of
```go
path, err := securejoin.SecureJoin(root, unsafePath)
err = os.MkdirAll(path, mode)
```
that protects against the same kinds of races that `OpenInRoot` protects
against.
`MkdirAllHandle` is like `MkdirAll` except that the root is provided using an
`*os.File` (the reason for this is the same as with `OpenatInRoot`) and an
`*os.File` of the final created directory is returned (this directory is
guaranteed to be effectively identical to the directory created by
`MkdirAllHandle`, which is not possible to ensure by just using `OpenatInRoot`
after `MkdirAll`).
> **NOTE**: Unlike `SecureJoin`, `MkdirAll` will error out as soon as it hits
> a dangling symlink or non-existent path. This is in contrast to `SecureJoin`
> which treated non-existent components as though they were real directories,
> and would allow for partial resolution of dangling symlinks. These behaviours
> are at odds with how Linux treats non-existent paths and dangling symlinks,
> and so these are no longer allowed. This means that `MkdirAll` will not
> create non-existent directories referenced by a dangling symlink.
### License ###
The license of this project is the same as Go, which is a BSD 3-clause license
available in the `LICENSE` file.

1
vendor/github.com/cyphar/filepath-securejoin/VERSION generated vendored Normal file
View File

@ -0,0 +1 @@
0.4.1

39
vendor/github.com/cyphar/filepath-securejoin/doc.go generated vendored Normal file
View File

@ -0,0 +1,39 @@
// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
// Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package securejoin implements a set of helpers to make it easier to write Go
// code that is safe against symlink-related escape attacks. The primary idea
// is to let you resolve a path within a rootfs directory as if the rootfs was
// a chroot.
//
// securejoin has two APIs, a "legacy" API and a "modern" API.
//
// The legacy API is [SecureJoin] and [SecureJoinVFS]. These methods are
// **not** safe against race conditions where an attacker changes the
// filesystem after (or during) the [SecureJoin] operation.
//
// The new API is made up of [OpenInRoot] and [MkdirAll] (and derived
// functions). These are safe against racing attackers and have several other
// protections that are not provided by the legacy API. There are many more
// operations that most programs expect to be able to do safely, but we do not
// provide explicit support for them because we want to encourage users to
// switch to [libpathrs](https://github.com/openSUSE/libpathrs) which is a
// cross-language next-generation library that is entirely designed around
// operating on paths safely.
//
// securejoin has been used by several container runtimes (Docker, runc,
// Kubernetes, etc) for quite a few years as a de-facto standard for operating
// on container filesystem paths "safely". However, most users still use the
// legacy API which is unsafe against various attacks (there is a fairly long
// history of CVEs in dependent as a result). Users should switch to the modern
// API as soon as possible (or even better, switch to libpathrs).
//
// This project was initially intended to be included in the Go standard
// library, but [it was rejected](https://go.dev/issue/20126). There is now a
// [new Go proposal](https://go.dev/issue/67002) for a safe path resolution API
// that shares some of the goals of filepath-securejoin. However, that design
// is intended to work like `openat2(RESOLVE_BENEATH)` which does not fit the
// usecase of container runtimes and most system tools.
package securejoin

View File

@ -0,0 +1,18 @@
//go:build linux && go1.20
// Copyright (C) 2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"fmt"
)
// wrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except
// that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap)
// is only guaranteed to give you baseErr.
func wrapBaseError(baseErr, extraErr error) error {
return fmt.Errorf("%w: %w", extraErr, baseErr)
}

View File

@ -0,0 +1,38 @@
//go:build linux && !go1.20
// Copyright (C) 2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"fmt"
)
type wrappedError struct {
inner error
isError error
}
func (err wrappedError) Is(target error) bool {
return err.isError == target
}
func (err wrappedError) Unwrap() error {
return err.inner
}
func (err wrappedError) Error() string {
return fmt.Sprintf("%v: %v", err.isError, err.inner)
}
// wrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except
// that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap)
// is only guaranteed to give you baseErr.
func wrapBaseError(baseErr, extraErr error) error {
return wrappedError{
inner: baseErr,
isError: extraErr,
}
}

View File

@ -0,0 +1,32 @@
//go:build linux && go1.21
// Copyright (C) 2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"slices"
"sync"
)
func slices_DeleteFunc[S ~[]E, E any](slice S, delFn func(E) bool) S {
return slices.DeleteFunc(slice, delFn)
}
func slices_Contains[S ~[]E, E comparable](slice S, val E) bool {
return slices.Contains(slice, val)
}
func slices_Clone[S ~[]E, E any](slice S) S {
return slices.Clone(slice)
}
func sync_OnceValue[T any](f func() T) func() T {
return sync.OnceValue(f)
}
func sync_OnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) {
return sync.OnceValues(f)
}

View File

@ -0,0 +1,124 @@
//go:build linux && !go1.21
// Copyright (C) 2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"sync"
)
// These are very minimal implementations of functions that appear in Go 1.21's
// stdlib, included so that we can build on older Go versions. Most are
// borrowed directly from the stdlib, and a few are modified to be "obviously
// correct" without needing to copy too many other helpers.
// clearSlice is equivalent to the builtin clear from Go 1.21.
// Copied from the Go 1.24 stdlib implementation.
func clearSlice[S ~[]E, E any](slice S) {
var zero E
for i := range slice {
slice[i] = zero
}
}
// Copied from the Go 1.24 stdlib implementation.
func slices_IndexFunc[S ~[]E, E any](s S, f func(E) bool) int {
for i := range s {
if f(s[i]) {
return i
}
}
return -1
}
// Copied from the Go 1.24 stdlib implementation.
func slices_DeleteFunc[S ~[]E, E any](s S, del func(E) bool) S {
i := slices_IndexFunc(s, del)
if i == -1 {
return s
}
// Don't start copying elements until we find one to delete.
for j := i + 1; j < len(s); j++ {
if v := s[j]; !del(v) {
s[i] = v
i++
}
}
clearSlice(s[i:]) // zero/nil out the obsolete elements, for GC
return s[:i]
}
// Similar to the stdlib slices.Contains, except that we don't have
// slices.Index so we need to use slices.IndexFunc for this non-Func helper.
func slices_Contains[S ~[]E, E comparable](s S, v E) bool {
return slices_IndexFunc(s, func(e E) bool { return e == v }) >= 0
}
// Copied from the Go 1.24 stdlib implementation.
func slices_Clone[S ~[]E, E any](s S) S {
// Preserve nil in case it matters.
if s == nil {
return nil
}
return append(S([]E{}), s...)
}
// Copied from the Go 1.24 stdlib implementation.
func sync_OnceValue[T any](f func() T) func() T {
var (
once sync.Once
valid bool
p any
result T
)
g := func() {
defer func() {
p = recover()
if !valid {
panic(p)
}
}()
result = f()
f = nil
valid = true
}
return func() T {
once.Do(g)
if !valid {
panic(p)
}
return result
}
}
// Copied from the Go 1.24 stdlib implementation.
func sync_OnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) {
var (
once sync.Once
valid bool
p any
r1 T1
r2 T2
)
g := func() {
defer func() {
p = recover()
if !valid {
panic(p)
}
}()
r1, r2 = f()
f = nil
valid = true
}
return func() (T1, T2) {
once.Do(g)
if !valid {
panic(p)
}
return r1, r2
}
}

166
vendor/github.com/cyphar/filepath-securejoin/join.go generated vendored Normal file
View File

@ -0,0 +1,166 @@
// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
// Copyright (C) 2017-2025 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"errors"
"os"
"path/filepath"
"strings"
"syscall"
)
const maxSymlinkLimit = 255
// IsNotExist tells you if err is an error that implies that either the path
// accessed does not exist (or path components don't exist). This is
// effectively a more broad version of [os.IsNotExist].
func IsNotExist(err error) bool {
// Check that it's not actually an ENOTDIR, which in some cases is a more
// convoluted case of ENOENT (usually involving weird paths).
return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) || errors.Is(err, syscall.ENOENT)
}
// errUnsafeRoot is returned if the user provides SecureJoinVFS with a path
// that contains ".." components.
var errUnsafeRoot = errors.New("root path provided to SecureJoin contains '..' components")
// stripVolume just gets rid of the Windows volume included in a path. Based on
// some godbolt tests, the Go compiler is smart enough to make this a no-op on
// Linux.
func stripVolume(path string) string {
return path[len(filepath.VolumeName(path)):]
}
// hasDotDot checks if the path contains ".." components in a platform-agnostic
// way.
func hasDotDot(path string) bool {
// If we are on Windows, strip any volume letters. It turns out that
// C:..\foo may (or may not) be a valid pathname and we need to handle that
// leading "..".
path = stripVolume(path)
// Look for "/../" in the path, but we need to handle leading and trailing
// ".."s by adding separators. Doing this with filepath.Separator is ugly
// so just convert to Unix-style "/" first.
path = filepath.ToSlash(path)
return strings.Contains("/"+path+"/", "/../")
}
// SecureJoinVFS joins the two given path components (similar to [filepath.Join]) except
// that the returned path is guaranteed to be scoped inside the provided root
// path (when evaluated). Any symbolic links in the path are evaluated with the
// given root treated as the root of the filesystem, similar to a chroot. The
// filesystem state is evaluated through the given [VFS] interface (if nil, the
// standard [os].* family of functions are used).
//
// Note that the guarantees provided by this function only apply if the path
// components in the returned string are not modified (in other words are not
// replaced with symlinks on the filesystem) after this function has returned.
// Such a symlink race is necessarily out-of-scope of SecureJoinVFS.
//
// NOTE: Due to the above limitation, Linux users are strongly encouraged to
// use [OpenInRoot] instead, which does safely protect against these kinds of
// attacks. There is no way to solve this problem with SecureJoinVFS because
// the API is fundamentally wrong (you cannot return a "safe" path string and
// guarantee it won't be modified afterwards).
//
// Volume names in unsafePath are always discarded, regardless if they are
// provided via direct input or when evaluating symlinks. Therefore:
//
// "C:\Temp" + "D:\path\to\file.txt" results in "C:\Temp\path\to\file.txt"
//
// If the provided root is not [filepath.Clean] then an error will be returned,
// as such root paths are bordering on somewhat unsafe and using such paths is
// not best practice. We also strongly suggest that any root path is first
// fully resolved using [filepath.EvalSymlinks] or otherwise constructed to
// avoid containing symlink components. Of course, the root also *must not* be
// attacker-controlled.
func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) {
// The root path must not contain ".." components, otherwise when we join
// the subpath we will end up with a weird path. We could work around this
// in other ways but users shouldn't be giving us non-lexical root paths in
// the first place.
if hasDotDot(root) {
return "", errUnsafeRoot
}
// Use the os.* VFS implementation if none was specified.
if vfs == nil {
vfs = osVFS{}
}
unsafePath = filepath.FromSlash(unsafePath)
var (
currentPath string
remainingPath = unsafePath
linksWalked int
)
for remainingPath != "" {
// On Windows, if we managed to end up at a path referencing a volume,
// drop the volume to make sure we don't end up with broken paths or
// escaping the root volume.
remainingPath = stripVolume(remainingPath)
// Get the next path component.
var part string
if i := strings.IndexRune(remainingPath, filepath.Separator); i == -1 {
part, remainingPath = remainingPath, ""
} else {
part, remainingPath = remainingPath[:i], remainingPath[i+1:]
}
// Apply the component lexically to the path we are building.
// currentPath does not contain any symlinks, and we are lexically
// dealing with a single component, so it's okay to do a filepath.Clean
// here.
nextPath := filepath.Join(string(filepath.Separator), currentPath, part)
if nextPath == string(filepath.Separator) {
currentPath = ""
continue
}
fullPath := root + string(filepath.Separator) + nextPath
// Figure out whether the path is a symlink.
fi, err := vfs.Lstat(fullPath)
if err != nil && !IsNotExist(err) {
return "", err
}
// Treat non-existent path components the same as non-symlinks (we
// can't do any better here).
if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 {
currentPath = nextPath
continue
}
// It's a symlink, so get its contents and expand it by prepending it
// to the yet-unparsed path.
linksWalked++
if linksWalked > maxSymlinkLimit {
return "", &os.PathError{Op: "SecureJoin", Path: root + string(filepath.Separator) + unsafePath, Err: syscall.ELOOP}
}
dest, err := vfs.Readlink(fullPath)
if err != nil {
return "", err
}
remainingPath = dest + string(filepath.Separator) + remainingPath
// Absolute symlinks reset any work we've already done.
if filepath.IsAbs(dest) {
currentPath = ""
}
}
// There should be no lexical components like ".." left in the path here,
// but for safety clean up the path before joining it to the root.
finalPath := filepath.Join(string(filepath.Separator), currentPath)
return filepath.Join(root, finalPath), nil
}
// SecureJoin is a wrapper around [SecureJoinVFS] that just uses the [os].* library
// of functions as the [VFS]. If in doubt, use this function over [SecureJoinVFS].
func SecureJoin(root, unsafePath string) (string, error) {
return SecureJoinVFS(root, unsafePath, nil)
}

View File

@ -0,0 +1,388 @@
//go:build linux
// Copyright (C) 2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"errors"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"golang.org/x/sys/unix"
)
type symlinkStackEntry struct {
// (dir, remainingPath) is what we would've returned if the link didn't
// exist. This matches what openat2(RESOLVE_IN_ROOT) would return in
// this case.
dir *os.File
remainingPath string
// linkUnwalked is the remaining path components from the original
// Readlink which we have yet to walk. When this slice is empty, we
// drop the link from the stack.
linkUnwalked []string
}
func (se symlinkStackEntry) String() string {
return fmt.Sprintf("<%s>/%s [->%s]", se.dir.Name(), se.remainingPath, strings.Join(se.linkUnwalked, "/"))
}
func (se symlinkStackEntry) Close() {
_ = se.dir.Close()
}
type symlinkStack []*symlinkStackEntry
func (s *symlinkStack) IsEmpty() bool {
return s == nil || len(*s) == 0
}
func (s *symlinkStack) Close() {
if s != nil {
for _, link := range *s {
link.Close()
}
// TODO: Switch to clear once we switch to Go 1.21.
*s = nil
}
}
var (
errEmptyStack = errors.New("[internal] stack is empty")
errBrokenSymlinkStack = errors.New("[internal error] broken symlink stack")
)
func (s *symlinkStack) popPart(part string) error {
if s == nil || s.IsEmpty() {
// If there is nothing in the symlink stack, then the part was from the
// real path provided by the user, and this is a no-op.
return errEmptyStack
}
if part == "." {
// "." components are no-ops -- we drop them when doing SwapLink.
return nil
}
tailEntry := (*s)[len(*s)-1]
// Double-check that we are popping the component we expect.
if len(tailEntry.linkUnwalked) == 0 {
return fmt.Errorf("%w: trying to pop component %q of empty stack entry %s", errBrokenSymlinkStack, part, tailEntry)
}
headPart := tailEntry.linkUnwalked[0]
if headPart != part {
return fmt.Errorf("%w: trying to pop component %q but the last stack entry is %s (%q)", errBrokenSymlinkStack, part, tailEntry, headPart)
}
// Drop the component, but keep the entry around in case we are dealing
// with a "tail-chained" symlink.
tailEntry.linkUnwalked = tailEntry.linkUnwalked[1:]
return nil
}
func (s *symlinkStack) PopPart(part string) error {
if err := s.popPart(part); err != nil {
if errors.Is(err, errEmptyStack) {
// Skip empty stacks.
err = nil
}
return err
}
// Clean up any of the trailing stack entries that are empty.
for lastGood := len(*s) - 1; lastGood >= 0; lastGood-- {
entry := (*s)[lastGood]
if len(entry.linkUnwalked) > 0 {
break
}
entry.Close()
(*s) = (*s)[:lastGood]
}
return nil
}
func (s *symlinkStack) push(dir *os.File, remainingPath, linkTarget string) error {
if s == nil {
return nil
}
// Split the link target and clean up any "" parts.
linkTargetParts := slices_DeleteFunc(
strings.Split(linkTarget, "/"),
func(part string) bool { return part == "" || part == "." })
// Copy the directory so the caller doesn't close our copy.
dirCopy, err := dupFile(dir)
if err != nil {
return err
}
// Add to the stack.
*s = append(*s, &symlinkStackEntry{
dir: dirCopy,
remainingPath: remainingPath,
linkUnwalked: linkTargetParts,
})
return nil
}
func (s *symlinkStack) SwapLink(linkPart string, dir *os.File, remainingPath, linkTarget string) error {
// If we are currently inside a symlink resolution, remove the symlink
// component from the last symlink entry, but don't remove the entry even
// if it's empty. If we are a "tail-chained" symlink (a trailing symlink we
// hit during a symlink resolution) we need to keep the old symlink until
// we finish the resolution.
if err := s.popPart(linkPart); err != nil {
if !errors.Is(err, errEmptyStack) {
return err
}
// Push the component regardless of whether the stack was empty.
}
return s.push(dir, remainingPath, linkTarget)
}
func (s *symlinkStack) PopTopSymlink() (*os.File, string, bool) {
if s == nil || s.IsEmpty() {
return nil, "", false
}
tailEntry := (*s)[0]
*s = (*s)[1:]
return tailEntry.dir, tailEntry.remainingPath, true
}
// partialLookupInRoot tries to lookup as much of the request path as possible
// within the provided root (a-la RESOLVE_IN_ROOT) and opens the final existing
// component of the requested path, returning a file handle to the final
// existing component and a string containing the remaining path components.
func partialLookupInRoot(root *os.File, unsafePath string) (*os.File, string, error) {
return lookupInRoot(root, unsafePath, true)
}
func completeLookupInRoot(root *os.File, unsafePath string) (*os.File, error) {
handle, remainingPath, err := lookupInRoot(root, unsafePath, false)
if remainingPath != "" && err == nil {
// should never happen
err = fmt.Errorf("[bug] non-empty remaining path when doing a non-partial lookup: %q", remainingPath)
}
// lookupInRoot(partial=false) will always close the handle if an error is
// returned, so no need to double-check here.
return handle, err
}
func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.File, _ string, _ error) {
unsafePath = filepath.ToSlash(unsafePath) // noop
// This is very similar to SecureJoin, except that we operate on the
// components using file descriptors. We then return the last component we
// managed open, along with the remaining path components not opened.
// Try to use openat2 if possible.
if hasOpenat2() {
return lookupOpenat2(root, unsafePath, partial)
}
// Get the "actual" root path from /proc/self/fd. This is necessary if the
// root is some magic-link like /proc/$pid/root, in which case we want to
// make sure when we do checkProcSelfFdPath that we are using the correct
// root path.
logicalRootPath, err := procSelfFdReadlink(root)
if err != nil {
return nil, "", fmt.Errorf("get real root path: %w", err)
}
currentDir, err := dupFile(root)
if err != nil {
return nil, "", fmt.Errorf("clone root fd: %w", err)
}
defer func() {
// If a handle is not returned, close the internal handle.
if Handle == nil {
_ = currentDir.Close()
}
}()
// symlinkStack is used to emulate how openat2(RESOLVE_IN_ROOT) treats
// dangling symlinks. If we hit a non-existent path while resolving a
// symlink, we need to return the (dir, remainingPath) that we had when we
// hit the symlink (treating the symlink as though it were a regular file).
// The set of (dir, remainingPath) sets is stored within the symlinkStack
// and we add and remove parts when we hit symlink and non-symlink
// components respectively. We need a stack because of recursive symlinks
// (symlinks that contain symlink components in their target).
//
// Note that the stack is ONLY used for book-keeping. All of the actual
// path walking logic is still based on currentPath/remainingPath and
// currentDir (as in SecureJoin).
var symStack *symlinkStack
if partial {
symStack = new(symlinkStack)
defer symStack.Close()
}
var (
linksWalked int
currentPath string
remainingPath = unsafePath
)
for remainingPath != "" {
// Save the current remaining path so if the part is not real we can
// return the path including the component.
oldRemainingPath := remainingPath
// Get the next path component.
var part string
if i := strings.IndexByte(remainingPath, '/'); i == -1 {
part, remainingPath = remainingPath, ""
} else {
part, remainingPath = remainingPath[:i], remainingPath[i+1:]
}
// If we hit an empty component, we need to treat it as though it is
// "." so that trailing "/" and "//" components on a non-directory
// correctly return the right error code.
if part == "" {
part = "."
}
// Apply the component lexically to the path we are building.
// currentPath does not contain any symlinks, and we are lexically
// dealing with a single component, so it's okay to do a filepath.Clean
// here.
nextPath := path.Join("/", currentPath, part)
// If we logically hit the root, just clone the root rather than
// opening the part and doing all of the other checks.
if nextPath == "/" {
if err := symStack.PopPart(part); err != nil {
return nil, "", fmt.Errorf("walking into root with part %q failed: %w", part, err)
}
// Jump to root.
rootClone, err := dupFile(root)
if err != nil {
return nil, "", fmt.Errorf("clone root fd: %w", err)
}
_ = currentDir.Close()
currentDir = rootClone
currentPath = nextPath
continue
}
// Try to open the next component.
nextDir, err := openatFile(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
switch {
case err == nil:
st, err := nextDir.Stat()
if err != nil {
_ = nextDir.Close()
return nil, "", fmt.Errorf("stat component %q: %w", part, err)
}
switch st.Mode() & os.ModeType {
case os.ModeSymlink:
// readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See
// Linux commit 65cfc6722361 ("readlinkat(), fchownat() and
// fstatat() with empty relative pathnames").
linkDest, err := readlinkatFile(nextDir, "")
// We don't need the handle anymore.
_ = nextDir.Close()
if err != nil {
return nil, "", err
}
linksWalked++
if linksWalked > maxSymlinkLimit {
return nil, "", &os.PathError{Op: "securejoin.lookupInRoot", Path: logicalRootPath + "/" + unsafePath, Err: unix.ELOOP}
}
// Swap out the symlink's component for the link entry itself.
if err := symStack.SwapLink(part, currentDir, oldRemainingPath, linkDest); err != nil {
return nil, "", fmt.Errorf("walking into symlink %q failed: push symlink: %w", part, err)
}
// Update our logical remaining path.
remainingPath = linkDest + "/" + remainingPath
// Absolute symlinks reset any work we've already done.
if path.IsAbs(linkDest) {
// Jump to root.
rootClone, err := dupFile(root)
if err != nil {
return nil, "", fmt.Errorf("clone root fd: %w", err)
}
_ = currentDir.Close()
currentDir = rootClone
currentPath = "/"
}
default:
// If we are dealing with a directory, simply walk into it.
_ = currentDir.Close()
currentDir = nextDir
currentPath = nextPath
// The part was real, so drop it from the symlink stack.
if err := symStack.PopPart(part); err != nil {
return nil, "", fmt.Errorf("walking into directory %q failed: %w", part, err)
}
// If we are operating on a .., make sure we haven't escaped.
// We only have to check for ".." here because walking down
// into a regular component component cannot cause you to
// escape. This mirrors the logic in RESOLVE_IN_ROOT, except we
// have to check every ".." rather than only checking after a
// rename or mount on the system.
if part == ".." {
// Make sure the root hasn't moved.
if err := checkProcSelfFdPath(logicalRootPath, root); err != nil {
return nil, "", fmt.Errorf("root path moved during lookup: %w", err)
}
// Make sure the path is what we expect.
fullPath := logicalRootPath + nextPath
if err := checkProcSelfFdPath(fullPath, currentDir); err != nil {
return nil, "", fmt.Errorf("walking into %q had unexpected result: %w", part, err)
}
}
}
default:
if !partial {
return nil, "", err
}
// If there are any remaining components in the symlink stack, we
// are still within a symlink resolution and thus we hit a dangling
// symlink. So pretend that the first symlink in the stack we hit
// was an ENOENT (to match openat2).
if oldDir, remainingPath, ok := symStack.PopTopSymlink(); ok {
_ = currentDir.Close()
return oldDir, remainingPath, err
}
// We have hit a final component that doesn't exist, so we have our
// partial open result. Note that we have to use the OLD remaining
// path, since the lookup failed.
return currentDir, oldRemainingPath, err
}
}
// If the unsafePath had a trailing slash, we need to make sure we try to
// do a relative "." open so that we will correctly return an error when
// the final component is a non-directory (to match openat2). In the
// context of openat2, a trailing slash and a trailing "/." are completely
// equivalent.
if strings.HasSuffix(unsafePath, "/") {
nextDir, err := openatFile(currentDir, ".", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
if err != nil {
if !partial {
_ = currentDir.Close()
currentDir = nil
}
return currentDir, "", err
}
_ = currentDir.Close()
currentDir = nextDir
}
// All of the components existed!
return currentDir, "", nil
}

View File

@ -0,0 +1,236 @@
//go:build linux
// Copyright (C) 2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"golang.org/x/sys/unix"
)
var (
errInvalidMode = errors.New("invalid permission mode")
errPossibleAttack = errors.New("possible attack detected")
)
// modePermExt is like os.ModePerm except that it also includes the set[ug]id
// and sticky bits.
const modePermExt = os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky
//nolint:cyclop // this function needs to handle a lot of cases
func toUnixMode(mode os.FileMode) (uint32, error) {
sysMode := uint32(mode.Perm())
if mode&os.ModeSetuid != 0 {
sysMode |= unix.S_ISUID
}
if mode&os.ModeSetgid != 0 {
sysMode |= unix.S_ISGID
}
if mode&os.ModeSticky != 0 {
sysMode |= unix.S_ISVTX
}
// We don't allow file type bits.
if mode&os.ModeType != 0 {
return 0, fmt.Errorf("%w %+.3o (%s): type bits not permitted", errInvalidMode, mode, mode)
}
// We don't allow other unknown modes.
if mode&^modePermExt != 0 || sysMode&unix.S_IFMT != 0 {
return 0, fmt.Errorf("%w %+.3o (%s): unknown mode bits", errInvalidMode, mode, mode)
}
return sysMode, nil
}
// MkdirAllHandle is equivalent to [MkdirAll], except that it is safer to use
// in two respects:
//
// - The caller provides the root directory as an *[os.File] (preferably O_PATH)
// handle. This means that the caller can be sure which root directory is
// being used. Note that this can be emulated by using /proc/self/fd/... as
// the root path with [os.MkdirAll].
//
// - Once all of the directories have been created, an *[os.File] O_PATH handle
// to the directory at unsafePath is returned to the caller. This is done in
// an effectively-race-free way (an attacker would only be able to swap the
// final directory component), which is not possible to emulate with
// [MkdirAll].
//
// In addition, the returned handle is obtained far more efficiently than doing
// a brand new lookup of unsafePath (such as with [SecureJoin] or openat2) after
// doing [MkdirAll]. If you intend to open the directory after creating it, you
// should use MkdirAllHandle.
func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.File, Err error) {
unixMode, err := toUnixMode(mode)
if err != nil {
return nil, err
}
// On Linux, mkdirat(2) (and os.Mkdir) silently ignore the suid and sgid
// bits. We could also silently ignore them but since we have very few
// users it seems more prudent to return an error so users notice that
// these bits will not be set.
if unixMode&^0o1777 != 0 {
return nil, fmt.Errorf("%w for mkdir %+.3o: suid and sgid are ignored by mkdir", errInvalidMode, mode)
}
// Try to open as much of the path as possible.
currentDir, remainingPath, err := partialLookupInRoot(root, unsafePath)
defer func() {
if Err != nil {
_ = currentDir.Close()
}
}()
if err != nil && !errors.Is(err, unix.ENOENT) {
return nil, fmt.Errorf("find existing subpath of %q: %w", unsafePath, err)
}
// If there is an attacker deleting directories as we walk into them,
// detect this proactively. Note this is guaranteed to detect if the
// attacker deleted any part of the tree up to currentDir.
//
// Once we walk into a dead directory, partialLookupInRoot would not be
// able to walk further down the tree (directories must be empty before
// they are deleted), and if the attacker has removed the entire tree we
// can be sure that anything that was originally inside a dead directory
// must also be deleted and thus is a dead directory in its own right.
//
// This is mostly a quality-of-life check, because mkdir will simply fail
// later if the attacker deletes the tree after this check.
if err := isDeadInode(currentDir); err != nil {
return nil, fmt.Errorf("finding existing subpath of %q: %w", unsafePath, err)
}
// Re-open the path to match the O_DIRECTORY reopen loop later (so that we
// always return a non-O_PATH handle). We also check that we actually got a
// directory.
if reopenDir, err := Reopen(currentDir, unix.O_DIRECTORY|unix.O_CLOEXEC); errors.Is(err, unix.ENOTDIR) {
return nil, fmt.Errorf("cannot create subdirectories in %q: %w", currentDir.Name(), unix.ENOTDIR)
} else if err != nil {
return nil, fmt.Errorf("re-opening handle to %q: %w", currentDir.Name(), err)
} else {
_ = currentDir.Close()
currentDir = reopenDir
}
remainingParts := strings.Split(remainingPath, string(filepath.Separator))
if slices_Contains(remainingParts, "..") {
// The path contained ".." components after the end of the "real"
// components. We could try to safely resolve ".." here but that would
// add a bunch of extra logic for something that it's not clear even
// needs to be supported. So just return an error.
//
// If we do filepath.Clean(remainingPath) then we end up with the
// problem that ".." can erase a trailing dangling symlink and produce
// a path that doesn't quite match what the user asked for.
return nil, fmt.Errorf("%w: yet-to-be-created path %q contains '..' components", unix.ENOENT, remainingPath)
}
// Create the remaining components.
for _, part := range remainingParts {
switch part {
case "", ".":
// Skip over no-op paths.
continue
}
// NOTE: mkdir(2) will not follow trailing symlinks, so we can safely
// create the final component without worrying about symlink-exchange
// attacks.
//
// If we get -EEXIST, it's possible that another program created the
// directory at the same time as us. In that case, just continue on as
// if we created it (if the created inode is not a directory, the
// following open call will fail).
if err := unix.Mkdirat(int(currentDir.Fd()), part, unixMode); err != nil && !errors.Is(err, unix.EEXIST) {
err = &os.PathError{Op: "mkdirat", Path: currentDir.Name() + "/" + part, Err: err}
// Make the error a bit nicer if the directory is dead.
if deadErr := isDeadInode(currentDir); deadErr != nil {
// TODO: Once we bump the minimum Go version to 1.20, we can use
// multiple %w verbs for this wrapping. For now we need to use a
// compatibility shim for older Go versions.
//err = fmt.Errorf("%w (%w)", err, deadErr)
err = wrapBaseError(err, deadErr)
}
return nil, err
}
// Get a handle to the next component. O_DIRECTORY means we don't need
// to use O_PATH.
var nextDir *os.File
if hasOpenat2() {
nextDir, err = openat2File(currentDir, part, &unix.OpenHow{
Flags: unix.O_NOFOLLOW | unix.O_DIRECTORY | unix.O_CLOEXEC,
Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_NO_XDEV,
})
} else {
nextDir, err = openatFile(currentDir, part, unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
}
if err != nil {
return nil, err
}
_ = currentDir.Close()
currentDir = nextDir
// It's possible that the directory we just opened was swapped by an
// attacker. Unfortunately there isn't much we can do to protect
// against this, and MkdirAll's behaviour is that we will reuse
// existing directories anyway so the need to protect against this is
// incredibly limited (and arguably doesn't even deserve mention here).
//
// Ideally we might want to check that the owner and mode match what we
// would've created -- unfortunately, it is non-trivial to verify that
// the owner and mode of the created directory match. While plain Unix
// DAC rules seem simple enough to emulate, there are a bunch of other
// factors that can change the mode or owner of created directories
// (default POSIX ACLs, mount options like uid=1,gid=2,umask=0 on
// filesystems like vfat, etc etc). We used to try to verify this but
// it just lead to a series of spurious errors.
//
// We could also check that the directory is non-empty, but
// unfortunately some pseduofilesystems (like cgroupfs) create
// non-empty directories, which would result in different spurious
// errors.
}
return currentDir, nil
}
// MkdirAll is a race-safe alternative to the [os.MkdirAll] function,
// where the new directory is guaranteed to be within the root directory (if an
// attacker can move directories from inside the root to outside the root, the
// created directory tree might be outside of the root but the key constraint
// is that at no point will we walk outside of the directory tree we are
// creating).
//
// Effectively, MkdirAll(root, unsafePath, mode) is equivalent to
//
// path, _ := securejoin.SecureJoin(root, unsafePath)
// err := os.MkdirAll(path, mode)
//
// But is much safer. The above implementation is unsafe because if an attacker
// can modify the filesystem tree between [SecureJoin] and [os.MkdirAll], it is
// possible for MkdirAll to resolve unsafe symlink components and create
// directories outside of the root.
//
// If you plan to open the directory after you have created it or want to use
// an open directory handle as the root, you should use [MkdirAllHandle] instead.
// This function is a wrapper around [MkdirAllHandle].
func MkdirAll(root, unsafePath string, mode os.FileMode) error {
rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
return err
}
defer rootDir.Close()
f, err := MkdirAllHandle(rootDir, unsafePath, mode)
if err != nil {
return err
}
_ = f.Close()
return nil
}

View File

@ -0,0 +1,103 @@
//go:build linux
// Copyright (C) 2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"fmt"
"os"
"strconv"
"golang.org/x/sys/unix"
)
// OpenatInRoot is equivalent to [OpenInRoot], except that the root is provided
// using an *[os.File] handle, to ensure that the correct root directory is used.
func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) {
handle, err := completeLookupInRoot(root, unsafePath)
if err != nil {
return nil, &os.PathError{Op: "securejoin.OpenInRoot", Path: unsafePath, Err: err}
}
return handle, nil
}
// OpenInRoot safely opens the provided unsafePath within the root.
// Effectively, OpenInRoot(root, unsafePath) is equivalent to
//
// path, _ := securejoin.SecureJoin(root, unsafePath)
// handle, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC)
//
// But is much safer. The above implementation is unsafe because if an attacker
// can modify the filesystem tree between [SecureJoin] and [os.OpenFile], it is
// possible for the returned file to be outside of the root.
//
// Note that the returned handle is an O_PATH handle, meaning that only a very
// limited set of operations will work on the handle. This is done to avoid
// accidentally opening an untrusted file that could cause issues (such as a
// disconnected TTY that could cause a DoS, or some other issue). In order to
// use the returned handle, you can "upgrade" it to a proper handle using
// [Reopen].
func OpenInRoot(root, unsafePath string) (*os.File, error) {
rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
defer rootDir.Close()
return OpenatInRoot(rootDir, unsafePath)
}
// Reopen takes an *[os.File] handle and re-opens it through /proc/self/fd.
// Reopen(file, flags) is effectively equivalent to
//
// fdPath := fmt.Sprintf("/proc/self/fd/%d", file.Fd())
// os.OpenFile(fdPath, flags|unix.O_CLOEXEC)
//
// But with some extra hardenings to ensure that we are not tricked by a
// maliciously-configured /proc mount. While this attack scenario is not
// common, in container runtimes it is possible for higher-level runtimes to be
// tricked into configuring an unsafe /proc that can be used to attack file
// operations. See [CVE-2019-19921] for more details.
//
// [CVE-2019-19921]: https://github.com/advisories/GHSA-fh74-hm69-rqjw
func Reopen(handle *os.File, flags int) (*os.File, error) {
procRoot, err := getProcRoot()
if err != nil {
return nil, err
}
// We can't operate on /proc/thread-self/fd/$n directly when doing a
// re-open, so we need to open /proc/thread-self/fd and then open a single
// final component.
procFdDir, closer, err := procThreadSelf(procRoot, "fd/")
if err != nil {
return nil, fmt.Errorf("get safe /proc/thread-self/fd handle: %w", err)
}
defer procFdDir.Close()
defer closer()
// Try to detect if there is a mount on top of the magic-link we are about
// to open. If we are using unsafeHostProcRoot(), this could change after
// we check it (and there's nothing we can do about that) but for
// privateProcRoot() this should be guaranteed to be safe (at least since
// Linux 5.12[1], when anonymous mount namespaces were completely isolated
// from external mounts including mount propagation events).
//
// [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts
// onto targets that reside on shared mounts").
fdStr := strconv.Itoa(int(handle.Fd()))
if err := checkSymlinkOvermount(procRoot, procFdDir, fdStr); err != nil {
return nil, fmt.Errorf("check safety of /proc/thread-self/fd/%s magiclink: %w", fdStr, err)
}
flags |= unix.O_CLOEXEC
// Rather than just wrapping openatFile, open-code it so we can copy
// handle.Name().
reopenFd, err := unix.Openat(int(procFdDir.Fd()), fdStr, flags, 0)
if err != nil {
return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err)
}
return os.NewFile(uintptr(reopenFd), handle.Name()), nil
}

View File

@ -0,0 +1,127 @@
//go:build linux
// Copyright (C) 2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"golang.org/x/sys/unix"
)
var hasOpenat2 = sync_OnceValue(func() bool {
fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{
Flags: unix.O_PATH | unix.O_CLOEXEC,
Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT,
})
if err != nil {
return false
}
_ = unix.Close(fd)
return true
})
func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool {
// RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve
// ".." while a mount or rename occurs anywhere on the system. This could
// happen spuriously, or as the result of an attacker trying to mess with
// us during lookup.
//
// In addition, scoped lookups have a "safety check" at the end of
// complete_walk which will return -EXDEV if the final path is not in the
// root.
return how.Resolve&(unix.RESOLVE_IN_ROOT|unix.RESOLVE_BENEATH) != 0 &&
(errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV))
}
const scopedLookupMaxRetries = 10
func openat2File(dir *os.File, path string, how *unix.OpenHow) (*os.File, error) {
fullPath := dir.Name() + "/" + path
// Make sure we always set O_CLOEXEC.
how.Flags |= unix.O_CLOEXEC
var tries int
for tries < scopedLookupMaxRetries {
fd, err := unix.Openat2(int(dir.Fd()), path, how)
if err != nil {
if scopedLookupShouldRetry(how, err) {
// We retry a couple of times to avoid the spurious errors, and
// if we are being attacked then returning -EAGAIN is the best
// we can do.
tries++
continue
}
return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: err}
}
// If we are using RESOLVE_IN_ROOT, the name we generated may be wrong.
// NOTE: The procRoot code MUST NOT use RESOLVE_IN_ROOT, otherwise
// you'll get infinite recursion here.
if how.Resolve&unix.RESOLVE_IN_ROOT == unix.RESOLVE_IN_ROOT {
if actualPath, err := rawProcSelfFdReadlink(fd); err == nil {
fullPath = actualPath
}
}
return os.NewFile(uintptr(fd), fullPath), nil
}
return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: errPossibleAttack}
}
func lookupOpenat2(root *os.File, unsafePath string, partial bool) (*os.File, string, error) {
if !partial {
file, err := openat2File(root, unsafePath, &unix.OpenHow{
Flags: unix.O_PATH | unix.O_CLOEXEC,
Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS,
})
return file, "", err
}
return partialLookupOpenat2(root, unsafePath)
}
// partialLookupOpenat2 is an alternative implementation of
// partialLookupInRoot, using openat2(RESOLVE_IN_ROOT) to more safely get a
// handle to the deepest existing child of the requested path within the root.
func partialLookupOpenat2(root *os.File, unsafePath string) (*os.File, string, error) {
// TODO: Implement this as a git-bisect-like binary search.
unsafePath = filepath.ToSlash(unsafePath) // noop
endIdx := len(unsafePath)
var lastError error
for endIdx > 0 {
subpath := unsafePath[:endIdx]
handle, err := openat2File(root, subpath, &unix.OpenHow{
Flags: unix.O_PATH | unix.O_CLOEXEC,
Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS,
})
if err == nil {
// Jump over the slash if we have a non-"" remainingPath.
if endIdx < len(unsafePath) {
endIdx += 1
}
// We found a subpath!
return handle, unsafePath[endIdx:], lastError
}
if errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENOTDIR) {
// That path doesn't exist, let's try the next directory up.
endIdx = strings.LastIndexByte(subpath, '/')
lastError = err
continue
}
return nil, "", fmt.Errorf("open subpath: %w", err)
}
// If we couldn't open anything, the whole subpath is missing. Return a
// copy of the root fd so that the caller doesn't close this one by
// accident.
rootClone, err := dupFile(root)
if err != nil {
return nil, "", err
}
return rootClone, unsafePath, lastError
}

View File

@ -0,0 +1,59 @@
//go:build linux
// Copyright (C) 2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"os"
"path/filepath"
"golang.org/x/sys/unix"
)
func dupFile(f *os.File) (*os.File, error) {
fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
if err != nil {
return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err)
}
return os.NewFile(uintptr(fd), f.Name()), nil
}
func openatFile(dir *os.File, path string, flags int, mode int) (*os.File, error) {
// Make sure we always set O_CLOEXEC.
flags |= unix.O_CLOEXEC
fd, err := unix.Openat(int(dir.Fd()), path, flags, uint32(mode))
if err != nil {
return nil, &os.PathError{Op: "openat", Path: dir.Name() + "/" + path, Err: err}
}
// All of the paths we use with openatFile(2) are guaranteed to be
// lexically safe, so we can use path.Join here.
fullPath := filepath.Join(dir.Name(), path)
return os.NewFile(uintptr(fd), fullPath), nil
}
func fstatatFile(dir *os.File, path string, flags int) (unix.Stat_t, error) {
var stat unix.Stat_t
if err := unix.Fstatat(int(dir.Fd()), path, &stat, flags); err != nil {
return stat, &os.PathError{Op: "fstatat", Path: dir.Name() + "/" + path, Err: err}
}
return stat, nil
}
func readlinkatFile(dir *os.File, path string) (string, error) {
size := 4096
for {
linkBuf := make([]byte, size)
n, err := unix.Readlinkat(int(dir.Fd()), path, linkBuf)
if err != nil {
return "", &os.PathError{Op: "readlinkat", Path: dir.Name() + "/" + path, Err: err}
}
if n != size {
return string(linkBuf[:n]), nil
}
// Possible truncation, resize the buffer.
size *= 2
}
}

View File

@ -0,0 +1,452 @@
//go:build linux
// Copyright (C) 2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import (
"errors"
"fmt"
"os"
"runtime"
"strconv"
"golang.org/x/sys/unix"
)
func fstat(f *os.File) (unix.Stat_t, error) {
var stat unix.Stat_t
if err := unix.Fstat(int(f.Fd()), &stat); err != nil {
return stat, &os.PathError{Op: "fstat", Path: f.Name(), Err: err}
}
return stat, nil
}
func fstatfs(f *os.File) (unix.Statfs_t, error) {
var statfs unix.Statfs_t
if err := unix.Fstatfs(int(f.Fd()), &statfs); err != nil {
return statfs, &os.PathError{Op: "fstatfs", Path: f.Name(), Err: err}
}
return statfs, nil
}
// The kernel guarantees that the root inode of a procfs mount has an
// f_type of PROC_SUPER_MAGIC and st_ino of PROC_ROOT_INO.
const (
procSuperMagic = 0x9fa0 // PROC_SUPER_MAGIC
procRootIno = 1 // PROC_ROOT_INO
)
func verifyProcRoot(procRoot *os.File) error {
if statfs, err := fstatfs(procRoot); err != nil {
return err
} else if statfs.Type != procSuperMagic {
return fmt.Errorf("%w: incorrect procfs root filesystem type 0x%x", errUnsafeProcfs, statfs.Type)
}
if stat, err := fstat(procRoot); err != nil {
return err
} else if stat.Ino != procRootIno {
return fmt.Errorf("%w: incorrect procfs root inode number %d", errUnsafeProcfs, stat.Ino)
}
return nil
}
var hasNewMountApi = sync_OnceValue(func() bool {
// All of the pieces of the new mount API we use (fsopen, fsconfig,
// fsmount, open_tree) were added together in Linux 5.1[1,2], so we can
// just check for one of the syscalls and the others should also be
// available.
//
// Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE.
// This is equivalent to openat(2), but tells us if open_tree is
// available (and thus all of the other basic new mount API syscalls).
// open_tree(2) is most light-weight syscall to test here.
//
// [1]: merge commit 400913252d09
// [2]: <https://lore.kernel.org/lkml/153754740781.17872.7869536526927736855.stgit@warthog.procyon.org.uk/>
fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC)
if err != nil {
return false
}
_ = unix.Close(fd)
return true
})
func fsopen(fsName string, flags int) (*os.File, error) {
// Make sure we always set O_CLOEXEC.
flags |= unix.FSOPEN_CLOEXEC
fd, err := unix.Fsopen(fsName, flags)
if err != nil {
return nil, os.NewSyscallError("fsopen "+fsName, err)
}
return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil
}
func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) {
// Make sure we always set O_CLOEXEC.
flags |= unix.FSMOUNT_CLOEXEC
fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs)
if err != nil {
return nil, os.NewSyscallError("fsmount "+ctx.Name(), err)
}
return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil
}
func newPrivateProcMount() (*os.File, error) {
procfsCtx, err := fsopen("proc", unix.FSOPEN_CLOEXEC)
if err != nil {
return nil, err
}
defer procfsCtx.Close()
// Try to configure hidepid=ptraceable,subset=pid if possible, but ignore errors.
_ = unix.FsconfigSetString(int(procfsCtx.Fd()), "hidepid", "ptraceable")
_ = unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid")
// Get an actual handle.
if err := unix.FsconfigCreate(int(procfsCtx.Fd())); err != nil {
return nil, os.NewSyscallError("fsconfig create procfs", err)
}
return fsmount(procfsCtx, unix.FSMOUNT_CLOEXEC, unix.MS_RDONLY|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_NOSUID)
}
func openTree(dir *os.File, path string, flags uint) (*os.File, error) {
dirFd := -int(unix.EBADF)
dirName := "."
if dir != nil {
dirFd = int(dir.Fd())
dirName = dir.Name()
}
// Make sure we always set O_CLOEXEC.
flags |= unix.OPEN_TREE_CLOEXEC
fd, err := unix.OpenTree(dirFd, path, flags)
if err != nil {
return nil, &os.PathError{Op: "open_tree", Path: path, Err: err}
}
return os.NewFile(uintptr(fd), dirName+"/"+path), nil
}
func clonePrivateProcMount() (_ *os.File, Err error) {
// Try to make a clone without using AT_RECURSIVE if we can. If this works,
// we can be sure there are no over-mounts and so if the root is valid then
// we're golden. Otherwise, we have to deal with over-mounts.
procfsHandle, err := openTree(nil, "/proc", unix.OPEN_TREE_CLONE)
if err != nil || hookForcePrivateProcRootOpenTreeAtRecursive(procfsHandle) {
procfsHandle, err = openTree(nil, "/proc", unix.OPEN_TREE_CLONE|unix.AT_RECURSIVE)
}
if err != nil {
return nil, fmt.Errorf("creating a detached procfs clone: %w", err)
}
defer func() {
if Err != nil {
_ = procfsHandle.Close()
}
}()
if err := verifyProcRoot(procfsHandle); err != nil {
return nil, err
}
return procfsHandle, nil
}
func privateProcRoot() (*os.File, error) {
if !hasNewMountApi() || hookForceGetProcRootUnsafe() {
return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP)
}
// Try to create a new procfs mount from scratch if we can. This ensures we
// can get a procfs mount even if /proc is fake (for whatever reason).
procRoot, err := newPrivateProcMount()
if err != nil || hookForcePrivateProcRootOpenTree(procRoot) {
// Try to clone /proc then...
procRoot, err = clonePrivateProcMount()
}
return procRoot, err
}
func unsafeHostProcRoot() (_ *os.File, Err error) {
procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
defer func() {
if Err != nil {
_ = procRoot.Close()
}
}()
if err := verifyProcRoot(procRoot); err != nil {
return nil, err
}
return procRoot, nil
}
func doGetProcRoot() (*os.File, error) {
procRoot, err := privateProcRoot()
if err != nil {
// Fall back to using a /proc handle if making a private mount failed.
// If we have openat2, at least we can avoid some kinds of over-mount
// attacks, but without openat2 there's not much we can do.
procRoot, err = unsafeHostProcRoot()
}
return procRoot, err
}
var getProcRoot = sync_OnceValues(func() (*os.File, error) {
return doGetProcRoot()
})
var hasProcThreadSelf = sync_OnceValue(func() bool {
return unix.Access("/proc/thread-self/", unix.F_OK) == nil
})
var errUnsafeProcfs = errors.New("unsafe procfs detected")
type procThreadSelfCloser func()
// procThreadSelf returns a handle to /proc/thread-self/<subpath> (or an
// equivalent handle on older kernels where /proc/thread-self doesn't exist).
// Once finished with the handle, you must call the returned closer function
// (runtime.UnlockOSThread). You must not pass the returned *os.File to other
// Go threads or use the handle after calling the closer.
//
// This is similar to ProcThreadSelf from runc, but with extra hardening
// applied and using *os.File.
func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThreadSelfCloser, Err error) {
// We need to lock our thread until the caller is done with the handle
// because between getting the handle and using it we could get interrupted
// by the Go runtime and hit the case where the underlying thread is
// swapped out and the original thread is killed, resulting in
// pull-your-hair-out-hard-to-debug issues in the caller.
runtime.LockOSThread()
defer func() {
if Err != nil {
runtime.UnlockOSThread()
}
}()
// Figure out what prefix we want to use.
threadSelf := "thread-self/"
if !hasProcThreadSelf() || hookForceProcSelfTask() {
/// Pre-3.17 kernels don't have /proc/thread-self, so do it manually.
threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) + "/"
if _, err := fstatatFile(procRoot, threadSelf, unix.AT_SYMLINK_NOFOLLOW); err != nil || hookForceProcSelf() {
// In this case, we running in a pid namespace that doesn't match
// the /proc mount we have. This can happen inside runc.
//
// Unfortunately, there is no nice way to get the correct TID to
// use here because of the age of the kernel, so we have to just
// use /proc/self and hope that it works.
threadSelf = "self/"
}
}
// Grab the handle.
var (
handle *os.File
err error
)
if hasOpenat2() {
// We prefer being able to use RESOLVE_NO_XDEV if we can, to be
// absolutely sure we are operating on a clean /proc handle that
// doesn't have any cheeky overmounts that could trick us (including
// symlink mounts on top of /proc/thread-self). RESOLVE_BENEATH isn't
// strictly needed, but just use it since we have it.
//
// NOTE: /proc/self is technically a magic-link (the contents of the
// symlink are generated dynamically), but it doesn't use
// nd_jump_link() so RESOLVE_NO_MAGICLINKS allows it.
//
// NOTE: We MUST NOT use RESOLVE_IN_ROOT here, as openat2File uses
// procSelfFdReadlink to clean up the returned f.Name() if we use
// RESOLVE_IN_ROOT (which would lead to an infinite recursion).
handle, err = openat2File(procRoot, threadSelf+subpath, &unix.OpenHow{
Flags: unix.O_PATH | unix.O_NOFOLLOW | unix.O_CLOEXEC,
Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_MAGICLINKS,
})
if err != nil {
// TODO: Once we bump the minimum Go version to 1.20, we can use
// multiple %w verbs for this wrapping. For now we need to use a
// compatibility shim for older Go versions.
//err = fmt.Errorf("%w: %w", errUnsafeProcfs, err)
return nil, nil, wrapBaseError(err, errUnsafeProcfs)
}
} else {
handle, err = openatFile(procRoot, threadSelf+subpath, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
if err != nil {
// TODO: Once we bump the minimum Go version to 1.20, we can use
// multiple %w verbs for this wrapping. For now we need to use a
// compatibility shim for older Go versions.
//err = fmt.Errorf("%w: %w", errUnsafeProcfs, err)
return nil, nil, wrapBaseError(err, errUnsafeProcfs)
}
defer func() {
if Err != nil {
_ = handle.Close()
}
}()
// We can't detect bind-mounts of different parts of procfs on top of
// /proc (a-la RESOLVE_NO_XDEV), but we can at least be sure that we
// aren't on the wrong filesystem here.
if statfs, err := fstatfs(handle); err != nil {
return nil, nil, err
} else if statfs.Type != procSuperMagic {
return nil, nil, fmt.Errorf("%w: incorrect /proc/self/fd filesystem type 0x%x", errUnsafeProcfs, statfs.Type)
}
}
return handle, runtime.UnlockOSThread, nil
}
// STATX_MNT_ID_UNIQUE is provided in golang.org/x/sys@v0.20.0, but in order to
// avoid bumping the requirement for a single constant we can just define it
// ourselves.
const STATX_MNT_ID_UNIQUE = 0x4000
var hasStatxMountId = sync_OnceValue(func() bool {
var (
stx unix.Statx_t
// We don't care which mount ID we get. The kernel will give us the
// unique one if it is supported.
wantStxMask uint32 = STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
)
err := unix.Statx(-int(unix.EBADF), "/", 0, int(wantStxMask), &stx)
return err == nil && stx.Mask&wantStxMask != 0
})
func getMountId(dir *os.File, path string) (uint64, error) {
// If we don't have statx(STATX_MNT_ID*) support, we can't do anything.
if !hasStatxMountId() {
return 0, nil
}
var (
stx unix.Statx_t
// We don't care which mount ID we get. The kernel will give us the
// unique one if it is supported.
wantStxMask uint32 = STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
)
err := unix.Statx(int(dir.Fd()), path, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW, int(wantStxMask), &stx)
if stx.Mask&wantStxMask == 0 {
// It's not a kernel limitation, for some reason we couldn't get a
// mount ID. Assume it's some kind of attack.
err = fmt.Errorf("%w: could not get mount id", errUnsafeProcfs)
}
if err != nil {
return 0, &os.PathError{Op: "statx(STATX_MNT_ID_...)", Path: dir.Name() + "/" + path, Err: err}
}
return stx.Mnt_id, nil
}
func checkSymlinkOvermount(procRoot *os.File, dir *os.File, path string) error {
// Get the mntId of our procfs handle.
expectedMountId, err := getMountId(procRoot, "")
if err != nil {
return err
}
// Get the mntId of the target magic-link.
gotMountId, err := getMountId(dir, path)
if err != nil {
return err
}
// As long as the directory mount is alive, even with wrapping mount IDs,
// we would expect to see a different mount ID here. (Of course, if we're
// using unsafeHostProcRoot() then an attaker could change this after we
// did this check.)
if expectedMountId != gotMountId {
return fmt.Errorf("%w: symlink %s/%s has an overmount obscuring the real link (mount ids do not match %d != %d)", errUnsafeProcfs, dir.Name(), path, expectedMountId, gotMountId)
}
return nil
}
func doRawProcSelfFdReadlink(procRoot *os.File, fd int) (string, error) {
fdPath := fmt.Sprintf("fd/%d", fd)
procFdLink, closer, err := procThreadSelf(procRoot, fdPath)
if err != nil {
return "", fmt.Errorf("get safe /proc/thread-self/%s handle: %w", fdPath, err)
}
defer procFdLink.Close()
defer closer()
// Try to detect if there is a mount on top of the magic-link. Since we use the handle directly
// provide to the closure. If the closure uses the handle directly, this
// should be safe in general (a mount on top of the path afterwards would
// not affect the handle itself) and will definitely be safe if we are
// using privateProcRoot() (at least since Linux 5.12[1], when anonymous
// mount namespaces were completely isolated from external mounts including
// mount propagation events).
//
// [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts
// onto targets that reside on shared mounts").
if err := checkSymlinkOvermount(procRoot, procFdLink, ""); err != nil {
return "", fmt.Errorf("check safety of /proc/thread-self/fd/%d magiclink: %w", fd, err)
}
// readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See Linux commit
// 65cfc6722361 ("readlinkat(), fchownat() and fstatat() with empty
// relative pathnames").
return readlinkatFile(procFdLink, "")
}
func rawProcSelfFdReadlink(fd int) (string, error) {
procRoot, err := getProcRoot()
if err != nil {
return "", err
}
return doRawProcSelfFdReadlink(procRoot, fd)
}
func procSelfFdReadlink(f *os.File) (string, error) {
return rawProcSelfFdReadlink(int(f.Fd()))
}
var (
errPossibleBreakout = errors.New("possible breakout detected")
errInvalidDirectory = errors.New("wandered into deleted directory")
errDeletedInode = errors.New("cannot verify path of deleted inode")
)
func isDeadInode(file *os.File) error {
// If the nlink of a file drops to 0, there is an attacker deleting
// directories during our walk, which could result in weird /proc values.
// It's better to error out in this case.
stat, err := fstat(file)
if err != nil {
return fmt.Errorf("check for dead inode: %w", err)
}
if stat.Nlink == 0 {
err := errDeletedInode
if stat.Mode&unix.S_IFMT == unix.S_IFDIR {
err = errInvalidDirectory
}
return fmt.Errorf("%w %q", err, file.Name())
}
return nil
}
func checkProcSelfFdPath(path string, file *os.File) error {
if err := isDeadInode(file); err != nil {
return err
}
actualPath, err := procSelfFdReadlink(file)
if err != nil {
return fmt.Errorf("get path of handle: %w", err)
}
if actualPath != path {
return fmt.Errorf("%w: handle path %q doesn't match expected path %q", errPossibleBreakout, actualPath, path)
}
return nil
}
// Test hooks used in the procfs tests to verify that the fallback logic works.
// See testing_mocks_linux_test.go and procfs_linux_test.go for more details.
var (
hookForcePrivateProcRootOpenTree = hookDummyFile
hookForcePrivateProcRootOpenTreeAtRecursive = hookDummyFile
hookForceGetProcRootUnsafe = hookDummy
hookForceProcSelfTask = hookDummy
hookForceProcSelf = hookDummy
)
func hookDummy() bool { return false }
func hookDummyFile(_ *os.File) bool { return false }

35
vendor/github.com/cyphar/filepath-securejoin/vfs.go generated vendored Normal file
View File

@ -0,0 +1,35 @@
// Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package securejoin
import "os"
// In future this should be moved into a separate package, because now there
// are several projects (umoci and go-mtree) that are using this sort of
// interface.
// VFS is the minimal interface necessary to use [SecureJoinVFS]. A nil VFS is
// equivalent to using the standard [os].* family of functions. This is mainly
// used for the purposes of mock testing, but also can be used to otherwise use
// [SecureJoinVFS] with VFS-like system.
type VFS interface {
// Lstat returns an [os.FileInfo] describing the named file. If the
// file is a symbolic link, the returned [os.FileInfo] describes the
// symbolic link. Lstat makes no attempt to follow the link.
// The semantics are identical to [os.Lstat].
Lstat(name string) (os.FileInfo, error)
// Readlink returns the destination of the named symbolic link.
// The semantics are identical to [os.Readlink].
Readlink(name string) (string, error)
}
// osVFS is the "nil" VFS, in that it just passes everything through to the os
// module.
type osVFS struct{}
func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) }
func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) }

191
vendor/github.com/opencontainers/runc/LICENSE generated vendored Normal file
View File

@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2014 Docker, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

17
vendor/github.com/opencontainers/runc/NOTICE generated vendored Normal file
View File

@ -0,0 +1,17 @@
runc
Copyright 2012-2015 Docker, Inc.
This product includes software developed at Docker, Inc. (http://www.docker.com).
The following is courtesy of our legal counsel:
Use and transfer of Docker may be subject to certain restrictions by the
United States and other governments.
It is your responsibility to ensure that your use and/or transfer does not
violate applicable laws.
For more information, please see http://www.bis.doc.gov
See also http://www.apache.org/dev/crypto.html and/or seek legal counsel.

View File

@ -0,0 +1,258 @@
package dmz
import (
"errors"
"fmt"
"io"
"os"
"strconv"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/system"
)
type SealFunc func(**os.File) error
var (
_ SealFunc = sealMemfd
_ SealFunc = sealFile
)
func isExecutable(f *os.File) bool {
if err := unix.Faccessat(int(f.Fd()), "", unix.X_OK, unix.AT_EACCESS|unix.AT_EMPTY_PATH); err == nil {
return true
} else if err == unix.EACCES {
return false
}
path := "/proc/self/fd/" + strconv.Itoa(int(f.Fd()))
if err := unix.Access(path, unix.X_OK); err == nil {
return true
} else if err == unix.EACCES {
return false
}
// Cannot check -- assume it's executable (if not, exec will fail).
logrus.Debugf("cannot do X_OK check on binary %s -- assuming it's executable", f.Name())
return true
}
const baseMemfdSeals = unix.F_SEAL_SEAL | unix.F_SEAL_SHRINK | unix.F_SEAL_GROW | unix.F_SEAL_WRITE
func sealMemfd(f **os.File) error {
if err := (*f).Chmod(0o511); err != nil {
return err
}
// Try to set the newer memfd sealing flags, but we ignore
// errors because they are not needed and we want to continue
// to work on older kernels.
fd := (*f).Fd()
// F_SEAL_FUTURE_WRITE -- Linux 5.1
_, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, unix.F_SEAL_FUTURE_WRITE)
// F_SEAL_EXEC -- Linux 6.3
const F_SEAL_EXEC = 0x20 //nolint:revive // this matches the unix.* name
_, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, F_SEAL_EXEC)
// Apply all original memfd seals.
_, err := unix.FcntlInt(fd, unix.F_ADD_SEALS, baseMemfdSeals)
return os.NewSyscallError("fcntl(F_ADD_SEALS)", err)
}
// Memfd creates a sealable executable memfd (supported since Linux 3.17).
func Memfd(comment string) (*os.File, SealFunc, error) {
file, err := system.ExecutableMemfd("runc_cloned:"+comment, unix.MFD_ALLOW_SEALING|unix.MFD_CLOEXEC)
return file, sealMemfd, err
}
func sealFile(f **os.File) error {
// When sealing an O_TMPFILE-style descriptor we need to
// re-open the path as O_PATH to clear the existing write
// handle we have.
opath, err := os.OpenFile(fmt.Sprintf("/proc/self/fd/%d", (*f).Fd()), unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("reopen tmpfile: %w", err)
}
_ = (*f).Close()
*f = opath
return nil
}
// otmpfile creates an open(O_TMPFILE) file in the given directory (supported
// since Linux 3.11).
func otmpfile(dir string) (*os.File, SealFunc, error) {
file, err := os.OpenFile(dir, unix.O_TMPFILE|unix.O_RDWR|unix.O_EXCL|unix.O_CLOEXEC, 0o700)
if err != nil {
return nil, nil, fmt.Errorf("O_TMPFILE creation failed: %w", err)
}
// Make sure we actually got an unlinked O_TMPFILE descriptor.
var stat unix.Stat_t
if err := unix.Fstat(int(file.Fd()), &stat); err != nil {
file.Close()
return nil, nil, fmt.Errorf("cannot fstat O_TMPFILE fd: %w", err)
} else if stat.Nlink != 0 {
file.Close()
return nil, nil, errors.New("O_TMPFILE has non-zero nlink")
}
return file, sealFile, err
}
// mktemp creates a classic unlinked file in the given directory.
func mktemp(dir string) (*os.File, SealFunc, error) {
file, err := os.CreateTemp(dir, "runc.")
if err != nil {
return nil, nil, err
}
// Unlink the file and verify it was unlinked.
if err := os.Remove(file.Name()); err != nil {
return nil, nil, fmt.Errorf("unlinking classic tmpfile: %w", err)
}
if err := file.Chmod(0o511); err != nil {
return nil, nil, fmt.Errorf("chmod classic tmpfile: %w", err)
}
var stat unix.Stat_t
if err := unix.Fstat(int(file.Fd()), &stat); err != nil {
return nil, nil, fmt.Errorf("cannot fstat classic tmpfile: %w", err)
} else if stat.Nlink != 0 {
return nil, nil, fmt.Errorf("classic tmpfile %s has non-zero nlink after unlink", file.Name())
}
return file, sealFile, err
}
func getSealableFile(comment, tmpDir string) (file *os.File, sealFn SealFunc, err error) {
// First, try an executable memfd (supported since Linux 3.17).
file, sealFn, err = Memfd(comment)
if err == nil {
return
}
logrus.Debugf("memfd cloned binary failed, falling back to O_TMPFILE: %v", err)
// The tmpDir here (c.root) might be mounted noexec, so we need a couple of
// fallbacks to try. It's possible that none of these are writable and
// executable, in which case there's nothing we can practically do (other
// than mounting our own executable tmpfs, which would have its own
// issues).
tmpDirs := []string{
tmpDir,
os.TempDir(),
"/tmp",
".",
"/bin",
"/",
}
// Try to fallback to O_TMPFILE (supported since Linux 3.11).
for _, dir := range tmpDirs {
file, sealFn, err = otmpfile(dir)
if err != nil {
continue
}
if !isExecutable(file) {
logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir)
file.Close()
continue
}
return
}
logrus.Debugf("O_TMPFILE cloned binary failed, falling back to mktemp(): %v", err)
// Finally, try a classic unlinked temporary file.
for _, dir := range tmpDirs {
file, sealFn, err = mktemp(dir)
if err != nil {
continue
}
if !isExecutable(file) {
logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir)
file.Close()
continue
}
return
}
return nil, nil, fmt.Errorf("could not create sealable file for cloned binary: %w", err)
}
// CloneBinary creates a "sealed" clone of a given binary, which can be used to
// thwart attempts by the container process to gain access to host binaries
// through procfs magic-link shenanigans. For more details on why this is
// necessary, see CVE-2019-5736.
func CloneBinary(src io.Reader, size int64, name, tmpDir string) (*os.File, error) {
logrus.Debugf("cloning %s binary (%d bytes)", name, size)
file, sealFn, err := getSealableFile(name, tmpDir)
if err != nil {
return nil, err
}
copied, err := system.Copy(file, src)
if err != nil {
file.Close()
return nil, fmt.Errorf("copy binary: %w", err)
} else if copied != size {
file.Close()
return nil, fmt.Errorf("copied binary size mismatch: %d != %d", copied, size)
}
if err := sealFn(&file); err != nil {
file.Close()
return nil, fmt.Errorf("could not seal fd: %w", err)
}
return file, nil
}
// IsCloned returns whether the given file can be guaranteed to be a safe exe.
func IsCloned(exe *os.File) bool {
seals, err := unix.FcntlInt(exe.Fd(), unix.F_GET_SEALS, 0)
if err != nil {
// /proc/self/exe is probably not a memfd
logrus.Debugf("F_GET_SEALS on %s failed: %v", exe.Name(), err)
return false
}
// The memfd must have all of the base seals applied.
logrus.Debugf("checking %s memfd seals: 0x%x", exe.Name(), seals)
return seals&baseMemfdSeals == baseMemfdSeals
}
// CloneSelfExe makes a clone of the current process's binary (through
// /proc/self/exe). This binary can then be used for "runc init" in order to
// make sure the container process can never resolve the original runc binary.
// For more details on why this is necessary, see CVE-2019-5736.
func CloneSelfExe(tmpDir string) (*os.File, error) {
// Try to create a temporary overlayfs to produce a readonly version of
// /proc/self/exe that cannot be "unwrapped" by the container. In contrast
// to CloneBinary, this technique does not require any extra memory usage
// and does not have the (fairly noticeable) performance impact of copying
// a large binary file into a memfd.
//
// Based on some basic performance testing, the overlayfs approach has
// effectively no performance overhead (it is on par with both
// MS_BIND+MS_RDONLY and no binary cloning at all) while memfd copying adds
// around ~60% overhead during container startup.
overlayFile, err := sealedOverlayfs("/proc/self/exe", tmpDir)
if err == nil {
logrus.Debug("runc-dmz: using overlayfs for sealed /proc/self/exe") // used for tests
return overlayFile, nil
}
logrus.WithError(err).Debugf("could not use overlayfs for /proc/self/exe sealing -- falling back to making a temporary copy")
selfExe, err := os.Open("/proc/self/exe")
if err != nil {
return nil, fmt.Errorf("opening current binary: %w", err)
}
defer selfExe.Close()
stat, err := selfExe.Stat()
if err != nil {
return nil, fmt.Errorf("checking /proc/self/exe size: %w", err)
}
size := stat.Size()
return CloneBinary(selfExe, size, "/proc/self/exe", tmpDir)
}
// IsSelfExeCloned returns whether /proc/self/exe is a cloned binary that can
// be guaranteed to be safe. This means that it must be a sealed memfd. Other
// types of clones cannot be completely verified as safe.
func IsSelfExeCloned() bool {
selfExe, err := os.Open("/proc/self/exe")
if err != nil {
logrus.Debugf("open /proc/self/exe failed: %v", err)
return false
}
defer selfExe.Close()
return IsCloned(selfExe)
}

View File

@ -0,0 +1,122 @@
package dmz
import (
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/utils"
)
func fsopen(fsName string, flags int) (*os.File, error) {
// Make sure we always set O_CLOEXEC.
flags |= unix.FSOPEN_CLOEXEC
fd, err := unix.Fsopen(fsName, flags)
if err != nil {
return nil, os.NewSyscallError("fsopen "+fsName, err)
}
return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil
}
func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) {
// Make sure we always set O_CLOEXEC.
flags |= unix.FSMOUNT_CLOEXEC
fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs)
if err != nil {
return nil, os.NewSyscallError("fsmount "+ctx.Name(), err)
}
runtime.KeepAlive(ctx) // make sure fd is kept alive while it's used
return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil
}
func escapeOverlayLowerDir(path string) string {
// If the lowerdir path contains ":" we need to escape them, and if there
// were any escape characters already (\) we need to escape those first.
return strings.ReplaceAll(strings.ReplaceAll(path, `\`, `\\`), `:`, `\:`)
}
// sealedOverlayfs will create an internal overlayfs mount using fsopen() that
// uses the directory containing the binary as a lowerdir and a temporary tmpfs
// as an upperdir. There is no way to "unwrap" this (unlike MS_BIND+MS_RDONLY)
// and so we can create a safe zero-copy sealed version of /proc/self/exe.
// This only works for privileged users and on kernels with overlayfs and
// fsopen() enabled.
//
// TODO: Since Linux 5.11, overlayfs can be created inside user namespaces so
// it is technically possible to create an overlayfs even for rootless
// containers. Unfortunately, this would require some ugly manual CGo+fork
// magic so we can do this later if we feel it's really needed.
func sealedOverlayfs(binPath, tmpDir string) (_ *os.File, Err error) {
// Try to do the superblock creation first to bail out early if we can't
// use this method.
overlayCtx, err := fsopen("overlay", unix.FSOPEN_CLOEXEC)
if err != nil {
return nil, err
}
defer overlayCtx.Close()
// binPath is going to be /proc/self/exe, so do a readlink to get the real
// path. overlayfs needs the real underlying directory for this protection
// mode to work properly.
if realPath, err := os.Readlink(binPath); err == nil {
binPath = realPath
}
binLowerDirPath, binName := filepath.Split(binPath)
// Escape any ":"s or "\"s in the path.
binLowerDirPath = escapeOverlayLowerDir(binLowerDirPath)
// Overlayfs requires two lowerdirs in order to run in "lower-only" mode,
// where writes are completely blocked. Ideally we would create a dummy
// tmpfs for this, but it turns out that overlayfs doesn't allow for
// anonymous mountns paths.
// NOTE: I'm working on a patch to fix this but it won't be backported.
dummyLowerDirPath := escapeOverlayLowerDir(tmpDir)
// Configure the lowerdirs. The binary lowerdir needs to be on the top to
// ensure that a file called "runc" (binName) in the dummy lowerdir doesn't
// mask the binary.
lowerDirStr := binLowerDirPath + ":" + dummyLowerDirPath
if err := unix.FsconfigSetString(int(overlayCtx.Fd()), "lowerdir", lowerDirStr); err != nil {
return nil, fmt.Errorf("fsconfig set overlayfs lowerdir=%s: %w", lowerDirStr, err)
}
// We don't care about xino (Linux 4.17) but it will be auto-enabled on
// some systems (if /run/runc and /usr/bin are on different filesystems)
// and this produces spurious dmesg log entries. We can safely ignore
// errors when disabling this because we don't actually care about the
// setting and we're just opportunistically disabling it.
_ = unix.FsconfigSetString(int(overlayCtx.Fd()), "xino", "off")
// Get an actual handle to the overlayfs.
if err := unix.FsconfigCreate(int(overlayCtx.Fd())); err != nil {
return nil, os.NewSyscallError("fsconfig create overlayfs", err)
}
overlayFd, err := fsmount(overlayCtx, unix.FSMOUNT_CLOEXEC, unix.MS_RDONLY|unix.MS_NODEV|unix.MS_NOSUID)
if err != nil {
return nil, err
}
defer overlayFd.Close()
// Grab a handle to the binary through overlayfs.
exeFile, err := utils.Openat(overlayFd, binName, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open %s from overlayfs (lowerdir=%s): %w", binName, lowerDirStr, err)
}
// NOTE: We would like to check that exeFile is the same as /proc/self/exe,
// except this is a little difficult. Depending on what filesystems the
// layers are on, overlayfs can remap the inode numbers (and it always
// creates its own device numbers -- see ovl_map_dev_ino) so we can't do a
// basic stat-based check. The only reasonable option would be to hash both
// files and compare them, but this would require fully reading both files
// which would produce a similar performance overhead to memfd cloning.
//
// Ultimately, there isn't a real attack to be worried about here. An
// attacker would need to be able to modify files in /usr/sbin (or wherever
// runc lives), at which point they could just replace the runc binary with
// something malicious anyway.
return exeFile, nil
}

View File

@ -0,0 +1,216 @@
//go:build linux
package system
import (
"fmt"
"io"
"os"
"strconv"
"syscall"
"unsafe"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
type ParentDeathSignal int
func (p ParentDeathSignal) Restore() error {
if p == 0 {
return nil
}
current, err := GetParentDeathSignal()
if err != nil {
return err
}
if p == current {
return nil
}
return p.Set()
}
func (p ParentDeathSignal) Set() error {
return SetParentDeathSignal(uintptr(p))
}
func Exec(cmd string, args []string, env []string) error {
for {
err := unix.Exec(cmd, args, env)
if err != unix.EINTR {
return &os.PathError{Op: "exec", Path: cmd, Err: err}
}
}
}
func execveat(fd uintptr, pathname string, args []string, env []string, flags int) error {
pathnamep, err := syscall.BytePtrFromString(pathname)
if err != nil {
return err
}
argvp, err := syscall.SlicePtrFromStrings(args)
if err != nil {
return err
}
envp, err := syscall.SlicePtrFromStrings(env)
if err != nil {
return err
}
_, _, errno := syscall.Syscall6(
unix.SYS_EXECVEAT,
fd,
uintptr(unsafe.Pointer(pathnamep)),
uintptr(unsafe.Pointer(&argvp[0])),
uintptr(unsafe.Pointer(&envp[0])),
uintptr(flags),
0,
)
return errno
}
func Fexecve(fd uintptr, args []string, env []string) error {
var err error
for {
err = execveat(fd, "", args, env, unix.AT_EMPTY_PATH)
if err != unix.EINTR { // nolint:errorlint // unix errors are bare
break
}
}
if err == unix.ENOSYS { // nolint:errorlint // unix errors are bare
// Fallback to classic /proc/self/fd/... exec.
return Exec("/proc/self/fd/"+strconv.Itoa(int(fd)), args, env)
}
return os.NewSyscallError("execveat", err)
}
func SetParentDeathSignal(sig uintptr) error {
if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
return err
}
return nil
}
func GetParentDeathSignal() (ParentDeathSignal, error) {
var sig int
if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
return -1, err
}
return ParentDeathSignal(sig), nil
}
func SetKeepCaps() error {
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
return err
}
return nil
}
func ClearKeepCaps() error {
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
return err
}
return nil
}
func Setctty() error {
if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
return err
}
return nil
}
// SetSubreaper sets the value i as the subreaper setting for the calling process
func SetSubreaper(i int) error {
return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
}
// GetSubreaper returns the subreaper setting for the calling process
func GetSubreaper() (int, error) {
var i uintptr
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
return -1, err
}
return int(i), nil
}
func ExecutableMemfd(comment string, flags int) (*os.File, error) {
// Try to use MFD_EXEC first. On pre-6.3 kernels we get -EINVAL for this
// flag. On post-6.3 kernels, with vm.memfd_noexec=1 this ensures we get an
// executable memfd. For vm.memfd_noexec=2 this is a bit more complicated.
// The original vm.memfd_noexec=2 implementation incorrectly silently
// allowed MFD_EXEC[1] -- this should be fixed in 6.6. On 6.6 and newer
// kernels, we will get -EACCES if we try to use MFD_EXEC with
// vm.memfd_noexec=2 (for 6.3-6.5, -EINVAL was the intended return value).
//
// The upshot is we only need to retry without MFD_EXEC on -EINVAL because
// it just so happens that passing MFD_EXEC bypasses vm.memfd_noexec=2 on
// kernels where -EINVAL is actually a security denial.
memfd, err := unix.MemfdCreate(comment, flags|unix.MFD_EXEC)
if err == unix.EINVAL {
memfd, err = unix.MemfdCreate(comment, flags)
}
if err != nil {
if err == unix.EACCES {
logrus.Info("memfd_create(MFD_EXEC) failed, possibly due to vm.memfd_noexec=2 -- falling back to less secure O_TMPFILE")
}
err := os.NewSyscallError("memfd_create", err)
return nil, fmt.Errorf("failed to create executable memfd: %w", err)
}
return os.NewFile(uintptr(memfd), "/memfd:"+comment), nil
}
// Copy is like io.Copy except it uses sendfile(2) if the source and sink are
// both (*os.File) as an optimisation to make copies faster.
func Copy(dst io.Writer, src io.Reader) (copied int64, err error) {
dstFile, _ := dst.(*os.File)
srcFile, _ := src.(*os.File)
if dstFile != nil && srcFile != nil {
fi, err := srcFile.Stat()
if err != nil {
goto fallback
}
size := fi.Size()
for size > 0 {
n, err := unix.Sendfile(int(dstFile.Fd()), int(srcFile.Fd()), nil, int(size))
if n > 0 {
size -= int64(n)
copied += int64(n)
}
if err == unix.EINTR {
continue
}
if err != nil {
if copied == 0 {
// If we haven't copied anything so far, we can safely just
// fallback to io.Copy. We could always do the fallback but
// it's safer to error out in the case of a partial copy
// followed by an error (which should never happen).
goto fallback
}
return copied, fmt.Errorf("partial sendfile copy: %w", err)
}
}
return copied, nil
}
fallback:
return io.Copy(dst, src)
}
// SetLinuxPersonality sets the Linux execution personality. For more information see the personality syscall documentation.
// checkout getLinuxPersonalityFromStr() from libcontainer/specconv/spec_linux.go for type conversion.
func SetLinuxPersonality(personality int) error {
_, _, errno := unix.Syscall(unix.SYS_PERSONALITY, uintptr(personality), 0, 0)
if errno != 0 {
return &os.SyscallError{Syscall: "set_personality", Err: errno}
}
return nil
}

View File

@ -0,0 +1,127 @@
package system
import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
)
// State is the status of a process.
type State rune
const ( // Only values for Linux 3.14 and later are listed here
Dead State = 'X'
DiskSleep State = 'D'
Running State = 'R'
Sleeping State = 'S'
Stopped State = 'T'
TracingStop State = 't'
Zombie State = 'Z'
Parked State = 'P'
Idle State = 'I'
)
// String forms of the state from proc(5)'s documentation for
// /proc/[pid]/status' "State" field.
func (s State) String() string {
switch s {
case Dead:
return "dead"
case DiskSleep:
return "disk sleep"
case Running:
return "running"
case Sleeping:
return "sleeping"
case Stopped:
return "stopped"
case TracingStop:
return "tracing stop"
case Zombie:
return "zombie"
case Parked:
return "parked"
case Idle:
return "idle" // kernel thread
default:
return fmt.Sprintf("unknown (%c)", s)
}
}
// Stat_t represents the information from /proc/[pid]/stat, as
// described in proc(5) with names based on the /proc/[pid]/status
// fields.
type Stat_t struct {
// Name is the command run by the process.
Name string
// State is the state of the process.
State State
// StartTime is the number of clock ticks after system boot (since
// Linux 2.6).
StartTime uint64
}
// Stat returns a Stat_t instance for the specified process.
func Stat(pid int) (stat Stat_t, err error) {
bytes, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
if err != nil {
return stat, err
}
return parseStat(string(bytes))
}
func parseStat(data string) (stat Stat_t, err error) {
// Example:
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
// The fields are space-separated, see full description in proc(5).
//
// We are only interested in:
// * field 2: process name. It is the only field enclosed into
// parenthesis, as it can contain spaces (and parenthesis) inside.
// * field 3: process state, a single character (%c)
// * field 22: process start time, a long unsigned integer (%llu).
// 1. Look for the first '(' and the last ')' first, what's in between is Name.
// We expect at least 20 fields and a space after the last one.
const minAfterName = 20*2 + 1 // the min field is '0 '.
first := strings.IndexByte(data, '(')
if first < 0 || first+minAfterName >= len(data) {
return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data)
}
last := strings.LastIndexByte(data, ')')
if last <= first || last+minAfterName >= len(data) {
return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data)
}
stat.Name = data[first+1 : last]
// 2. Remove fields 1 and 2 and a space after. State is right after.
data = data[last+2:]
stat.State = State(data[0])
// 3. StartTime is field 22, data is at field 3 now, so we need to skip 19 spaces.
skipSpaces := 22 - 3
for first = 0; skipSpaces > 0 && first < len(data); first++ {
if data[first] == ' ' {
skipSpaces--
}
}
// Now first points to StartTime; look for space right after.
i := strings.IndexByte(data[first:], ' ')
if i < 0 {
return stat, fmt.Errorf("invalid stat data (too short): %q", data)
}
stat.StartTime, err = strconv.ParseUint(data[first:first+i], 10, 64)
if err != nil {
return stat, fmt.Errorf("invalid stat data (bad start time): %w", err)
}
return stat, nil
}

View File

@ -0,0 +1,15 @@
//go:build go1.23
package system
import (
"syscall"
)
// ClearRlimitNofileCache clears go runtime's nofile rlimit cache. The argument
// is process RLIMIT_NOFILE values. Relies on go.dev/cl/588076.
func ClearRlimitNofileCache(lim *syscall.Rlimit) {
// Ignore the return values since we only need to clean the cache,
// the limit is going to be set via unix.Prlimit elsewhere.
_ = syscall.Setrlimit(syscall.RLIMIT_NOFILE, lim)
}

View File

@ -0,0 +1,27 @@
//go:build !go1.23
// TODO: remove this file once go 1.22 is no longer supported.
package system
import (
"sync/atomic"
"syscall"
_ "unsafe" // Needed for go:linkname to work.
)
//go:linkname syscallOrigRlimitNofile syscall.origRlimitNofile
var syscallOrigRlimitNofile atomic.Pointer[syscall.Rlimit]
// ClearRlimitNofileCache clears go runtime's nofile rlimit cache.
// The argument is process RLIMIT_NOFILE values.
func ClearRlimitNofileCache(_ *syscall.Rlimit) {
// As reported in issue #4195, the new version of go runtime(since 1.19)
// will cache rlimit-nofile. Before executing execve, the rlimit-nofile
// of the process will be restored with the cache. In runc, this will
// cause the rlimit-nofile setting by the parent process for the container
// to become invalid. It can be solved by clearing this cache. But
// unfortunately, go stdlib doesn't provide such function, so we need to
// link to the private var `origRlimitNofile` in package syscall to hack.
syscallOrigRlimitNofile.Store(nil)
}

View File

@ -0,0 +1,119 @@
package utils
/*
* Copyright 2016, 2017 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import (
"fmt"
"os"
"runtime"
"golang.org/x/sys/unix"
)
// MaxNameLen is the maximum length of the name of a file descriptor being sent
// using SendFile. The name of the file handle returned by RecvFile will never be
// larger than this value.
const MaxNameLen = 4096
// oobSpace is the size of the oob slice required to store a single FD. Note
// that unix.UnixRights appears to make the assumption that fd is always int32,
// so sizeof(fd) = 4.
var oobSpace = unix.CmsgSpace(4)
// RecvFile waits for a file descriptor to be sent over the given AF_UNIX
// socket. The file name of the remote file descriptor will be recreated
// locally (it is sent as non-auxiliary data in the same payload).
func RecvFile(socket *os.File) (_ *os.File, Err error) {
name := make([]byte, MaxNameLen)
oob := make([]byte, oobSpace)
sockfd := socket.Fd()
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC)
if err != nil {
return nil, err
}
if n >= MaxNameLen || oobn != oobSpace {
return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
}
// Truncate.
name = name[:n]
oob = oob[:oobn]
scms, err := unix.ParseSocketControlMessage(oob)
if err != nil {
return nil, err
}
// We cannot control how many SCM_RIGHTS we receive, and upon receiving
// them all of the descriptors are installed in our fd table, so we need to
// parse all of the SCM_RIGHTS we received in order to close all of the
// descriptors on error.
var fds []int
defer func() {
for i, fd := range fds {
if i == 0 && Err == nil {
// Only close the first one on error.
continue
}
// Always close extra ones.
_ = unix.Close(fd)
}
}()
var lastErr error
for _, scm := range scms {
if scm.Header.Type == unix.SCM_RIGHTS {
scmFds, err := unix.ParseUnixRights(&scm)
if err != nil {
lastErr = err
} else {
fds = append(fds, scmFds...)
}
}
}
if lastErr != nil {
return nil, lastErr
}
// We do this after collecting the fds to make sure we close them all when
// returning an error here.
if len(scms) != 1 {
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
}
if len(fds) != 1 {
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
}
return os.NewFile(uintptr(fds[0]), string(name)), nil
}
// SendFile sends a file over the given AF_UNIX socket. file.Name() is also
// included so that if the other end uses RecvFile, the file will have the same
// name information.
func SendFile(socket *os.File, file *os.File) error {
name := file.Name()
if len(name) >= MaxNameLen {
return fmt.Errorf("sendfd: filename too long: %s", name)
}
err := SendRawFd(socket, name, file.Fd())
runtime.KeepAlive(file)
return err
}
// SendRawFd sends a specific file descriptor over the given AF_UNIX socket.
func SendRawFd(socket *os.File, msg string, fd uintptr) error {
oob := unix.UnixRights(int(fd))
return unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0)
}

View File

@ -0,0 +1,115 @@
package utils
import (
"encoding/json"
"io"
"os"
"path/filepath"
"strings"
"golang.org/x/sys/unix"
)
const (
exitSignalOffset = 128
)
// ExitStatus returns the correct exit status for a process based on if it
// was signaled or exited cleanly
func ExitStatus(status unix.WaitStatus) int {
if status.Signaled() {
return exitSignalOffset + int(status.Signal())
}
return status.ExitStatus()
}
// WriteJSON writes the provided struct v to w using standard json marshaling
// without a trailing newline. This is used instead of json.Encoder because
// there might be a problem in json decoder in some cases, see:
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
func WriteJSON(w io.Writer, v interface{}) error {
data, err := json.Marshal(v)
if err != nil {
return err
}
_, err = w.Write(data)
return err
}
// CleanPath makes a path safe for use with filepath.Join. This is done by not
// only cleaning the path, but also (if the path is relative) adding a leading
// '/' and cleaning it (then removing the leading '/'). This ensures that a
// path resulting from prepending another path will always resolve to lexically
// be a subdirectory of the prefixed path. This is all done lexically, so paths
// that include symlinks won't be safe as a result of using CleanPath.
func CleanPath(path string) string {
// Deal with empty strings nicely.
if path == "" {
return ""
}
// Ensure that all paths are cleaned (especially problematic ones like
// "/../../../../../" which can cause lots of issues).
path = filepath.Clean(path)
// If the path isn't absolute, we need to do more processing to fix paths
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
// paths to relative ones.
if !filepath.IsAbs(path) {
path = filepath.Clean(string(os.PathSeparator) + path)
// This can't fail, as (by definition) all paths are relative to root.
path, _ = filepath.Rel(string(os.PathSeparator), path)
}
// Clean the path again for good measure.
return filepath.Clean(path)
}
// stripRoot returns the passed path, stripping the root path if it was
// (lexicially) inside it. Note that both passed paths will always be treated
// as absolute, and the returned path will also always be absolute. In
// addition, the paths are cleaned before stripping the root.
func stripRoot(root, path string) string {
// Make the paths clean and absolute.
root, path = CleanPath("/"+root), CleanPath("/"+path)
switch {
case path == root:
path = "/"
case root == "/":
// do nothing
case strings.HasPrefix(path, root+"/"):
path = strings.TrimPrefix(path, root+"/")
}
return CleanPath("/" + path)
}
// SearchLabels searches through a list of key=value pairs for a given key,
// returning its value, and the binary flag telling whether the key exist.
func SearchLabels(labels []string, key string) (string, bool) {
key += "="
for _, s := range labels {
if strings.HasPrefix(s, key) {
return s[len(key):], true
}
}
return "", false
}
// Annotations returns the bundle path and user defined annotations from the
// libcontainer state. We need to remove the bundle because that is a label
// added by libcontainer.
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
userAnnotations = make(map[string]string)
for _, l := range labels {
name, value, ok := strings.Cut(l, "=")
if !ok {
continue
}
if name == "bundle" {
bundle = value
} else {
userAnnotations[name] = value
}
}
return
}

View File

@ -0,0 +1,360 @@
//go:build !windows
package utils
import (
"fmt"
"math"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
_ "unsafe" // for go:linkname
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// EnsureProcHandle returns whether or not the given file handle is on procfs.
func EnsureProcHandle(fh *os.File) error {
var buf unix.Statfs_t
if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
}
if buf.Type != unix.PROC_SUPER_MAGIC {
return fmt.Errorf("%s is not on procfs", fh.Name())
}
return nil
}
var (
haveCloseRangeCloexecBool bool
haveCloseRangeCloexecOnce sync.Once
)
func haveCloseRangeCloexec() bool {
haveCloseRangeCloexecOnce.Do(func() {
// Make sure we're not closing a random file descriptor.
tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0)
if err != nil {
return
}
defer unix.Close(tmpFd)
err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC)
// Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC).
// -ENOSYS and -EINVAL ultimately mean we don't have support, but any
// other potential error would imply that even the most basic close
// operation wouldn't work.
haveCloseRangeCloexecBool = err == nil
})
return haveCloseRangeCloexecBool
}
type fdFunc func(fd int)
// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
// the current process.
func fdRangeFrom(minFd int, fn fdFunc) error {
procSelfFd, closer := ProcThreadSelf("fd")
defer closer()
fdDir, err := os.Open(procSelfFd)
if err != nil {
return err
}
defer fdDir.Close()
if err := EnsureProcHandle(fdDir); err != nil {
return err
}
fdList, err := fdDir.Readdirnames(-1)
if err != nil {
return err
}
for _, fdStr := range fdList {
fd, err := strconv.Atoi(fdStr)
// Ignore non-numeric file names.
if err != nil {
continue
}
// Ignore descriptors lower than our specified minimum.
if fd < minFd {
continue
}
// Ignore the file descriptor we used for readdir, as it will be closed
// when we return.
if uintptr(fd) == fdDir.Fd() {
continue
}
// Run the closure.
fn(fd)
}
return nil
}
// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
// equal to minFd in the current process.
func CloseExecFrom(minFd int) error {
// Use close_range(CLOSE_RANGE_CLOEXEC) if possible.
if haveCloseRangeCloexec() {
err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
return os.NewSyscallError("close_range", err)
}
// Otherwise, fall back to the standard loop.
return fdRangeFrom(minFd, unix.CloseOnExec)
}
//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor
// In order to make sure we do not close the internal epoll descriptors the Go
// runtime uses, we need to ensure that we skip descriptors that match
// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing,
// unfortunately there's no other way to be sure we're only keeping the file
// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
// current process, except for those critical to Go's runtime (such as the
// netpoll management descriptors).
//
// NOTE: That this function is incredibly dangerous to use in most Go code, as
// closing file descriptors from underneath *os.File handles can lead to very
// bad behaviour (the closed file descriptor can be re-used and then any
// *os.File operations would apply to the wrong file). This function is only
// intended to be called from the last stage of runc init.
func UnsafeCloseFrom(minFd int) error {
// We cannot use close_range(2) even if it is available, because we must
// not close some file descriptors.
return fdRangeFrom(minFd, func(fd int) {
if runtime_IsPollDescriptor(uintptr(fd)) {
// These are the Go runtimes internal netpoll file descriptors.
// These file descriptors are operated on deep in the Go scheduler,
// and closing those files from underneath Go can result in panics.
// There is no issue with keeping them because they are not
// executable and are not useful to an attacker anyway. Also we
// don't have any choice.
return
}
// There's nothing we can do about errors from close(2), and the
// only likely error to be seen is EBADF which indicates the fd was
// already closed (in which case, we got what we wanted).
_ = unix.Close(fd)
})
}
// NewSockPair returns a new SOCK_STREAM unix socket pair.
func NewSockPair(name string) (parent, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
// corresponding to the unsafePath resolved within the root. Before passing the
// fd, this path is verified to have been inside the root -- so operating on it
// through the passed fdpath should be safe. Do not access this path through
// the original path strings, and do not attempt to use the pathname outside of
// the passed closure (the file handle will be freed once the closure returns).
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
// Remove the root then forcefully resolve inside the root.
unsafePath = stripRoot(root, unsafePath)
path, err := securejoin.SecureJoin(root, unsafePath)
if err != nil {
return fmt.Errorf("resolving path inside rootfs failed: %w", err)
}
procSelfFd, closer := ProcThreadSelf("fd/")
defer closer()
// Open the target path.
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("open o_path procfd: %w", err)
}
defer fh.Close()
procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd())))
// Double-check the path is the one we expected.
if realpath, err := os.Readlink(procfd); err != nil {
return fmt.Errorf("procfd verification failed: %w", err)
} else if realpath != path {
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
}
return fn(procfd)
}
type ProcThreadSelfCloser func()
var (
haveProcThreadSelf bool
haveProcThreadSelfOnce sync.Once
)
// ProcThreadSelf returns a string that is equivalent to
// /proc/thread-self/<subpath>, with a graceful fallback on older kernels where
// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin,
// meaning that the passed string needs to be trusted. The caller _must_ call
// the returned procThreadSelfCloser function (which is runtime.UnlockOSThread)
// *only once* after it has finished using the returned path string.
func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
haveProcThreadSelfOnce.Do(func() {
if _, err := os.Stat("/proc/thread-self/"); err == nil {
haveProcThreadSelf = true
} else {
logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err)
}
})
// We need to lock our thread until the caller is done with the path string
// because any non-atomic operation on the path (such as opening a file,
// then reading it) could be interrupted by the Go runtime where the
// underlying thread is swapped out and the original thread is killed,
// resulting in pull-your-hair-out-hard-to-debug issues in the caller. In
// addition, the pre-3.17 fallback makes everything non-atomic because the
// same thing could happen between unix.Gettid() and the path operations.
//
// In theory, we don't need to lock in the atomic user case when using
// /proc/thread-self/, but it's better to be safe than sorry (and there are
// only one or two truly atomic users of /proc/thread-self/).
runtime.LockOSThread()
threadSelf := "/proc/thread-self/"
if !haveProcThreadSelf {
// Pre-3.17 kernels did not have /proc/thread-self, so do it manually.
threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/"
if _, err := os.Stat(threadSelf); err != nil {
// Unfortunately, this code is called from rootfs_linux.go where we
// are running inside the pid namespace of the container but /proc
// is the host's procfs. Unfortunately there is no real way to get
// the correct tid to use here (the kernel age means we cannot do
// things like set up a private fsopen("proc") -- even scanning
// NSpid in all of the tasks in /proc/self/task/*/status requires
// Linux 4.1).
//
// So, we just have to assume that /proc/self is acceptable in this
// one specific case.
if os.Getpid() == 1 {
logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err)
} else {
// This should never happen, but the fallback should work in most cases...
logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err)
}
threadSelf = "/proc/self/"
}
}
return threadSelf + subpath, runtime.UnlockOSThread
}
// ProcThreadSelfFd is small wrapper around ProcThreadSelf to make it easier to
// create a /proc/thread-self handle for given file descriptor.
//
// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but
// without using fmt.Sprintf to avoid unneeded overhead.
func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
}
// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"),
// but properly handling the case where path or root are "/".
//
// NOTE: The return value only make sense if the path doesn't contain "..".
func IsLexicallyInRoot(root, path string) bool {
if root != "/" {
root += "/"
}
if path != "/" {
path += "/"
}
return strings.HasPrefix(path, root)
}
// MkdirAllInRootOpen attempts to make
//
// path, _ := securejoin.SecureJoin(root, unsafePath)
// os.MkdirAll(path, mode)
// os.Open(path)
//
// safer against attacks where components in the path are changed between
// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
// try to detect any symlink components in the path while we are doing the
// MkdirAll.
//
// NOTE: If unsafePath is a subpath of root, we assume that you have already
// called SecureJoin and so we use the provided path verbatim without resolving
// any symlinks (this is done in a way that avoids symlink-exchange races).
// This means that the path also must not contain ".." elements, otherwise an
// error will occur.
//
// This uses securejoin.MkdirAllHandle under the hood, but it has special
// handling if unsafePath has already been scoped within the rootfs (this is
// needed for a lot of runc callers and fixing this would require reworking a
// lot of path logic).
func MkdirAllInRootOpen(root, unsafePath string, mode os.FileMode) (_ *os.File, Err error) {
// If the path is already "within" the root, get the path relative to the
// root and use that as the unsafe path. This is necessary because a lot of
// MkdirAllInRootOpen callers have already done SecureJoin, and refactoring
// all of them to stop using these SecureJoin'd paths would require a fair
// amount of work.
// TODO(cyphar): Do the refactor to libpathrs once it's ready.
if IsLexicallyInRoot(root, unsafePath) {
subPath, err := filepath.Rel(root, unsafePath)
if err != nil {
return nil, err
}
unsafePath = subPath
}
// Check for any silly mode bits.
if mode&^0o7777 != 0 {
return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
}
// Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if
// passed. While it would make sense to return an error in that case (since
// the user has asked for a mode that won't be applied), for compatibility
// reasons we have to ignore these bits.
if ignoredBits := mode &^ 0o1777; ignoredBits != 0 {
logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits)
mode &= 0o1777
}
rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open root handle: %w", err)
}
defer rootDir.Close()
return securejoin.MkdirAllHandle(rootDir, unsafePath, mode)
}
// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
// returned handle, for callers that don't need to use it.
func MkdirAllInRoot(root, unsafePath string, mode os.FileMode) error {
f, err := MkdirAllInRootOpen(root, unsafePath, mode)
if err == nil {
_ = f.Close()
}
return err
}
// Openat is a Go-friendly openat(2) wrapper.
func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
dirFd := unix.AT_FDCWD
if dir != nil {
dirFd = int(dir.Fd())
}
flags |= unix.O_CLOEXEC
fd, err := unix.Openat(dirFd, path, flags, mode)
if err != nil {
return nil, &os.PathError{Op: "openat", Path: path, Err: err}
}
return os.NewFile(uintptr(fd), dir.Name()+"/"+path), nil
}

10
vendor/modules.txt vendored
View File

@ -14,6 +14,9 @@ github.com/NVIDIA/go-nvml/pkg/nvml/mock
# github.com/cpuguy83/go-md2man/v2 v2.0.5
## explicit; go 1.11
github.com/cpuguy83/go-md2man/v2/md2man
# github.com/cyphar/filepath-securejoin v0.4.1
## explicit; go 1.18
github.com/cyphar/filepath-securejoin
# github.com/davecgh/go-spew v1.1.1
## explicit
github.com/davecgh/go-spew/spew
@ -30,6 +33,11 @@ github.com/google/uuid
# github.com/moby/sys/symlink v0.3.0
## explicit; go 1.17
github.com/moby/sys/symlink
# github.com/opencontainers/runc v1.2.5
## explicit; go 1.22
github.com/opencontainers/runc/libcontainer/dmz
github.com/opencontainers/runc/libcontainer/system
github.com/opencontainers/runc/libcontainer/utils
# github.com/opencontainers/runtime-spec v1.2.0
## explicit
github.com/opencontainers/runtime-spec/specs-go
@ -38,8 +46,6 @@ github.com/opencontainers/runtime-spec/specs-go
github.com/opencontainers/runtime-tools/generate
github.com/opencontainers/runtime-tools/generate/seccomp
github.com/opencontainers/runtime-tools/validate/capabilities
# github.com/opencontainers/selinux v1.11.0
## explicit; go 1.19
# github.com/pelletier/go-toml v1.9.5
## explicit; go 1.12
github.com/pelletier/go-toml