mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-09 23:14:06 +00:00
Use libcontainer execseal to run ldconfig
This change copies ldconfig into a memfd before executing it from the createContainer hook. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
9429fbac5f
commit
52b9631333
57
cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go
Normal file
57
cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go
Normal file
@ -0,0 +1,57 @@
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/dmz"
|
||||
)
|
||||
|
||||
// SafeExec attempts to clone the specified binary (as an memfd, for example) before executing it.
|
||||
func (m command) SafeExec(path string, args []string, envv []string) error {
|
||||
safeExe, err := cloneBinary(path)
|
||||
if err != nil {
|
||||
m.logger.Warningf("Failed to clone binary %q: %v; falling back to Exec", path, err)
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(path, args, envv)
|
||||
}
|
||||
defer safeExe.Close()
|
||||
|
||||
exePath := "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(exePath, args, envv)
|
||||
}
|
||||
|
||||
func cloneBinary(path string) (*os.File, error) {
|
||||
exe, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening current binary: %w", err)
|
||||
}
|
||||
defer exe.Close()
|
||||
|
||||
stat, err := exe.Stat()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("checking %v size: %w", path, err)
|
||||
}
|
||||
size := stat.Size()
|
||||
|
||||
return dmz.CloneBinary(exe, size, path, os.TempDir())
|
||||
}
|
29
cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go
Normal file
29
cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go
Normal file
@ -0,0 +1,29 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
/**
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import "syscall"
|
||||
|
||||
// SafeExec is not implemented on non-linux systems and forwards directly to the
|
||||
// Exec syscall.
|
||||
func (m *command) SafeExec(path string, args []string, envv []string) error {
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(path, args, envv)
|
||||
}
|
@ -22,7 +22,6 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
@ -143,8 +142,7 @@ func (m command) run(c *cli.Context, cfg *options) error {
|
||||
// be configured to use a different config file by default.
|
||||
args = append(args, "-f", "/etc/ld.so.conf")
|
||||
|
||||
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
|
||||
return syscall.Exec(ldconfigPath, args, nil)
|
||||
return m.SafeExec(ldconfigPath, args, nil)
|
||||
}
|
||||
|
||||
// resolveLDConfigPath determines the LDConfig path to use for the system.
|
||||
|
3
go.mod
3
go.mod
@ -6,6 +6,7 @@ require (
|
||||
github.com/NVIDIA/go-nvlib v0.7.1
|
||||
github.com/NVIDIA/go-nvml v0.12.4-1
|
||||
github.com/moby/sys/symlink v0.3.0
|
||||
github.com/opencontainers/runc v1.2.5
|
||||
github.com/opencontainers/runtime-spec v1.2.0
|
||||
github.com/pelletier/go-toml v1.9.5
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
@ -19,13 +20,13 @@ require (
|
||||
|
||||
require (
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
|
||||
github.com/cyphar/filepath-securejoin v0.4.1 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/fsnotify/fsnotify v1.7.0 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/kr/pretty v0.3.1 // indirect
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
|
||||
github.com/opencontainers/selinux v1.11.0 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
|
||||
|
4
go.sum
4
go.sum
@ -7,6 +7,8 @@ github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2y
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
|
||||
github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
@ -31,6 +33,8 @@ github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34
|
||||
github.com/moby/sys/symlink v0.3.0 h1:GZX89mEZ9u53f97npBy4Rc3vJKj7JBDj/PN2I22GrNU=
|
||||
github.com/moby/sys/symlink v0.3.0/go.mod h1:3eNdhduHmYPcgsJtZXW1W4XUJdZGBIkttZ8xKqPUJq0=
|
||||
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
|
||||
github.com/opencontainers/runc v1.2.5 h1:8KAkq3Wrem8bApgOHyhRI/8IeLXIfmZ6Qaw6DNSLnA4=
|
||||
github.com/opencontainers/runc v1.2.5/go.mod h1:dOQeFo29xZKBNeRBI0B19mJtfHv68YgCTh1X+YphA+4=
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
|
||||
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
|
256
vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md
generated
vendored
Normal file
256
vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md
generated
vendored
Normal file
@ -0,0 +1,256 @@
|
||||
# Changelog #
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
||||
and this project adheres to [Semantic Versioning](http://semver.org/).
|
||||
|
||||
## [Unreleased] ##
|
||||
|
||||
## [0.4.1] - 2025-01-28 ##
|
||||
|
||||
### Fixed ###
|
||||
- The restrictions added for `root` paths passed to `SecureJoin` in 0.4.0 was
|
||||
found to be too strict and caused some regressions when folks tried to
|
||||
update, so this restriction has been relaxed to only return an error if the
|
||||
path contains a `..` component. We still recommend users use `filepath.Clean`
|
||||
(and even `filepath.EvalSymlinks`) on the `root` path they are using, but at
|
||||
least you will no longer be punished for "trivial" unclean paths.
|
||||
|
||||
## [0.4.0] - 2025-01-13 ##
|
||||
|
||||
### Breaking ####
|
||||
- `SecureJoin(VFS)` will now return an error if the provided `root` is not a
|
||||
`filepath.Clean`'d path.
|
||||
|
||||
While it is ultimately the responsibility of the caller to ensure the root is
|
||||
a safe path to use, passing a path like `/symlink/..` as a root would result
|
||||
in the `SecureJoin`'d path being placed in `/` even though `/symlink/..`
|
||||
might be a different directory, and so we should more strongly discourage
|
||||
such usage.
|
||||
|
||||
All major users of `securejoin.SecureJoin` already ensure that the paths they
|
||||
provide are safe (and this is ultimately a question of user error), but
|
||||
removing this foot-gun is probably a good idea. Of course, this is
|
||||
necessarily a breaking API change (though we expect no real users to be
|
||||
affected by it).
|
||||
|
||||
Thanks to [Erik Sjölund](https://github.com/eriksjolund), who initially
|
||||
reported this issue as a possible security issue.
|
||||
|
||||
- `MkdirAll` and `MkdirHandle` now take an `os.FileMode`-style mode argument
|
||||
instead of a raw `unix.S_*`-style mode argument, which may cause compile-time
|
||||
type errors depending on how you use `filepath-securejoin`. For most users,
|
||||
there will be no change in behaviour aside from the type change (as the
|
||||
bottom `0o777` bits are the same in both formats, and most users are probably
|
||||
only using those bits).
|
||||
|
||||
However, if you were using `unix.S_ISVTX` to set the sticky bit with
|
||||
`MkdirAll(Handle)` you will need to switch to `os.ModeSticky` otherwise you
|
||||
will get a runtime error with this update. In addition, the error message you
|
||||
will get from passing `unix.S_ISUID` and `unix.S_ISGID` will be different as
|
||||
they are treated as invalid bits now (note that previously passing said bits
|
||||
was also an error).
|
||||
|
||||
## [0.3.6] - 2024-12-17 ##
|
||||
|
||||
### Compatibility ###
|
||||
- The minimum Go version requirement for `filepath-securejoin` is now Go 1.18
|
||||
(we use generics internally).
|
||||
|
||||
For reference, `filepath-securejoin@v0.3.0` somewhat-arbitrarily bumped the
|
||||
Go version requirement to 1.21.
|
||||
|
||||
While we did make some use of Go 1.21 stdlib features (and in principle Go
|
||||
versions <= 1.21 are no longer even supported by upstream anymore), some
|
||||
downstreams have complained that the version bump has meant that they have to
|
||||
do workarounds when backporting fixes that use the new `filepath-securejoin`
|
||||
API onto old branches. This is not an ideal situation, but since using this
|
||||
library is probably better for most downstreams than a hand-rolled
|
||||
workaround, we now have compatibility shims that allow us to build on older
|
||||
Go versions.
|
||||
- Lower minimum version requirement for `golang.org/x/sys` to `v0.18.0` (we
|
||||
need the wrappers for `fsconfig(2)`), which should also make backporting
|
||||
patches to older branches easier.
|
||||
|
||||
## [0.3.5] - 2024-12-06 ##
|
||||
|
||||
### Fixed ###
|
||||
- `MkdirAll` will now no longer return an `EEXIST` error if two racing
|
||||
processes are creating the same directory. We will still verify that the path
|
||||
is a directory, but this will avoid spurious errors when multiple threads or
|
||||
programs are trying to `MkdirAll` the same path. opencontainers/runc#4543
|
||||
|
||||
## [0.3.4] - 2024-10-09 ##
|
||||
|
||||
### Fixed ###
|
||||
- Previously, some testing mocks we had resulted in us doing `import "testing"`
|
||||
in non-`_test.go` code, which made some downstreams like Kubernetes unhappy.
|
||||
This has been fixed. (#32)
|
||||
|
||||
## [0.3.3] - 2024-09-30 ##
|
||||
|
||||
### Fixed ###
|
||||
- The mode and owner verification logic in `MkdirAll` has been removed. This
|
||||
was originally intended to protect against some theoretical attacks but upon
|
||||
further consideration these protections don't actually buy us anything and
|
||||
they were causing spurious errors with more complicated filesystem setups.
|
||||
- The "is the created directory empty" logic in `MkdirAll` has also been
|
||||
removed. This was not causing us issues yet, but some pseudofilesystems (such
|
||||
as `cgroup`) create non-empty directories and so this logic would've been
|
||||
wrong for such cases.
|
||||
|
||||
## [0.3.2] - 2024-09-13 ##
|
||||
|
||||
### Changed ###
|
||||
- Passing the `S_ISUID` or `S_ISGID` modes to `MkdirAllInRoot` will now return
|
||||
an explicit error saying that those bits are ignored by `mkdirat(2)`. In the
|
||||
past a different error was returned, but since the silent ignoring behaviour
|
||||
is codified in the man pages a more explicit error seems apt. While silently
|
||||
ignoring these bits would be the most compatible option, it could lead to
|
||||
users thinking their code sets these bits when it doesn't. Programs that need
|
||||
to deal with compatibility can mask the bits themselves. (#23, #25)
|
||||
|
||||
### Fixed ###
|
||||
- If a directory has `S_ISGID` set, then all child directories will have
|
||||
`S_ISGID` set when created and a different gid will be used for any inode
|
||||
created under the directory. Previously, the "expected owner and mode"
|
||||
validation in `securejoin.MkdirAll` did not correctly handle this. We now
|
||||
correctly handle this case. (#24, #25)
|
||||
|
||||
## [0.3.1] - 2024-07-23 ##
|
||||
|
||||
### Changed ###
|
||||
- By allowing `Open(at)InRoot` to opt-out of the extra work done by `MkdirAll`
|
||||
to do the necessary "partial lookups", `Open(at)InRoot` now does less work
|
||||
for both implementations (resulting in a many-fold decrease in the number of
|
||||
operations for `openat2`, and a modest improvement for non-`openat2`) and is
|
||||
far more guaranteed to match the correct `openat2(RESOLVE_IN_ROOT)`
|
||||
behaviour.
|
||||
- We now use `readlinkat(fd, "")` where possible. For `Open(at)InRoot` this
|
||||
effectively just means that we no longer risk getting spurious errors during
|
||||
rename races. However, for our hardened procfs handler, this in theory should
|
||||
prevent mount attacks from tricking us when doing magic-link readlinks (even
|
||||
when using the unsafe host `/proc` handle). Unfortunately `Reopen` is still
|
||||
potentially vulnerable to those kinds of somewhat-esoteric attacks.
|
||||
|
||||
Technically this [will only work on post-2.6.39 kernels][linux-readlinkat-emptypath]
|
||||
but it seems incredibly unlikely anyone is using `filepath-securejoin` on a
|
||||
pre-2011 kernel.
|
||||
|
||||
### Fixed ###
|
||||
- Several improvements were made to the errors returned by `Open(at)InRoot` and
|
||||
`MkdirAll` when dealing with invalid paths under the emulated (ie.
|
||||
non-`openat2`) implementation. Previously, some paths would return the wrong
|
||||
error (`ENOENT` when the last component was a non-directory), and other paths
|
||||
would be returned as though they were acceptable (trailing-slash components
|
||||
after a non-directory would be ignored by `Open(at)InRoot`).
|
||||
|
||||
These changes were done to match `openat2`'s behaviour and purely is a
|
||||
consistency fix (most users are going to be using `openat2` anyway).
|
||||
|
||||
[linux-readlinkat-emptypath]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=65cfc6722361570bfe255698d9cd4dccaf47570d
|
||||
|
||||
## [0.3.0] - 2024-07-11 ##
|
||||
|
||||
### Added ###
|
||||
- A new set of `*os.File`-based APIs have been added. These are adapted from
|
||||
[libpathrs][] and we strongly suggest using them if possible (as they provide
|
||||
far more protection against attacks than `SecureJoin`):
|
||||
|
||||
- `Open(at)InRoot` resolves a path inside a rootfs and returns an `*os.File`
|
||||
handle to the path. Note that the handle returned is an `O_PATH` handle,
|
||||
which cannot be used for reading or writing (as well as some other
|
||||
operations -- [see open(2) for more details][open.2])
|
||||
|
||||
- `Reopen` takes an `O_PATH` file handle and safely re-opens it to upgrade
|
||||
it to a regular handle. This can also be used with non-`O_PATH` handles,
|
||||
but `O_PATH` is the most obvious application.
|
||||
|
||||
- `MkdirAll` is an implementation of `os.MkdirAll` that is safe to use to
|
||||
create a directory tree within a rootfs.
|
||||
|
||||
As these are new APIs, they may change in the future. However, they should be
|
||||
safe to start migrating to as we have extensive tests ensuring they behave
|
||||
correctly and are safe against various races and other attacks.
|
||||
|
||||
[libpathrs]: https://github.com/openSUSE/libpathrs
|
||||
[open.2]: https://www.man7.org/linux/man-pages/man2/open.2.html
|
||||
|
||||
## [0.2.5] - 2024-05-03 ##
|
||||
|
||||
### Changed ###
|
||||
- Some minor changes were made to how lexical components (like `..` and `.`)
|
||||
are handled during path generation in `SecureJoin`. There is no behaviour
|
||||
change as a result of this fix (the resulting paths are the same).
|
||||
|
||||
### Fixed ###
|
||||
- The error returned when we hit a symlink loop now references the correct
|
||||
path. (#10)
|
||||
|
||||
## [0.2.4] - 2023-09-06 ##
|
||||
|
||||
### Security ###
|
||||
- This release fixes a potential security issue in filepath-securejoin when
|
||||
used on Windows ([GHSA-6xv5-86q9-7xr8][], which could be used to generate
|
||||
paths outside of the provided rootfs in certain cases), as well as improving
|
||||
the overall behaviour of filepath-securejoin when dealing with Windows paths
|
||||
that contain volume names. Thanks to Paulo Gomes for discovering and fixing
|
||||
these issues.
|
||||
|
||||
### Fixed ###
|
||||
- Switch to GitHub Actions for CI so we can test on Windows as well as Linux
|
||||
and MacOS.
|
||||
|
||||
[GHSA-6xv5-86q9-7xr8]: https://github.com/advisories/GHSA-6xv5-86q9-7xr8
|
||||
|
||||
## [0.2.3] - 2021-06-04 ##
|
||||
|
||||
### Changed ###
|
||||
- Switch to Go 1.13-style `%w` error wrapping, letting us drop the dependency
|
||||
on `github.com/pkg/errors`.
|
||||
|
||||
## [0.2.2] - 2018-09-05 ##
|
||||
|
||||
### Changed ###
|
||||
- Use `syscall.ELOOP` as the base error for symlink loops, rather than our own
|
||||
(internal) error. This allows callers to more easily use `errors.Is` to check
|
||||
for this case.
|
||||
|
||||
## [0.2.1] - 2018-09-05 ##
|
||||
|
||||
### Fixed ###
|
||||
- Use our own `IsNotExist` implementation, which lets us handle `ENOTDIR`
|
||||
properly within `SecureJoin`.
|
||||
|
||||
## [0.2.0] - 2017-07-19 ##
|
||||
|
||||
We now have 100% test coverage!
|
||||
|
||||
### Added ###
|
||||
- Add a `SecureJoinVFS` API that can be used for mocking (as we do in our new
|
||||
tests) or for implementing custom handling of lookup operations (such as for
|
||||
rootless containers, where work is necessary to access directories with weird
|
||||
modes because we don't have `CAP_DAC_READ_SEARCH` or `CAP_DAC_OVERRIDE`).
|
||||
|
||||
## 0.1.0 - 2017-07-19
|
||||
|
||||
This is our first release of `github.com/cyphar/filepath-securejoin`,
|
||||
containing a full implementation with a coverage of 93.5% (the only missing
|
||||
cases are the error cases, which are hard to mocktest at the moment).
|
||||
|
||||
[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.1...HEAD
|
||||
[0.4.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.0...v0.4.1
|
||||
[0.4.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.6...v0.4.0
|
||||
[0.3.6]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.5...v0.3.6
|
||||
[0.3.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.4...v0.3.5
|
||||
[0.3.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.3...v0.3.4
|
||||
[0.3.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.2...v0.3.3
|
||||
[0.3.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.1...v0.3.2
|
||||
[0.3.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.0...v0.3.1
|
||||
[0.3.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.5...v0.3.0
|
||||
[0.2.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.4...v0.2.5
|
||||
[0.2.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.3...v0.2.4
|
||||
[0.2.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.2...v0.2.3
|
||||
[0.2.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.1...v0.2.2
|
||||
[0.2.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.0...v0.2.1
|
||||
[0.2.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.1.0...v0.2.0
|
28
vendor/github.com/cyphar/filepath-securejoin/LICENSE
generated
vendored
Normal file
28
vendor/github.com/cyphar/filepath-securejoin/LICENSE
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
|
||||
Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
169
vendor/github.com/cyphar/filepath-securejoin/README.md
generated
vendored
Normal file
169
vendor/github.com/cyphar/filepath-securejoin/README.md
generated
vendored
Normal file
@ -0,0 +1,169 @@
|
||||
## `filepath-securejoin` ##
|
||||
|
||||
[](https://pkg.go.dev/github.com/cyphar/filepath-securejoin)
|
||||
[](https://github.com/cyphar/filepath-securejoin/actions/workflows/ci.yml)
|
||||
|
||||
### Old API ###
|
||||
|
||||
This library was originally just an implementation of `SecureJoin` which was
|
||||
[intended to be included in the Go standard library][go#20126] as a safer
|
||||
`filepath.Join` that would restrict the path lookup to be inside a root
|
||||
directory.
|
||||
|
||||
The implementation was based on code that existed in several container
|
||||
runtimes. Unfortunately, this API is **fundamentally unsafe** against attackers
|
||||
that can modify path components after `SecureJoin` returns and before the
|
||||
caller uses the path, allowing for some fairly trivial TOCTOU attacks.
|
||||
|
||||
`SecureJoin` (and `SecureJoinVFS`) are still provided by this library to
|
||||
support legacy users, but new users are strongly suggested to avoid using
|
||||
`SecureJoin` and instead use the [new api](#new-api) or switch to
|
||||
[libpathrs][libpathrs].
|
||||
|
||||
With the above limitations in mind, this library guarantees the following:
|
||||
|
||||
* If no error is set, the resulting string **must** be a child path of
|
||||
`root` and will not contain any symlink path components (they will all be
|
||||
expanded).
|
||||
|
||||
* When expanding symlinks, all symlink path components **must** be resolved
|
||||
relative to the provided root. In particular, this can be considered a
|
||||
userspace implementation of how `chroot(2)` operates on file paths. Note that
|
||||
these symlinks will **not** be expanded lexically (`filepath.Clean` is not
|
||||
called on the input before processing).
|
||||
|
||||
* Non-existent path components are unaffected by `SecureJoin` (similar to
|
||||
`filepath.EvalSymlinks`'s semantics).
|
||||
|
||||
* The returned path will always be `filepath.Clean`ed and thus not contain any
|
||||
`..` components.
|
||||
|
||||
A (trivial) implementation of this function on GNU/Linux systems could be done
|
||||
with the following (note that this requires root privileges and is far more
|
||||
opaque than the implementation in this library, and also requires that
|
||||
`readlink` is inside the `root` path and is trustworthy):
|
||||
|
||||
```go
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
func SecureJoin(root, unsafePath string) (string, error) {
|
||||
unsafePath = string(filepath.Separator) + unsafePath
|
||||
cmd := exec.Command("chroot", root,
|
||||
"readlink", "--canonicalize-missing", "--no-newline", unsafePath)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
expanded := string(output)
|
||||
return filepath.Join(root, expanded), nil
|
||||
}
|
||||
```
|
||||
|
||||
[libpathrs]: https://github.com/openSUSE/libpathrs
|
||||
[go#20126]: https://github.com/golang/go/issues/20126
|
||||
|
||||
### New API ###
|
||||
|
||||
While we recommend users switch to [libpathrs][libpathrs] as soon as it has a
|
||||
stable release, some methods implemented by libpathrs have been ported to this
|
||||
library to ease the transition. These APIs are only supported on Linux.
|
||||
|
||||
These APIs are implemented such that `filepath-securejoin` will
|
||||
opportunistically use certain newer kernel APIs that make these operations far
|
||||
more secure. In particular:
|
||||
|
||||
* All of the lookup operations will use [`openat2`][openat2.2] on new enough
|
||||
kernels (Linux 5.6 or later) to restrict lookups through magic-links and
|
||||
bind-mounts (for certain operations) and to make use of `RESOLVE_IN_ROOT` to
|
||||
efficiently resolve symlinks within a rootfs.
|
||||
|
||||
* The APIs provide hardening against a malicious `/proc` mount to either detect
|
||||
or avoid being tricked by a `/proc` that is not legitimate. This is done
|
||||
using [`openat2`][openat2.2] for all users, and privileged users will also be
|
||||
further protected by using [`fsopen`][fsopen.2] and [`open_tree`][open_tree.2]
|
||||
(Linux 5.2 or later).
|
||||
|
||||
[openat2.2]: https://www.man7.org/linux/man-pages/man2/openat2.2.html
|
||||
[fsopen.2]: https://github.com/brauner/man-pages-md/blob/main/fsopen.md
|
||||
[open_tree.2]: https://github.com/brauner/man-pages-md/blob/main/open_tree.md
|
||||
|
||||
#### `OpenInRoot` ####
|
||||
|
||||
```go
|
||||
func OpenInRoot(root, unsafePath string) (*os.File, error)
|
||||
func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error)
|
||||
func Reopen(handle *os.File, flags int) (*os.File, error)
|
||||
```
|
||||
|
||||
`OpenInRoot` is a much safer version of
|
||||
|
||||
```go
|
||||
path, err := securejoin.SecureJoin(root, unsafePath)
|
||||
file, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC)
|
||||
```
|
||||
|
||||
that protects against various race attacks that could lead to serious security
|
||||
issues, depending on the application. Note that the returned `*os.File` is an
|
||||
`O_PATH` file descriptor, which is quite restricted. Callers will probably need
|
||||
to use `Reopen` to get a more usable handle (this split is done to provide
|
||||
useful features like PTY spawning and to avoid users accidentally opening bad
|
||||
inodes that could cause a DoS).
|
||||
|
||||
Callers need to be careful in how they use the returned `*os.File`. Usually it
|
||||
is only safe to operate on the handle directly, and it is very easy to create a
|
||||
security issue. [libpathrs][libpathrs] provides far more helpers to make using
|
||||
these handles safer -- there is currently no plan to port them to
|
||||
`filepath-securejoin`.
|
||||
|
||||
`OpenatInRoot` is like `OpenInRoot` except that the root is provided using an
|
||||
`*os.File`. This allows you to ensure that multiple `OpenatInRoot` (or
|
||||
`MkdirAllHandle`) calls are operating on the same rootfs.
|
||||
|
||||
> **NOTE**: Unlike `SecureJoin`, `OpenInRoot` will error out as soon as it hits
|
||||
> a dangling symlink or non-existent path. This is in contrast to `SecureJoin`
|
||||
> which treated non-existent components as though they were real directories,
|
||||
> and would allow for partial resolution of dangling symlinks. These behaviours
|
||||
> are at odds with how Linux treats non-existent paths and dangling symlinks,
|
||||
> and so these are no longer allowed.
|
||||
|
||||
#### `MkdirAll` ####
|
||||
|
||||
```go
|
||||
func MkdirAll(root, unsafePath string, mode int) error
|
||||
func MkdirAllHandle(root *os.File, unsafePath string, mode int) (*os.File, error)
|
||||
```
|
||||
|
||||
`MkdirAll` is a much safer version of
|
||||
|
||||
```go
|
||||
path, err := securejoin.SecureJoin(root, unsafePath)
|
||||
err = os.MkdirAll(path, mode)
|
||||
```
|
||||
|
||||
that protects against the same kinds of races that `OpenInRoot` protects
|
||||
against.
|
||||
|
||||
`MkdirAllHandle` is like `MkdirAll` except that the root is provided using an
|
||||
`*os.File` (the reason for this is the same as with `OpenatInRoot`) and an
|
||||
`*os.File` of the final created directory is returned (this directory is
|
||||
guaranteed to be effectively identical to the directory created by
|
||||
`MkdirAllHandle`, which is not possible to ensure by just using `OpenatInRoot`
|
||||
after `MkdirAll`).
|
||||
|
||||
> **NOTE**: Unlike `SecureJoin`, `MkdirAll` will error out as soon as it hits
|
||||
> a dangling symlink or non-existent path. This is in contrast to `SecureJoin`
|
||||
> which treated non-existent components as though they were real directories,
|
||||
> and would allow for partial resolution of dangling symlinks. These behaviours
|
||||
> are at odds with how Linux treats non-existent paths and dangling symlinks,
|
||||
> and so these are no longer allowed. This means that `MkdirAll` will not
|
||||
> create non-existent directories referenced by a dangling symlink.
|
||||
|
||||
### License ###
|
||||
|
||||
The license of this project is the same as Go, which is a BSD 3-clause license
|
||||
available in the `LICENSE` file.
|
1
vendor/github.com/cyphar/filepath-securejoin/VERSION
generated
vendored
Normal file
1
vendor/github.com/cyphar/filepath-securejoin/VERSION
generated
vendored
Normal file
@ -0,0 +1 @@
|
||||
0.4.1
|
39
vendor/github.com/cyphar/filepath-securejoin/doc.go
generated
vendored
Normal file
39
vendor/github.com/cyphar/filepath-securejoin/doc.go
generated
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
|
||||
// Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package securejoin implements a set of helpers to make it easier to write Go
|
||||
// code that is safe against symlink-related escape attacks. The primary idea
|
||||
// is to let you resolve a path within a rootfs directory as if the rootfs was
|
||||
// a chroot.
|
||||
//
|
||||
// securejoin has two APIs, a "legacy" API and a "modern" API.
|
||||
//
|
||||
// The legacy API is [SecureJoin] and [SecureJoinVFS]. These methods are
|
||||
// **not** safe against race conditions where an attacker changes the
|
||||
// filesystem after (or during) the [SecureJoin] operation.
|
||||
//
|
||||
// The new API is made up of [OpenInRoot] and [MkdirAll] (and derived
|
||||
// functions). These are safe against racing attackers and have several other
|
||||
// protections that are not provided by the legacy API. There are many more
|
||||
// operations that most programs expect to be able to do safely, but we do not
|
||||
// provide explicit support for them because we want to encourage users to
|
||||
// switch to [libpathrs](https://github.com/openSUSE/libpathrs) which is a
|
||||
// cross-language next-generation library that is entirely designed around
|
||||
// operating on paths safely.
|
||||
//
|
||||
// securejoin has been used by several container runtimes (Docker, runc,
|
||||
// Kubernetes, etc) for quite a few years as a de-facto standard for operating
|
||||
// on container filesystem paths "safely". However, most users still use the
|
||||
// legacy API which is unsafe against various attacks (there is a fairly long
|
||||
// history of CVEs in dependent as a result). Users should switch to the modern
|
||||
// API as soon as possible (or even better, switch to libpathrs).
|
||||
//
|
||||
// This project was initially intended to be included in the Go standard
|
||||
// library, but [it was rejected](https://go.dev/issue/20126). There is now a
|
||||
// [new Go proposal](https://go.dev/issue/67002) for a safe path resolution API
|
||||
// that shares some of the goals of filepath-securejoin. However, that design
|
||||
// is intended to work like `openat2(RESOLVE_BENEATH)` which does not fit the
|
||||
// usecase of container runtimes and most system tools.
|
||||
package securejoin
|
18
vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_go120.go
generated
vendored
Normal file
18
vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_go120.go
generated
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
//go:build linux && go1.20
|
||||
|
||||
// Copyright (C) 2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// wrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except
|
||||
// that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap)
|
||||
// is only guaranteed to give you baseErr.
|
||||
func wrapBaseError(baseErr, extraErr error) error {
|
||||
return fmt.Errorf("%w: %w", extraErr, baseErr)
|
||||
}
|
38
vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_unsupported.go
generated
vendored
Normal file
38
vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
//go:build linux && !go1.20
|
||||
|
||||
// Copyright (C) 2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type wrappedError struct {
|
||||
inner error
|
||||
isError error
|
||||
}
|
||||
|
||||
func (err wrappedError) Is(target error) bool {
|
||||
return err.isError == target
|
||||
}
|
||||
|
||||
func (err wrappedError) Unwrap() error {
|
||||
return err.inner
|
||||
}
|
||||
|
||||
func (err wrappedError) Error() string {
|
||||
return fmt.Sprintf("%v: %v", err.isError, err.inner)
|
||||
}
|
||||
|
||||
// wrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except
|
||||
// that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap)
|
||||
// is only guaranteed to give you baseErr.
|
||||
func wrapBaseError(baseErr, extraErr error) error {
|
||||
return wrappedError{
|
||||
inner: baseErr,
|
||||
isError: extraErr,
|
||||
}
|
||||
}
|
32
vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_go121.go
generated
vendored
Normal file
32
vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_go121.go
generated
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
//go:build linux && go1.21
|
||||
|
||||
// Copyright (C) 2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"sync"
|
||||
)
|
||||
|
||||
func slices_DeleteFunc[S ~[]E, E any](slice S, delFn func(E) bool) S {
|
||||
return slices.DeleteFunc(slice, delFn)
|
||||
}
|
||||
|
||||
func slices_Contains[S ~[]E, E comparable](slice S, val E) bool {
|
||||
return slices.Contains(slice, val)
|
||||
}
|
||||
|
||||
func slices_Clone[S ~[]E, E any](slice S) S {
|
||||
return slices.Clone(slice)
|
||||
}
|
||||
|
||||
func sync_OnceValue[T any](f func() T) func() T {
|
||||
return sync.OnceValue(f)
|
||||
}
|
||||
|
||||
func sync_OnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) {
|
||||
return sync.OnceValues(f)
|
||||
}
|
124
vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_unsupported.go
generated
vendored
Normal file
124
vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,124 @@
|
||||
//go:build linux && !go1.21
|
||||
|
||||
// Copyright (C) 2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// These are very minimal implementations of functions that appear in Go 1.21's
|
||||
// stdlib, included so that we can build on older Go versions. Most are
|
||||
// borrowed directly from the stdlib, and a few are modified to be "obviously
|
||||
// correct" without needing to copy too many other helpers.
|
||||
|
||||
// clearSlice is equivalent to the builtin clear from Go 1.21.
|
||||
// Copied from the Go 1.24 stdlib implementation.
|
||||
func clearSlice[S ~[]E, E any](slice S) {
|
||||
var zero E
|
||||
for i := range slice {
|
||||
slice[i] = zero
|
||||
}
|
||||
}
|
||||
|
||||
// Copied from the Go 1.24 stdlib implementation.
|
||||
func slices_IndexFunc[S ~[]E, E any](s S, f func(E) bool) int {
|
||||
for i := range s {
|
||||
if f(s[i]) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// Copied from the Go 1.24 stdlib implementation.
|
||||
func slices_DeleteFunc[S ~[]E, E any](s S, del func(E) bool) S {
|
||||
i := slices_IndexFunc(s, del)
|
||||
if i == -1 {
|
||||
return s
|
||||
}
|
||||
// Don't start copying elements until we find one to delete.
|
||||
for j := i + 1; j < len(s); j++ {
|
||||
if v := s[j]; !del(v) {
|
||||
s[i] = v
|
||||
i++
|
||||
}
|
||||
}
|
||||
clearSlice(s[i:]) // zero/nil out the obsolete elements, for GC
|
||||
return s[:i]
|
||||
}
|
||||
|
||||
// Similar to the stdlib slices.Contains, except that we don't have
|
||||
// slices.Index so we need to use slices.IndexFunc for this non-Func helper.
|
||||
func slices_Contains[S ~[]E, E comparable](s S, v E) bool {
|
||||
return slices_IndexFunc(s, func(e E) bool { return e == v }) >= 0
|
||||
}
|
||||
|
||||
// Copied from the Go 1.24 stdlib implementation.
|
||||
func slices_Clone[S ~[]E, E any](s S) S {
|
||||
// Preserve nil in case it matters.
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
return append(S([]E{}), s...)
|
||||
}
|
||||
|
||||
// Copied from the Go 1.24 stdlib implementation.
|
||||
func sync_OnceValue[T any](f func() T) func() T {
|
||||
var (
|
||||
once sync.Once
|
||||
valid bool
|
||||
p any
|
||||
result T
|
||||
)
|
||||
g := func() {
|
||||
defer func() {
|
||||
p = recover()
|
||||
if !valid {
|
||||
panic(p)
|
||||
}
|
||||
}()
|
||||
result = f()
|
||||
f = nil
|
||||
valid = true
|
||||
}
|
||||
return func() T {
|
||||
once.Do(g)
|
||||
if !valid {
|
||||
panic(p)
|
||||
}
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
// Copied from the Go 1.24 stdlib implementation.
|
||||
func sync_OnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) {
|
||||
var (
|
||||
once sync.Once
|
||||
valid bool
|
||||
p any
|
||||
r1 T1
|
||||
r2 T2
|
||||
)
|
||||
g := func() {
|
||||
defer func() {
|
||||
p = recover()
|
||||
if !valid {
|
||||
panic(p)
|
||||
}
|
||||
}()
|
||||
r1, r2 = f()
|
||||
f = nil
|
||||
valid = true
|
||||
}
|
||||
return func() (T1, T2) {
|
||||
once.Do(g)
|
||||
if !valid {
|
||||
panic(p)
|
||||
}
|
||||
return r1, r2
|
||||
}
|
||||
}
|
166
vendor/github.com/cyphar/filepath-securejoin/join.go
generated
vendored
Normal file
166
vendor/github.com/cyphar/filepath-securejoin/join.go
generated
vendored
Normal file
@ -0,0 +1,166 @@
|
||||
// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
|
||||
// Copyright (C) 2017-2025 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
const maxSymlinkLimit = 255
|
||||
|
||||
// IsNotExist tells you if err is an error that implies that either the path
|
||||
// accessed does not exist (or path components don't exist). This is
|
||||
// effectively a more broad version of [os.IsNotExist].
|
||||
func IsNotExist(err error) bool {
|
||||
// Check that it's not actually an ENOTDIR, which in some cases is a more
|
||||
// convoluted case of ENOENT (usually involving weird paths).
|
||||
return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) || errors.Is(err, syscall.ENOENT)
|
||||
}
|
||||
|
||||
// errUnsafeRoot is returned if the user provides SecureJoinVFS with a path
|
||||
// that contains ".." components.
|
||||
var errUnsafeRoot = errors.New("root path provided to SecureJoin contains '..' components")
|
||||
|
||||
// stripVolume just gets rid of the Windows volume included in a path. Based on
|
||||
// some godbolt tests, the Go compiler is smart enough to make this a no-op on
|
||||
// Linux.
|
||||
func stripVolume(path string) string {
|
||||
return path[len(filepath.VolumeName(path)):]
|
||||
}
|
||||
|
||||
// hasDotDot checks if the path contains ".." components in a platform-agnostic
|
||||
// way.
|
||||
func hasDotDot(path string) bool {
|
||||
// If we are on Windows, strip any volume letters. It turns out that
|
||||
// C:..\foo may (or may not) be a valid pathname and we need to handle that
|
||||
// leading "..".
|
||||
path = stripVolume(path)
|
||||
// Look for "/../" in the path, but we need to handle leading and trailing
|
||||
// ".."s by adding separators. Doing this with filepath.Separator is ugly
|
||||
// so just convert to Unix-style "/" first.
|
||||
path = filepath.ToSlash(path)
|
||||
return strings.Contains("/"+path+"/", "/../")
|
||||
}
|
||||
|
||||
// SecureJoinVFS joins the two given path components (similar to [filepath.Join]) except
|
||||
// that the returned path is guaranteed to be scoped inside the provided root
|
||||
// path (when evaluated). Any symbolic links in the path are evaluated with the
|
||||
// given root treated as the root of the filesystem, similar to a chroot. The
|
||||
// filesystem state is evaluated through the given [VFS] interface (if nil, the
|
||||
// standard [os].* family of functions are used).
|
||||
//
|
||||
// Note that the guarantees provided by this function only apply if the path
|
||||
// components in the returned string are not modified (in other words are not
|
||||
// replaced with symlinks on the filesystem) after this function has returned.
|
||||
// Such a symlink race is necessarily out-of-scope of SecureJoinVFS.
|
||||
//
|
||||
// NOTE: Due to the above limitation, Linux users are strongly encouraged to
|
||||
// use [OpenInRoot] instead, which does safely protect against these kinds of
|
||||
// attacks. There is no way to solve this problem with SecureJoinVFS because
|
||||
// the API is fundamentally wrong (you cannot return a "safe" path string and
|
||||
// guarantee it won't be modified afterwards).
|
||||
//
|
||||
// Volume names in unsafePath are always discarded, regardless if they are
|
||||
// provided via direct input or when evaluating symlinks. Therefore:
|
||||
//
|
||||
// "C:\Temp" + "D:\path\to\file.txt" results in "C:\Temp\path\to\file.txt"
|
||||
//
|
||||
// If the provided root is not [filepath.Clean] then an error will be returned,
|
||||
// as such root paths are bordering on somewhat unsafe and using such paths is
|
||||
// not best practice. We also strongly suggest that any root path is first
|
||||
// fully resolved using [filepath.EvalSymlinks] or otherwise constructed to
|
||||
// avoid containing symlink components. Of course, the root also *must not* be
|
||||
// attacker-controlled.
|
||||
func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) {
|
||||
// The root path must not contain ".." components, otherwise when we join
|
||||
// the subpath we will end up with a weird path. We could work around this
|
||||
// in other ways but users shouldn't be giving us non-lexical root paths in
|
||||
// the first place.
|
||||
if hasDotDot(root) {
|
||||
return "", errUnsafeRoot
|
||||
}
|
||||
|
||||
// Use the os.* VFS implementation if none was specified.
|
||||
if vfs == nil {
|
||||
vfs = osVFS{}
|
||||
}
|
||||
|
||||
unsafePath = filepath.FromSlash(unsafePath)
|
||||
var (
|
||||
currentPath string
|
||||
remainingPath = unsafePath
|
||||
linksWalked int
|
||||
)
|
||||
for remainingPath != "" {
|
||||
// On Windows, if we managed to end up at a path referencing a volume,
|
||||
// drop the volume to make sure we don't end up with broken paths or
|
||||
// escaping the root volume.
|
||||
remainingPath = stripVolume(remainingPath)
|
||||
|
||||
// Get the next path component.
|
||||
var part string
|
||||
if i := strings.IndexRune(remainingPath, filepath.Separator); i == -1 {
|
||||
part, remainingPath = remainingPath, ""
|
||||
} else {
|
||||
part, remainingPath = remainingPath[:i], remainingPath[i+1:]
|
||||
}
|
||||
|
||||
// Apply the component lexically to the path we are building.
|
||||
// currentPath does not contain any symlinks, and we are lexically
|
||||
// dealing with a single component, so it's okay to do a filepath.Clean
|
||||
// here.
|
||||
nextPath := filepath.Join(string(filepath.Separator), currentPath, part)
|
||||
if nextPath == string(filepath.Separator) {
|
||||
currentPath = ""
|
||||
continue
|
||||
}
|
||||
fullPath := root + string(filepath.Separator) + nextPath
|
||||
|
||||
// Figure out whether the path is a symlink.
|
||||
fi, err := vfs.Lstat(fullPath)
|
||||
if err != nil && !IsNotExist(err) {
|
||||
return "", err
|
||||
}
|
||||
// Treat non-existent path components the same as non-symlinks (we
|
||||
// can't do any better here).
|
||||
if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 {
|
||||
currentPath = nextPath
|
||||
continue
|
||||
}
|
||||
|
||||
// It's a symlink, so get its contents and expand it by prepending it
|
||||
// to the yet-unparsed path.
|
||||
linksWalked++
|
||||
if linksWalked > maxSymlinkLimit {
|
||||
return "", &os.PathError{Op: "SecureJoin", Path: root + string(filepath.Separator) + unsafePath, Err: syscall.ELOOP}
|
||||
}
|
||||
|
||||
dest, err := vfs.Readlink(fullPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
remainingPath = dest + string(filepath.Separator) + remainingPath
|
||||
// Absolute symlinks reset any work we've already done.
|
||||
if filepath.IsAbs(dest) {
|
||||
currentPath = ""
|
||||
}
|
||||
}
|
||||
|
||||
// There should be no lexical components like ".." left in the path here,
|
||||
// but for safety clean up the path before joining it to the root.
|
||||
finalPath := filepath.Join(string(filepath.Separator), currentPath)
|
||||
return filepath.Join(root, finalPath), nil
|
||||
}
|
||||
|
||||
// SecureJoin is a wrapper around [SecureJoinVFS] that just uses the [os].* library
|
||||
// of functions as the [VFS]. If in doubt, use this function over [SecureJoinVFS].
|
||||
func SecureJoin(root, unsafePath string) (string, error) {
|
||||
return SecureJoinVFS(root, unsafePath, nil)
|
||||
}
|
388
vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go
generated
vendored
Normal file
388
vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go
generated
vendored
Normal file
@ -0,0 +1,388 @@
|
||||
//go:build linux
|
||||
|
||||
// Copyright (C) 2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type symlinkStackEntry struct {
|
||||
// (dir, remainingPath) is what we would've returned if the link didn't
|
||||
// exist. This matches what openat2(RESOLVE_IN_ROOT) would return in
|
||||
// this case.
|
||||
dir *os.File
|
||||
remainingPath string
|
||||
// linkUnwalked is the remaining path components from the original
|
||||
// Readlink which we have yet to walk. When this slice is empty, we
|
||||
// drop the link from the stack.
|
||||
linkUnwalked []string
|
||||
}
|
||||
|
||||
func (se symlinkStackEntry) String() string {
|
||||
return fmt.Sprintf("<%s>/%s [->%s]", se.dir.Name(), se.remainingPath, strings.Join(se.linkUnwalked, "/"))
|
||||
}
|
||||
|
||||
func (se symlinkStackEntry) Close() {
|
||||
_ = se.dir.Close()
|
||||
}
|
||||
|
||||
type symlinkStack []*symlinkStackEntry
|
||||
|
||||
func (s *symlinkStack) IsEmpty() bool {
|
||||
return s == nil || len(*s) == 0
|
||||
}
|
||||
|
||||
func (s *symlinkStack) Close() {
|
||||
if s != nil {
|
||||
for _, link := range *s {
|
||||
link.Close()
|
||||
}
|
||||
// TODO: Switch to clear once we switch to Go 1.21.
|
||||
*s = nil
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
errEmptyStack = errors.New("[internal] stack is empty")
|
||||
errBrokenSymlinkStack = errors.New("[internal error] broken symlink stack")
|
||||
)
|
||||
|
||||
func (s *symlinkStack) popPart(part string) error {
|
||||
if s == nil || s.IsEmpty() {
|
||||
// If there is nothing in the symlink stack, then the part was from the
|
||||
// real path provided by the user, and this is a no-op.
|
||||
return errEmptyStack
|
||||
}
|
||||
if part == "." {
|
||||
// "." components are no-ops -- we drop them when doing SwapLink.
|
||||
return nil
|
||||
}
|
||||
|
||||
tailEntry := (*s)[len(*s)-1]
|
||||
|
||||
// Double-check that we are popping the component we expect.
|
||||
if len(tailEntry.linkUnwalked) == 0 {
|
||||
return fmt.Errorf("%w: trying to pop component %q of empty stack entry %s", errBrokenSymlinkStack, part, tailEntry)
|
||||
}
|
||||
headPart := tailEntry.linkUnwalked[0]
|
||||
if headPart != part {
|
||||
return fmt.Errorf("%w: trying to pop component %q but the last stack entry is %s (%q)", errBrokenSymlinkStack, part, tailEntry, headPart)
|
||||
}
|
||||
|
||||
// Drop the component, but keep the entry around in case we are dealing
|
||||
// with a "tail-chained" symlink.
|
||||
tailEntry.linkUnwalked = tailEntry.linkUnwalked[1:]
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *symlinkStack) PopPart(part string) error {
|
||||
if err := s.popPart(part); err != nil {
|
||||
if errors.Is(err, errEmptyStack) {
|
||||
// Skip empty stacks.
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Clean up any of the trailing stack entries that are empty.
|
||||
for lastGood := len(*s) - 1; lastGood >= 0; lastGood-- {
|
||||
entry := (*s)[lastGood]
|
||||
if len(entry.linkUnwalked) > 0 {
|
||||
break
|
||||
}
|
||||
entry.Close()
|
||||
(*s) = (*s)[:lastGood]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *symlinkStack) push(dir *os.File, remainingPath, linkTarget string) error {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
// Split the link target and clean up any "" parts.
|
||||
linkTargetParts := slices_DeleteFunc(
|
||||
strings.Split(linkTarget, "/"),
|
||||
func(part string) bool { return part == "" || part == "." })
|
||||
|
||||
// Copy the directory so the caller doesn't close our copy.
|
||||
dirCopy, err := dupFile(dir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Add to the stack.
|
||||
*s = append(*s, &symlinkStackEntry{
|
||||
dir: dirCopy,
|
||||
remainingPath: remainingPath,
|
||||
linkUnwalked: linkTargetParts,
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *symlinkStack) SwapLink(linkPart string, dir *os.File, remainingPath, linkTarget string) error {
|
||||
// If we are currently inside a symlink resolution, remove the symlink
|
||||
// component from the last symlink entry, but don't remove the entry even
|
||||
// if it's empty. If we are a "tail-chained" symlink (a trailing symlink we
|
||||
// hit during a symlink resolution) we need to keep the old symlink until
|
||||
// we finish the resolution.
|
||||
if err := s.popPart(linkPart); err != nil {
|
||||
if !errors.Is(err, errEmptyStack) {
|
||||
return err
|
||||
}
|
||||
// Push the component regardless of whether the stack was empty.
|
||||
}
|
||||
return s.push(dir, remainingPath, linkTarget)
|
||||
}
|
||||
|
||||
func (s *symlinkStack) PopTopSymlink() (*os.File, string, bool) {
|
||||
if s == nil || s.IsEmpty() {
|
||||
return nil, "", false
|
||||
}
|
||||
tailEntry := (*s)[0]
|
||||
*s = (*s)[1:]
|
||||
return tailEntry.dir, tailEntry.remainingPath, true
|
||||
}
|
||||
|
||||
// partialLookupInRoot tries to lookup as much of the request path as possible
|
||||
// within the provided root (a-la RESOLVE_IN_ROOT) and opens the final existing
|
||||
// component of the requested path, returning a file handle to the final
|
||||
// existing component and a string containing the remaining path components.
|
||||
func partialLookupInRoot(root *os.File, unsafePath string) (*os.File, string, error) {
|
||||
return lookupInRoot(root, unsafePath, true)
|
||||
}
|
||||
|
||||
func completeLookupInRoot(root *os.File, unsafePath string) (*os.File, error) {
|
||||
handle, remainingPath, err := lookupInRoot(root, unsafePath, false)
|
||||
if remainingPath != "" && err == nil {
|
||||
// should never happen
|
||||
err = fmt.Errorf("[bug] non-empty remaining path when doing a non-partial lookup: %q", remainingPath)
|
||||
}
|
||||
// lookupInRoot(partial=false) will always close the handle if an error is
|
||||
// returned, so no need to double-check here.
|
||||
return handle, err
|
||||
}
|
||||
|
||||
func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.File, _ string, _ error) {
|
||||
unsafePath = filepath.ToSlash(unsafePath) // noop
|
||||
|
||||
// This is very similar to SecureJoin, except that we operate on the
|
||||
// components using file descriptors. We then return the last component we
|
||||
// managed open, along with the remaining path components not opened.
|
||||
|
||||
// Try to use openat2 if possible.
|
||||
if hasOpenat2() {
|
||||
return lookupOpenat2(root, unsafePath, partial)
|
||||
}
|
||||
|
||||
// Get the "actual" root path from /proc/self/fd. This is necessary if the
|
||||
// root is some magic-link like /proc/$pid/root, in which case we want to
|
||||
// make sure when we do checkProcSelfFdPath that we are using the correct
|
||||
// root path.
|
||||
logicalRootPath, err := procSelfFdReadlink(root)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("get real root path: %w", err)
|
||||
}
|
||||
|
||||
currentDir, err := dupFile(root)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("clone root fd: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
// If a handle is not returned, close the internal handle.
|
||||
if Handle == nil {
|
||||
_ = currentDir.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
// symlinkStack is used to emulate how openat2(RESOLVE_IN_ROOT) treats
|
||||
// dangling symlinks. If we hit a non-existent path while resolving a
|
||||
// symlink, we need to return the (dir, remainingPath) that we had when we
|
||||
// hit the symlink (treating the symlink as though it were a regular file).
|
||||
// The set of (dir, remainingPath) sets is stored within the symlinkStack
|
||||
// and we add and remove parts when we hit symlink and non-symlink
|
||||
// components respectively. We need a stack because of recursive symlinks
|
||||
// (symlinks that contain symlink components in their target).
|
||||
//
|
||||
// Note that the stack is ONLY used for book-keeping. All of the actual
|
||||
// path walking logic is still based on currentPath/remainingPath and
|
||||
// currentDir (as in SecureJoin).
|
||||
var symStack *symlinkStack
|
||||
if partial {
|
||||
symStack = new(symlinkStack)
|
||||
defer symStack.Close()
|
||||
}
|
||||
|
||||
var (
|
||||
linksWalked int
|
||||
currentPath string
|
||||
remainingPath = unsafePath
|
||||
)
|
||||
for remainingPath != "" {
|
||||
// Save the current remaining path so if the part is not real we can
|
||||
// return the path including the component.
|
||||
oldRemainingPath := remainingPath
|
||||
|
||||
// Get the next path component.
|
||||
var part string
|
||||
if i := strings.IndexByte(remainingPath, '/'); i == -1 {
|
||||
part, remainingPath = remainingPath, ""
|
||||
} else {
|
||||
part, remainingPath = remainingPath[:i], remainingPath[i+1:]
|
||||
}
|
||||
// If we hit an empty component, we need to treat it as though it is
|
||||
// "." so that trailing "/" and "//" components on a non-directory
|
||||
// correctly return the right error code.
|
||||
if part == "" {
|
||||
part = "."
|
||||
}
|
||||
|
||||
// Apply the component lexically to the path we are building.
|
||||
// currentPath does not contain any symlinks, and we are lexically
|
||||
// dealing with a single component, so it's okay to do a filepath.Clean
|
||||
// here.
|
||||
nextPath := path.Join("/", currentPath, part)
|
||||
// If we logically hit the root, just clone the root rather than
|
||||
// opening the part and doing all of the other checks.
|
||||
if nextPath == "/" {
|
||||
if err := symStack.PopPart(part); err != nil {
|
||||
return nil, "", fmt.Errorf("walking into root with part %q failed: %w", part, err)
|
||||
}
|
||||
// Jump to root.
|
||||
rootClone, err := dupFile(root)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("clone root fd: %w", err)
|
||||
}
|
||||
_ = currentDir.Close()
|
||||
currentDir = rootClone
|
||||
currentPath = nextPath
|
||||
continue
|
||||
}
|
||||
|
||||
// Try to open the next component.
|
||||
nextDir, err := openatFile(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
|
||||
switch {
|
||||
case err == nil:
|
||||
st, err := nextDir.Stat()
|
||||
if err != nil {
|
||||
_ = nextDir.Close()
|
||||
return nil, "", fmt.Errorf("stat component %q: %w", part, err)
|
||||
}
|
||||
|
||||
switch st.Mode() & os.ModeType {
|
||||
case os.ModeSymlink:
|
||||
// readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See
|
||||
// Linux commit 65cfc6722361 ("readlinkat(), fchownat() and
|
||||
// fstatat() with empty relative pathnames").
|
||||
linkDest, err := readlinkatFile(nextDir, "")
|
||||
// We don't need the handle anymore.
|
||||
_ = nextDir.Close()
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
linksWalked++
|
||||
if linksWalked > maxSymlinkLimit {
|
||||
return nil, "", &os.PathError{Op: "securejoin.lookupInRoot", Path: logicalRootPath + "/" + unsafePath, Err: unix.ELOOP}
|
||||
}
|
||||
|
||||
// Swap out the symlink's component for the link entry itself.
|
||||
if err := symStack.SwapLink(part, currentDir, oldRemainingPath, linkDest); err != nil {
|
||||
return nil, "", fmt.Errorf("walking into symlink %q failed: push symlink: %w", part, err)
|
||||
}
|
||||
|
||||
// Update our logical remaining path.
|
||||
remainingPath = linkDest + "/" + remainingPath
|
||||
// Absolute symlinks reset any work we've already done.
|
||||
if path.IsAbs(linkDest) {
|
||||
// Jump to root.
|
||||
rootClone, err := dupFile(root)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("clone root fd: %w", err)
|
||||
}
|
||||
_ = currentDir.Close()
|
||||
currentDir = rootClone
|
||||
currentPath = "/"
|
||||
}
|
||||
|
||||
default:
|
||||
// If we are dealing with a directory, simply walk into it.
|
||||
_ = currentDir.Close()
|
||||
currentDir = nextDir
|
||||
currentPath = nextPath
|
||||
|
||||
// The part was real, so drop it from the symlink stack.
|
||||
if err := symStack.PopPart(part); err != nil {
|
||||
return nil, "", fmt.Errorf("walking into directory %q failed: %w", part, err)
|
||||
}
|
||||
|
||||
// If we are operating on a .., make sure we haven't escaped.
|
||||
// We only have to check for ".." here because walking down
|
||||
// into a regular component component cannot cause you to
|
||||
// escape. This mirrors the logic in RESOLVE_IN_ROOT, except we
|
||||
// have to check every ".." rather than only checking after a
|
||||
// rename or mount on the system.
|
||||
if part == ".." {
|
||||
// Make sure the root hasn't moved.
|
||||
if err := checkProcSelfFdPath(logicalRootPath, root); err != nil {
|
||||
return nil, "", fmt.Errorf("root path moved during lookup: %w", err)
|
||||
}
|
||||
// Make sure the path is what we expect.
|
||||
fullPath := logicalRootPath + nextPath
|
||||
if err := checkProcSelfFdPath(fullPath, currentDir); err != nil {
|
||||
return nil, "", fmt.Errorf("walking into %q had unexpected result: %w", part, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
if !partial {
|
||||
return nil, "", err
|
||||
}
|
||||
// If there are any remaining components in the symlink stack, we
|
||||
// are still within a symlink resolution and thus we hit a dangling
|
||||
// symlink. So pretend that the first symlink in the stack we hit
|
||||
// was an ENOENT (to match openat2).
|
||||
if oldDir, remainingPath, ok := symStack.PopTopSymlink(); ok {
|
||||
_ = currentDir.Close()
|
||||
return oldDir, remainingPath, err
|
||||
}
|
||||
// We have hit a final component that doesn't exist, so we have our
|
||||
// partial open result. Note that we have to use the OLD remaining
|
||||
// path, since the lookup failed.
|
||||
return currentDir, oldRemainingPath, err
|
||||
}
|
||||
}
|
||||
|
||||
// If the unsafePath had a trailing slash, we need to make sure we try to
|
||||
// do a relative "." open so that we will correctly return an error when
|
||||
// the final component is a non-directory (to match openat2). In the
|
||||
// context of openat2, a trailing slash and a trailing "/." are completely
|
||||
// equivalent.
|
||||
if strings.HasSuffix(unsafePath, "/") {
|
||||
nextDir, err := openatFile(currentDir, ".", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
if !partial {
|
||||
_ = currentDir.Close()
|
||||
currentDir = nil
|
||||
}
|
||||
return currentDir, "", err
|
||||
}
|
||||
_ = currentDir.Close()
|
||||
currentDir = nextDir
|
||||
}
|
||||
|
||||
// All of the components existed!
|
||||
return currentDir, "", nil
|
||||
}
|
236
vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go
generated
vendored
Normal file
236
vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go
generated
vendored
Normal file
@ -0,0 +1,236 @@
|
||||
//go:build linux
|
||||
|
||||
// Copyright (C) 2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
errInvalidMode = errors.New("invalid permission mode")
|
||||
errPossibleAttack = errors.New("possible attack detected")
|
||||
)
|
||||
|
||||
// modePermExt is like os.ModePerm except that it also includes the set[ug]id
|
||||
// and sticky bits.
|
||||
const modePermExt = os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky
|
||||
|
||||
//nolint:cyclop // this function needs to handle a lot of cases
|
||||
func toUnixMode(mode os.FileMode) (uint32, error) {
|
||||
sysMode := uint32(mode.Perm())
|
||||
if mode&os.ModeSetuid != 0 {
|
||||
sysMode |= unix.S_ISUID
|
||||
}
|
||||
if mode&os.ModeSetgid != 0 {
|
||||
sysMode |= unix.S_ISGID
|
||||
}
|
||||
if mode&os.ModeSticky != 0 {
|
||||
sysMode |= unix.S_ISVTX
|
||||
}
|
||||
// We don't allow file type bits.
|
||||
if mode&os.ModeType != 0 {
|
||||
return 0, fmt.Errorf("%w %+.3o (%s): type bits not permitted", errInvalidMode, mode, mode)
|
||||
}
|
||||
// We don't allow other unknown modes.
|
||||
if mode&^modePermExt != 0 || sysMode&unix.S_IFMT != 0 {
|
||||
return 0, fmt.Errorf("%w %+.3o (%s): unknown mode bits", errInvalidMode, mode, mode)
|
||||
}
|
||||
return sysMode, nil
|
||||
}
|
||||
|
||||
// MkdirAllHandle is equivalent to [MkdirAll], except that it is safer to use
|
||||
// in two respects:
|
||||
//
|
||||
// - The caller provides the root directory as an *[os.File] (preferably O_PATH)
|
||||
// handle. This means that the caller can be sure which root directory is
|
||||
// being used. Note that this can be emulated by using /proc/self/fd/... as
|
||||
// the root path with [os.MkdirAll].
|
||||
//
|
||||
// - Once all of the directories have been created, an *[os.File] O_PATH handle
|
||||
// to the directory at unsafePath is returned to the caller. This is done in
|
||||
// an effectively-race-free way (an attacker would only be able to swap the
|
||||
// final directory component), which is not possible to emulate with
|
||||
// [MkdirAll].
|
||||
//
|
||||
// In addition, the returned handle is obtained far more efficiently than doing
|
||||
// a brand new lookup of unsafePath (such as with [SecureJoin] or openat2) after
|
||||
// doing [MkdirAll]. If you intend to open the directory after creating it, you
|
||||
// should use MkdirAllHandle.
|
||||
func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.File, Err error) {
|
||||
unixMode, err := toUnixMode(mode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// On Linux, mkdirat(2) (and os.Mkdir) silently ignore the suid and sgid
|
||||
// bits. We could also silently ignore them but since we have very few
|
||||
// users it seems more prudent to return an error so users notice that
|
||||
// these bits will not be set.
|
||||
if unixMode&^0o1777 != 0 {
|
||||
return nil, fmt.Errorf("%w for mkdir %+.3o: suid and sgid are ignored by mkdir", errInvalidMode, mode)
|
||||
}
|
||||
|
||||
// Try to open as much of the path as possible.
|
||||
currentDir, remainingPath, err := partialLookupInRoot(root, unsafePath)
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
_ = currentDir.Close()
|
||||
}
|
||||
}()
|
||||
if err != nil && !errors.Is(err, unix.ENOENT) {
|
||||
return nil, fmt.Errorf("find existing subpath of %q: %w", unsafePath, err)
|
||||
}
|
||||
|
||||
// If there is an attacker deleting directories as we walk into them,
|
||||
// detect this proactively. Note this is guaranteed to detect if the
|
||||
// attacker deleted any part of the tree up to currentDir.
|
||||
//
|
||||
// Once we walk into a dead directory, partialLookupInRoot would not be
|
||||
// able to walk further down the tree (directories must be empty before
|
||||
// they are deleted), and if the attacker has removed the entire tree we
|
||||
// can be sure that anything that was originally inside a dead directory
|
||||
// must also be deleted and thus is a dead directory in its own right.
|
||||
//
|
||||
// This is mostly a quality-of-life check, because mkdir will simply fail
|
||||
// later if the attacker deletes the tree after this check.
|
||||
if err := isDeadInode(currentDir); err != nil {
|
||||
return nil, fmt.Errorf("finding existing subpath of %q: %w", unsafePath, err)
|
||||
}
|
||||
|
||||
// Re-open the path to match the O_DIRECTORY reopen loop later (so that we
|
||||
// always return a non-O_PATH handle). We also check that we actually got a
|
||||
// directory.
|
||||
if reopenDir, err := Reopen(currentDir, unix.O_DIRECTORY|unix.O_CLOEXEC); errors.Is(err, unix.ENOTDIR) {
|
||||
return nil, fmt.Errorf("cannot create subdirectories in %q: %w", currentDir.Name(), unix.ENOTDIR)
|
||||
} else if err != nil {
|
||||
return nil, fmt.Errorf("re-opening handle to %q: %w", currentDir.Name(), err)
|
||||
} else {
|
||||
_ = currentDir.Close()
|
||||
currentDir = reopenDir
|
||||
}
|
||||
|
||||
remainingParts := strings.Split(remainingPath, string(filepath.Separator))
|
||||
if slices_Contains(remainingParts, "..") {
|
||||
// The path contained ".." components after the end of the "real"
|
||||
// components. We could try to safely resolve ".." here but that would
|
||||
// add a bunch of extra logic for something that it's not clear even
|
||||
// needs to be supported. So just return an error.
|
||||
//
|
||||
// If we do filepath.Clean(remainingPath) then we end up with the
|
||||
// problem that ".." can erase a trailing dangling symlink and produce
|
||||
// a path that doesn't quite match what the user asked for.
|
||||
return nil, fmt.Errorf("%w: yet-to-be-created path %q contains '..' components", unix.ENOENT, remainingPath)
|
||||
}
|
||||
|
||||
// Create the remaining components.
|
||||
for _, part := range remainingParts {
|
||||
switch part {
|
||||
case "", ".":
|
||||
// Skip over no-op paths.
|
||||
continue
|
||||
}
|
||||
|
||||
// NOTE: mkdir(2) will not follow trailing symlinks, so we can safely
|
||||
// create the final component without worrying about symlink-exchange
|
||||
// attacks.
|
||||
//
|
||||
// If we get -EEXIST, it's possible that another program created the
|
||||
// directory at the same time as us. In that case, just continue on as
|
||||
// if we created it (if the created inode is not a directory, the
|
||||
// following open call will fail).
|
||||
if err := unix.Mkdirat(int(currentDir.Fd()), part, unixMode); err != nil && !errors.Is(err, unix.EEXIST) {
|
||||
err = &os.PathError{Op: "mkdirat", Path: currentDir.Name() + "/" + part, Err: err}
|
||||
// Make the error a bit nicer if the directory is dead.
|
||||
if deadErr := isDeadInode(currentDir); deadErr != nil {
|
||||
// TODO: Once we bump the minimum Go version to 1.20, we can use
|
||||
// multiple %w verbs for this wrapping. For now we need to use a
|
||||
// compatibility shim for older Go versions.
|
||||
//err = fmt.Errorf("%w (%w)", err, deadErr)
|
||||
err = wrapBaseError(err, deadErr)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Get a handle to the next component. O_DIRECTORY means we don't need
|
||||
// to use O_PATH.
|
||||
var nextDir *os.File
|
||||
if hasOpenat2() {
|
||||
nextDir, err = openat2File(currentDir, part, &unix.OpenHow{
|
||||
Flags: unix.O_NOFOLLOW | unix.O_DIRECTORY | unix.O_CLOEXEC,
|
||||
Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_NO_XDEV,
|
||||
})
|
||||
} else {
|
||||
nextDir, err = openatFile(currentDir, part, unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_ = currentDir.Close()
|
||||
currentDir = nextDir
|
||||
|
||||
// It's possible that the directory we just opened was swapped by an
|
||||
// attacker. Unfortunately there isn't much we can do to protect
|
||||
// against this, and MkdirAll's behaviour is that we will reuse
|
||||
// existing directories anyway so the need to protect against this is
|
||||
// incredibly limited (and arguably doesn't even deserve mention here).
|
||||
//
|
||||
// Ideally we might want to check that the owner and mode match what we
|
||||
// would've created -- unfortunately, it is non-trivial to verify that
|
||||
// the owner and mode of the created directory match. While plain Unix
|
||||
// DAC rules seem simple enough to emulate, there are a bunch of other
|
||||
// factors that can change the mode or owner of created directories
|
||||
// (default POSIX ACLs, mount options like uid=1,gid=2,umask=0 on
|
||||
// filesystems like vfat, etc etc). We used to try to verify this but
|
||||
// it just lead to a series of spurious errors.
|
||||
//
|
||||
// We could also check that the directory is non-empty, but
|
||||
// unfortunately some pseduofilesystems (like cgroupfs) create
|
||||
// non-empty directories, which would result in different spurious
|
||||
// errors.
|
||||
}
|
||||
return currentDir, nil
|
||||
}
|
||||
|
||||
// MkdirAll is a race-safe alternative to the [os.MkdirAll] function,
|
||||
// where the new directory is guaranteed to be within the root directory (if an
|
||||
// attacker can move directories from inside the root to outside the root, the
|
||||
// created directory tree might be outside of the root but the key constraint
|
||||
// is that at no point will we walk outside of the directory tree we are
|
||||
// creating).
|
||||
//
|
||||
// Effectively, MkdirAll(root, unsafePath, mode) is equivalent to
|
||||
//
|
||||
// path, _ := securejoin.SecureJoin(root, unsafePath)
|
||||
// err := os.MkdirAll(path, mode)
|
||||
//
|
||||
// But is much safer. The above implementation is unsafe because if an attacker
|
||||
// can modify the filesystem tree between [SecureJoin] and [os.MkdirAll], it is
|
||||
// possible for MkdirAll to resolve unsafe symlink components and create
|
||||
// directories outside of the root.
|
||||
//
|
||||
// If you plan to open the directory after you have created it or want to use
|
||||
// an open directory handle as the root, you should use [MkdirAllHandle] instead.
|
||||
// This function is a wrapper around [MkdirAllHandle].
|
||||
func MkdirAll(root, unsafePath string, mode os.FileMode) error {
|
||||
rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rootDir.Close()
|
||||
|
||||
f, err := MkdirAllHandle(rootDir, unsafePath, mode)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_ = f.Close()
|
||||
return nil
|
||||
}
|
103
vendor/github.com/cyphar/filepath-securejoin/open_linux.go
generated
vendored
Normal file
103
vendor/github.com/cyphar/filepath-securejoin/open_linux.go
generated
vendored
Normal file
@ -0,0 +1,103 @@
|
||||
//go:build linux
|
||||
|
||||
// Copyright (C) 2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// OpenatInRoot is equivalent to [OpenInRoot], except that the root is provided
|
||||
// using an *[os.File] handle, to ensure that the correct root directory is used.
|
||||
func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) {
|
||||
handle, err := completeLookupInRoot(root, unsafePath)
|
||||
if err != nil {
|
||||
return nil, &os.PathError{Op: "securejoin.OpenInRoot", Path: unsafePath, Err: err}
|
||||
}
|
||||
return handle, nil
|
||||
}
|
||||
|
||||
// OpenInRoot safely opens the provided unsafePath within the root.
|
||||
// Effectively, OpenInRoot(root, unsafePath) is equivalent to
|
||||
//
|
||||
// path, _ := securejoin.SecureJoin(root, unsafePath)
|
||||
// handle, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC)
|
||||
//
|
||||
// But is much safer. The above implementation is unsafe because if an attacker
|
||||
// can modify the filesystem tree between [SecureJoin] and [os.OpenFile], it is
|
||||
// possible for the returned file to be outside of the root.
|
||||
//
|
||||
// Note that the returned handle is an O_PATH handle, meaning that only a very
|
||||
// limited set of operations will work on the handle. This is done to avoid
|
||||
// accidentally opening an untrusted file that could cause issues (such as a
|
||||
// disconnected TTY that could cause a DoS, or some other issue). In order to
|
||||
// use the returned handle, you can "upgrade" it to a proper handle using
|
||||
// [Reopen].
|
||||
func OpenInRoot(root, unsafePath string) (*os.File, error) {
|
||||
rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rootDir.Close()
|
||||
return OpenatInRoot(rootDir, unsafePath)
|
||||
}
|
||||
|
||||
// Reopen takes an *[os.File] handle and re-opens it through /proc/self/fd.
|
||||
// Reopen(file, flags) is effectively equivalent to
|
||||
//
|
||||
// fdPath := fmt.Sprintf("/proc/self/fd/%d", file.Fd())
|
||||
// os.OpenFile(fdPath, flags|unix.O_CLOEXEC)
|
||||
//
|
||||
// But with some extra hardenings to ensure that we are not tricked by a
|
||||
// maliciously-configured /proc mount. While this attack scenario is not
|
||||
// common, in container runtimes it is possible for higher-level runtimes to be
|
||||
// tricked into configuring an unsafe /proc that can be used to attack file
|
||||
// operations. See [CVE-2019-19921] for more details.
|
||||
//
|
||||
// [CVE-2019-19921]: https://github.com/advisories/GHSA-fh74-hm69-rqjw
|
||||
func Reopen(handle *os.File, flags int) (*os.File, error) {
|
||||
procRoot, err := getProcRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// We can't operate on /proc/thread-self/fd/$n directly when doing a
|
||||
// re-open, so we need to open /proc/thread-self/fd and then open a single
|
||||
// final component.
|
||||
procFdDir, closer, err := procThreadSelf(procRoot, "fd/")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get safe /proc/thread-self/fd handle: %w", err)
|
||||
}
|
||||
defer procFdDir.Close()
|
||||
defer closer()
|
||||
|
||||
// Try to detect if there is a mount on top of the magic-link we are about
|
||||
// to open. If we are using unsafeHostProcRoot(), this could change after
|
||||
// we check it (and there's nothing we can do about that) but for
|
||||
// privateProcRoot() this should be guaranteed to be safe (at least since
|
||||
// Linux 5.12[1], when anonymous mount namespaces were completely isolated
|
||||
// from external mounts including mount propagation events).
|
||||
//
|
||||
// [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts
|
||||
// onto targets that reside on shared mounts").
|
||||
fdStr := strconv.Itoa(int(handle.Fd()))
|
||||
if err := checkSymlinkOvermount(procRoot, procFdDir, fdStr); err != nil {
|
||||
return nil, fmt.Errorf("check safety of /proc/thread-self/fd/%s magiclink: %w", fdStr, err)
|
||||
}
|
||||
|
||||
flags |= unix.O_CLOEXEC
|
||||
// Rather than just wrapping openatFile, open-code it so we can copy
|
||||
// handle.Name().
|
||||
reopenFd, err := unix.Openat(int(procFdDir.Fd()), fdStr, flags, 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err)
|
||||
}
|
||||
return os.NewFile(uintptr(reopenFd), handle.Name()), nil
|
||||
}
|
127
vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go
generated
vendored
Normal file
127
vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go
generated
vendored
Normal file
@ -0,0 +1,127 @@
|
||||
//go:build linux
|
||||
|
||||
// Copyright (C) 2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var hasOpenat2 = sync_OnceValue(func() bool {
|
||||
fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{
|
||||
Flags: unix.O_PATH | unix.O_CLOEXEC,
|
||||
Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT,
|
||||
})
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_ = unix.Close(fd)
|
||||
return true
|
||||
})
|
||||
|
||||
func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool {
|
||||
// RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve
|
||||
// ".." while a mount or rename occurs anywhere on the system. This could
|
||||
// happen spuriously, or as the result of an attacker trying to mess with
|
||||
// us during lookup.
|
||||
//
|
||||
// In addition, scoped lookups have a "safety check" at the end of
|
||||
// complete_walk which will return -EXDEV if the final path is not in the
|
||||
// root.
|
||||
return how.Resolve&(unix.RESOLVE_IN_ROOT|unix.RESOLVE_BENEATH) != 0 &&
|
||||
(errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV))
|
||||
}
|
||||
|
||||
const scopedLookupMaxRetries = 10
|
||||
|
||||
func openat2File(dir *os.File, path string, how *unix.OpenHow) (*os.File, error) {
|
||||
fullPath := dir.Name() + "/" + path
|
||||
// Make sure we always set O_CLOEXEC.
|
||||
how.Flags |= unix.O_CLOEXEC
|
||||
var tries int
|
||||
for tries < scopedLookupMaxRetries {
|
||||
fd, err := unix.Openat2(int(dir.Fd()), path, how)
|
||||
if err != nil {
|
||||
if scopedLookupShouldRetry(how, err) {
|
||||
// We retry a couple of times to avoid the spurious errors, and
|
||||
// if we are being attacked then returning -EAGAIN is the best
|
||||
// we can do.
|
||||
tries++
|
||||
continue
|
||||
}
|
||||
return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: err}
|
||||
}
|
||||
// If we are using RESOLVE_IN_ROOT, the name we generated may be wrong.
|
||||
// NOTE: The procRoot code MUST NOT use RESOLVE_IN_ROOT, otherwise
|
||||
// you'll get infinite recursion here.
|
||||
if how.Resolve&unix.RESOLVE_IN_ROOT == unix.RESOLVE_IN_ROOT {
|
||||
if actualPath, err := rawProcSelfFdReadlink(fd); err == nil {
|
||||
fullPath = actualPath
|
||||
}
|
||||
}
|
||||
return os.NewFile(uintptr(fd), fullPath), nil
|
||||
}
|
||||
return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: errPossibleAttack}
|
||||
}
|
||||
|
||||
func lookupOpenat2(root *os.File, unsafePath string, partial bool) (*os.File, string, error) {
|
||||
if !partial {
|
||||
file, err := openat2File(root, unsafePath, &unix.OpenHow{
|
||||
Flags: unix.O_PATH | unix.O_CLOEXEC,
|
||||
Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS,
|
||||
})
|
||||
return file, "", err
|
||||
}
|
||||
return partialLookupOpenat2(root, unsafePath)
|
||||
}
|
||||
|
||||
// partialLookupOpenat2 is an alternative implementation of
|
||||
// partialLookupInRoot, using openat2(RESOLVE_IN_ROOT) to more safely get a
|
||||
// handle to the deepest existing child of the requested path within the root.
|
||||
func partialLookupOpenat2(root *os.File, unsafePath string) (*os.File, string, error) {
|
||||
// TODO: Implement this as a git-bisect-like binary search.
|
||||
|
||||
unsafePath = filepath.ToSlash(unsafePath) // noop
|
||||
endIdx := len(unsafePath)
|
||||
var lastError error
|
||||
for endIdx > 0 {
|
||||
subpath := unsafePath[:endIdx]
|
||||
|
||||
handle, err := openat2File(root, subpath, &unix.OpenHow{
|
||||
Flags: unix.O_PATH | unix.O_CLOEXEC,
|
||||
Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS,
|
||||
})
|
||||
if err == nil {
|
||||
// Jump over the slash if we have a non-"" remainingPath.
|
||||
if endIdx < len(unsafePath) {
|
||||
endIdx += 1
|
||||
}
|
||||
// We found a subpath!
|
||||
return handle, unsafePath[endIdx:], lastError
|
||||
}
|
||||
if errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENOTDIR) {
|
||||
// That path doesn't exist, let's try the next directory up.
|
||||
endIdx = strings.LastIndexByte(subpath, '/')
|
||||
lastError = err
|
||||
continue
|
||||
}
|
||||
return nil, "", fmt.Errorf("open subpath: %w", err)
|
||||
}
|
||||
// If we couldn't open anything, the whole subpath is missing. Return a
|
||||
// copy of the root fd so that the caller doesn't close this one by
|
||||
// accident.
|
||||
rootClone, err := dupFile(root)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
return rootClone, unsafePath, lastError
|
||||
}
|
59
vendor/github.com/cyphar/filepath-securejoin/openat_linux.go
generated
vendored
Normal file
59
vendor/github.com/cyphar/filepath-securejoin/openat_linux.go
generated
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
//go:build linux
|
||||
|
||||
// Copyright (C) 2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func dupFile(f *os.File) (*os.File, error) {
|
||||
fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err)
|
||||
}
|
||||
return os.NewFile(uintptr(fd), f.Name()), nil
|
||||
}
|
||||
|
||||
func openatFile(dir *os.File, path string, flags int, mode int) (*os.File, error) {
|
||||
// Make sure we always set O_CLOEXEC.
|
||||
flags |= unix.O_CLOEXEC
|
||||
fd, err := unix.Openat(int(dir.Fd()), path, flags, uint32(mode))
|
||||
if err != nil {
|
||||
return nil, &os.PathError{Op: "openat", Path: dir.Name() + "/" + path, Err: err}
|
||||
}
|
||||
// All of the paths we use with openatFile(2) are guaranteed to be
|
||||
// lexically safe, so we can use path.Join here.
|
||||
fullPath := filepath.Join(dir.Name(), path)
|
||||
return os.NewFile(uintptr(fd), fullPath), nil
|
||||
}
|
||||
|
||||
func fstatatFile(dir *os.File, path string, flags int) (unix.Stat_t, error) {
|
||||
var stat unix.Stat_t
|
||||
if err := unix.Fstatat(int(dir.Fd()), path, &stat, flags); err != nil {
|
||||
return stat, &os.PathError{Op: "fstatat", Path: dir.Name() + "/" + path, Err: err}
|
||||
}
|
||||
return stat, nil
|
||||
}
|
||||
|
||||
func readlinkatFile(dir *os.File, path string) (string, error) {
|
||||
size := 4096
|
||||
for {
|
||||
linkBuf := make([]byte, size)
|
||||
n, err := unix.Readlinkat(int(dir.Fd()), path, linkBuf)
|
||||
if err != nil {
|
||||
return "", &os.PathError{Op: "readlinkat", Path: dir.Name() + "/" + path, Err: err}
|
||||
}
|
||||
if n != size {
|
||||
return string(linkBuf[:n]), nil
|
||||
}
|
||||
// Possible truncation, resize the buffer.
|
||||
size *= 2
|
||||
}
|
||||
}
|
452
vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go
generated
vendored
Normal file
452
vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go
generated
vendored
Normal file
@ -0,0 +1,452 @@
|
||||
//go:build linux
|
||||
|
||||
// Copyright (C) 2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func fstat(f *os.File) (unix.Stat_t, error) {
|
||||
var stat unix.Stat_t
|
||||
if err := unix.Fstat(int(f.Fd()), &stat); err != nil {
|
||||
return stat, &os.PathError{Op: "fstat", Path: f.Name(), Err: err}
|
||||
}
|
||||
return stat, nil
|
||||
}
|
||||
|
||||
func fstatfs(f *os.File) (unix.Statfs_t, error) {
|
||||
var statfs unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(f.Fd()), &statfs); err != nil {
|
||||
return statfs, &os.PathError{Op: "fstatfs", Path: f.Name(), Err: err}
|
||||
}
|
||||
return statfs, nil
|
||||
}
|
||||
|
||||
// The kernel guarantees that the root inode of a procfs mount has an
|
||||
// f_type of PROC_SUPER_MAGIC and st_ino of PROC_ROOT_INO.
|
||||
const (
|
||||
procSuperMagic = 0x9fa0 // PROC_SUPER_MAGIC
|
||||
procRootIno = 1 // PROC_ROOT_INO
|
||||
)
|
||||
|
||||
func verifyProcRoot(procRoot *os.File) error {
|
||||
if statfs, err := fstatfs(procRoot); err != nil {
|
||||
return err
|
||||
} else if statfs.Type != procSuperMagic {
|
||||
return fmt.Errorf("%w: incorrect procfs root filesystem type 0x%x", errUnsafeProcfs, statfs.Type)
|
||||
}
|
||||
if stat, err := fstat(procRoot); err != nil {
|
||||
return err
|
||||
} else if stat.Ino != procRootIno {
|
||||
return fmt.Errorf("%w: incorrect procfs root inode number %d", errUnsafeProcfs, stat.Ino)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var hasNewMountApi = sync_OnceValue(func() bool {
|
||||
// All of the pieces of the new mount API we use (fsopen, fsconfig,
|
||||
// fsmount, open_tree) were added together in Linux 5.1[1,2], so we can
|
||||
// just check for one of the syscalls and the others should also be
|
||||
// available.
|
||||
//
|
||||
// Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE.
|
||||
// This is equivalent to openat(2), but tells us if open_tree is
|
||||
// available (and thus all of the other basic new mount API syscalls).
|
||||
// open_tree(2) is most light-weight syscall to test here.
|
||||
//
|
||||
// [1]: merge commit 400913252d09
|
||||
// [2]: <https://lore.kernel.org/lkml/153754740781.17872.7869536526927736855.stgit@warthog.procyon.org.uk/>
|
||||
fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_ = unix.Close(fd)
|
||||
return true
|
||||
})
|
||||
|
||||
func fsopen(fsName string, flags int) (*os.File, error) {
|
||||
// Make sure we always set O_CLOEXEC.
|
||||
flags |= unix.FSOPEN_CLOEXEC
|
||||
fd, err := unix.Fsopen(fsName, flags)
|
||||
if err != nil {
|
||||
return nil, os.NewSyscallError("fsopen "+fsName, err)
|
||||
}
|
||||
return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil
|
||||
}
|
||||
|
||||
func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) {
|
||||
// Make sure we always set O_CLOEXEC.
|
||||
flags |= unix.FSMOUNT_CLOEXEC
|
||||
fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs)
|
||||
if err != nil {
|
||||
return nil, os.NewSyscallError("fsmount "+ctx.Name(), err)
|
||||
}
|
||||
return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil
|
||||
}
|
||||
|
||||
func newPrivateProcMount() (*os.File, error) {
|
||||
procfsCtx, err := fsopen("proc", unix.FSOPEN_CLOEXEC)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer procfsCtx.Close()
|
||||
|
||||
// Try to configure hidepid=ptraceable,subset=pid if possible, but ignore errors.
|
||||
_ = unix.FsconfigSetString(int(procfsCtx.Fd()), "hidepid", "ptraceable")
|
||||
_ = unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid")
|
||||
|
||||
// Get an actual handle.
|
||||
if err := unix.FsconfigCreate(int(procfsCtx.Fd())); err != nil {
|
||||
return nil, os.NewSyscallError("fsconfig create procfs", err)
|
||||
}
|
||||
return fsmount(procfsCtx, unix.FSMOUNT_CLOEXEC, unix.MS_RDONLY|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_NOSUID)
|
||||
}
|
||||
|
||||
func openTree(dir *os.File, path string, flags uint) (*os.File, error) {
|
||||
dirFd := -int(unix.EBADF)
|
||||
dirName := "."
|
||||
if dir != nil {
|
||||
dirFd = int(dir.Fd())
|
||||
dirName = dir.Name()
|
||||
}
|
||||
// Make sure we always set O_CLOEXEC.
|
||||
flags |= unix.OPEN_TREE_CLOEXEC
|
||||
fd, err := unix.OpenTree(dirFd, path, flags)
|
||||
if err != nil {
|
||||
return nil, &os.PathError{Op: "open_tree", Path: path, Err: err}
|
||||
}
|
||||
return os.NewFile(uintptr(fd), dirName+"/"+path), nil
|
||||
}
|
||||
|
||||
func clonePrivateProcMount() (_ *os.File, Err error) {
|
||||
// Try to make a clone without using AT_RECURSIVE if we can. If this works,
|
||||
// we can be sure there are no over-mounts and so if the root is valid then
|
||||
// we're golden. Otherwise, we have to deal with over-mounts.
|
||||
procfsHandle, err := openTree(nil, "/proc", unix.OPEN_TREE_CLONE)
|
||||
if err != nil || hookForcePrivateProcRootOpenTreeAtRecursive(procfsHandle) {
|
||||
procfsHandle, err = openTree(nil, "/proc", unix.OPEN_TREE_CLONE|unix.AT_RECURSIVE)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating a detached procfs clone: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
_ = procfsHandle.Close()
|
||||
}
|
||||
}()
|
||||
if err := verifyProcRoot(procfsHandle); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return procfsHandle, nil
|
||||
}
|
||||
|
||||
func privateProcRoot() (*os.File, error) {
|
||||
if !hasNewMountApi() || hookForceGetProcRootUnsafe() {
|
||||
return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP)
|
||||
}
|
||||
// Try to create a new procfs mount from scratch if we can. This ensures we
|
||||
// can get a procfs mount even if /proc is fake (for whatever reason).
|
||||
procRoot, err := newPrivateProcMount()
|
||||
if err != nil || hookForcePrivateProcRootOpenTree(procRoot) {
|
||||
// Try to clone /proc then...
|
||||
procRoot, err = clonePrivateProcMount()
|
||||
}
|
||||
return procRoot, err
|
||||
}
|
||||
|
||||
func unsafeHostProcRoot() (_ *os.File, Err error) {
|
||||
procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
_ = procRoot.Close()
|
||||
}
|
||||
}()
|
||||
if err := verifyProcRoot(procRoot); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return procRoot, nil
|
||||
}
|
||||
|
||||
func doGetProcRoot() (*os.File, error) {
|
||||
procRoot, err := privateProcRoot()
|
||||
if err != nil {
|
||||
// Fall back to using a /proc handle if making a private mount failed.
|
||||
// If we have openat2, at least we can avoid some kinds of over-mount
|
||||
// attacks, but without openat2 there's not much we can do.
|
||||
procRoot, err = unsafeHostProcRoot()
|
||||
}
|
||||
return procRoot, err
|
||||
}
|
||||
|
||||
var getProcRoot = sync_OnceValues(func() (*os.File, error) {
|
||||
return doGetProcRoot()
|
||||
})
|
||||
|
||||
var hasProcThreadSelf = sync_OnceValue(func() bool {
|
||||
return unix.Access("/proc/thread-self/", unix.F_OK) == nil
|
||||
})
|
||||
|
||||
var errUnsafeProcfs = errors.New("unsafe procfs detected")
|
||||
|
||||
type procThreadSelfCloser func()
|
||||
|
||||
// procThreadSelf returns a handle to /proc/thread-self/<subpath> (or an
|
||||
// equivalent handle on older kernels where /proc/thread-self doesn't exist).
|
||||
// Once finished with the handle, you must call the returned closer function
|
||||
// (runtime.UnlockOSThread). You must not pass the returned *os.File to other
|
||||
// Go threads or use the handle after calling the closer.
|
||||
//
|
||||
// This is similar to ProcThreadSelf from runc, but with extra hardening
|
||||
// applied and using *os.File.
|
||||
func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThreadSelfCloser, Err error) {
|
||||
// We need to lock our thread until the caller is done with the handle
|
||||
// because between getting the handle and using it we could get interrupted
|
||||
// by the Go runtime and hit the case where the underlying thread is
|
||||
// swapped out and the original thread is killed, resulting in
|
||||
// pull-your-hair-out-hard-to-debug issues in the caller.
|
||||
runtime.LockOSThread()
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
runtime.UnlockOSThread()
|
||||
}
|
||||
}()
|
||||
|
||||
// Figure out what prefix we want to use.
|
||||
threadSelf := "thread-self/"
|
||||
if !hasProcThreadSelf() || hookForceProcSelfTask() {
|
||||
/// Pre-3.17 kernels don't have /proc/thread-self, so do it manually.
|
||||
threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) + "/"
|
||||
if _, err := fstatatFile(procRoot, threadSelf, unix.AT_SYMLINK_NOFOLLOW); err != nil || hookForceProcSelf() {
|
||||
// In this case, we running in a pid namespace that doesn't match
|
||||
// the /proc mount we have. This can happen inside runc.
|
||||
//
|
||||
// Unfortunately, there is no nice way to get the correct TID to
|
||||
// use here because of the age of the kernel, so we have to just
|
||||
// use /proc/self and hope that it works.
|
||||
threadSelf = "self/"
|
||||
}
|
||||
}
|
||||
|
||||
// Grab the handle.
|
||||
var (
|
||||
handle *os.File
|
||||
err error
|
||||
)
|
||||
if hasOpenat2() {
|
||||
// We prefer being able to use RESOLVE_NO_XDEV if we can, to be
|
||||
// absolutely sure we are operating on a clean /proc handle that
|
||||
// doesn't have any cheeky overmounts that could trick us (including
|
||||
// symlink mounts on top of /proc/thread-self). RESOLVE_BENEATH isn't
|
||||
// strictly needed, but just use it since we have it.
|
||||
//
|
||||
// NOTE: /proc/self is technically a magic-link (the contents of the
|
||||
// symlink are generated dynamically), but it doesn't use
|
||||
// nd_jump_link() so RESOLVE_NO_MAGICLINKS allows it.
|
||||
//
|
||||
// NOTE: We MUST NOT use RESOLVE_IN_ROOT here, as openat2File uses
|
||||
// procSelfFdReadlink to clean up the returned f.Name() if we use
|
||||
// RESOLVE_IN_ROOT (which would lead to an infinite recursion).
|
||||
handle, err = openat2File(procRoot, threadSelf+subpath, &unix.OpenHow{
|
||||
Flags: unix.O_PATH | unix.O_NOFOLLOW | unix.O_CLOEXEC,
|
||||
Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_MAGICLINKS,
|
||||
})
|
||||
if err != nil {
|
||||
// TODO: Once we bump the minimum Go version to 1.20, we can use
|
||||
// multiple %w verbs for this wrapping. For now we need to use a
|
||||
// compatibility shim for older Go versions.
|
||||
//err = fmt.Errorf("%w: %w", errUnsafeProcfs, err)
|
||||
return nil, nil, wrapBaseError(err, errUnsafeProcfs)
|
||||
}
|
||||
} else {
|
||||
handle, err = openatFile(procRoot, threadSelf+subpath, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
// TODO: Once we bump the minimum Go version to 1.20, we can use
|
||||
// multiple %w verbs for this wrapping. For now we need to use a
|
||||
// compatibility shim for older Go versions.
|
||||
//err = fmt.Errorf("%w: %w", errUnsafeProcfs, err)
|
||||
return nil, nil, wrapBaseError(err, errUnsafeProcfs)
|
||||
}
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
_ = handle.Close()
|
||||
}
|
||||
}()
|
||||
// We can't detect bind-mounts of different parts of procfs on top of
|
||||
// /proc (a-la RESOLVE_NO_XDEV), but we can at least be sure that we
|
||||
// aren't on the wrong filesystem here.
|
||||
if statfs, err := fstatfs(handle); err != nil {
|
||||
return nil, nil, err
|
||||
} else if statfs.Type != procSuperMagic {
|
||||
return nil, nil, fmt.Errorf("%w: incorrect /proc/self/fd filesystem type 0x%x", errUnsafeProcfs, statfs.Type)
|
||||
}
|
||||
}
|
||||
return handle, runtime.UnlockOSThread, nil
|
||||
}
|
||||
|
||||
// STATX_MNT_ID_UNIQUE is provided in golang.org/x/sys@v0.20.0, but in order to
|
||||
// avoid bumping the requirement for a single constant we can just define it
|
||||
// ourselves.
|
||||
const STATX_MNT_ID_UNIQUE = 0x4000
|
||||
|
||||
var hasStatxMountId = sync_OnceValue(func() bool {
|
||||
var (
|
||||
stx unix.Statx_t
|
||||
// We don't care which mount ID we get. The kernel will give us the
|
||||
// unique one if it is supported.
|
||||
wantStxMask uint32 = STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
|
||||
)
|
||||
err := unix.Statx(-int(unix.EBADF), "/", 0, int(wantStxMask), &stx)
|
||||
return err == nil && stx.Mask&wantStxMask != 0
|
||||
})
|
||||
|
||||
func getMountId(dir *os.File, path string) (uint64, error) {
|
||||
// If we don't have statx(STATX_MNT_ID*) support, we can't do anything.
|
||||
if !hasStatxMountId() {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
var (
|
||||
stx unix.Statx_t
|
||||
// We don't care which mount ID we get. The kernel will give us the
|
||||
// unique one if it is supported.
|
||||
wantStxMask uint32 = STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
|
||||
)
|
||||
|
||||
err := unix.Statx(int(dir.Fd()), path, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW, int(wantStxMask), &stx)
|
||||
if stx.Mask&wantStxMask == 0 {
|
||||
// It's not a kernel limitation, for some reason we couldn't get a
|
||||
// mount ID. Assume it's some kind of attack.
|
||||
err = fmt.Errorf("%w: could not get mount id", errUnsafeProcfs)
|
||||
}
|
||||
if err != nil {
|
||||
return 0, &os.PathError{Op: "statx(STATX_MNT_ID_...)", Path: dir.Name() + "/" + path, Err: err}
|
||||
}
|
||||
return stx.Mnt_id, nil
|
||||
}
|
||||
|
||||
func checkSymlinkOvermount(procRoot *os.File, dir *os.File, path string) error {
|
||||
// Get the mntId of our procfs handle.
|
||||
expectedMountId, err := getMountId(procRoot, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Get the mntId of the target magic-link.
|
||||
gotMountId, err := getMountId(dir, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// As long as the directory mount is alive, even with wrapping mount IDs,
|
||||
// we would expect to see a different mount ID here. (Of course, if we're
|
||||
// using unsafeHostProcRoot() then an attaker could change this after we
|
||||
// did this check.)
|
||||
if expectedMountId != gotMountId {
|
||||
return fmt.Errorf("%w: symlink %s/%s has an overmount obscuring the real link (mount ids do not match %d != %d)", errUnsafeProcfs, dir.Name(), path, expectedMountId, gotMountId)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func doRawProcSelfFdReadlink(procRoot *os.File, fd int) (string, error) {
|
||||
fdPath := fmt.Sprintf("fd/%d", fd)
|
||||
procFdLink, closer, err := procThreadSelf(procRoot, fdPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("get safe /proc/thread-self/%s handle: %w", fdPath, err)
|
||||
}
|
||||
defer procFdLink.Close()
|
||||
defer closer()
|
||||
|
||||
// Try to detect if there is a mount on top of the magic-link. Since we use the handle directly
|
||||
// provide to the closure. If the closure uses the handle directly, this
|
||||
// should be safe in general (a mount on top of the path afterwards would
|
||||
// not affect the handle itself) and will definitely be safe if we are
|
||||
// using privateProcRoot() (at least since Linux 5.12[1], when anonymous
|
||||
// mount namespaces were completely isolated from external mounts including
|
||||
// mount propagation events).
|
||||
//
|
||||
// [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts
|
||||
// onto targets that reside on shared mounts").
|
||||
if err := checkSymlinkOvermount(procRoot, procFdLink, ""); err != nil {
|
||||
return "", fmt.Errorf("check safety of /proc/thread-self/fd/%d magiclink: %w", fd, err)
|
||||
}
|
||||
|
||||
// readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See Linux commit
|
||||
// 65cfc6722361 ("readlinkat(), fchownat() and fstatat() with empty
|
||||
// relative pathnames").
|
||||
return readlinkatFile(procFdLink, "")
|
||||
}
|
||||
|
||||
func rawProcSelfFdReadlink(fd int) (string, error) {
|
||||
procRoot, err := getProcRoot()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return doRawProcSelfFdReadlink(procRoot, fd)
|
||||
}
|
||||
|
||||
func procSelfFdReadlink(f *os.File) (string, error) {
|
||||
return rawProcSelfFdReadlink(int(f.Fd()))
|
||||
}
|
||||
|
||||
var (
|
||||
errPossibleBreakout = errors.New("possible breakout detected")
|
||||
errInvalidDirectory = errors.New("wandered into deleted directory")
|
||||
errDeletedInode = errors.New("cannot verify path of deleted inode")
|
||||
)
|
||||
|
||||
func isDeadInode(file *os.File) error {
|
||||
// If the nlink of a file drops to 0, there is an attacker deleting
|
||||
// directories during our walk, which could result in weird /proc values.
|
||||
// It's better to error out in this case.
|
||||
stat, err := fstat(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("check for dead inode: %w", err)
|
||||
}
|
||||
if stat.Nlink == 0 {
|
||||
err := errDeletedInode
|
||||
if stat.Mode&unix.S_IFMT == unix.S_IFDIR {
|
||||
err = errInvalidDirectory
|
||||
}
|
||||
return fmt.Errorf("%w %q", err, file.Name())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkProcSelfFdPath(path string, file *os.File) error {
|
||||
if err := isDeadInode(file); err != nil {
|
||||
return err
|
||||
}
|
||||
actualPath, err := procSelfFdReadlink(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get path of handle: %w", err)
|
||||
}
|
||||
if actualPath != path {
|
||||
return fmt.Errorf("%w: handle path %q doesn't match expected path %q", errPossibleBreakout, actualPath, path)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Test hooks used in the procfs tests to verify that the fallback logic works.
|
||||
// See testing_mocks_linux_test.go and procfs_linux_test.go for more details.
|
||||
var (
|
||||
hookForcePrivateProcRootOpenTree = hookDummyFile
|
||||
hookForcePrivateProcRootOpenTreeAtRecursive = hookDummyFile
|
||||
hookForceGetProcRootUnsafe = hookDummy
|
||||
|
||||
hookForceProcSelfTask = hookDummy
|
||||
hookForceProcSelf = hookDummy
|
||||
)
|
||||
|
||||
func hookDummy() bool { return false }
|
||||
func hookDummyFile(_ *os.File) bool { return false }
|
35
vendor/github.com/cyphar/filepath-securejoin/vfs.go
generated
vendored
Normal file
35
vendor/github.com/cyphar/filepath-securejoin/vfs.go
generated
vendored
Normal file
@ -0,0 +1,35 @@
|
||||
// Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import "os"
|
||||
|
||||
// In future this should be moved into a separate package, because now there
|
||||
// are several projects (umoci and go-mtree) that are using this sort of
|
||||
// interface.
|
||||
|
||||
// VFS is the minimal interface necessary to use [SecureJoinVFS]. A nil VFS is
|
||||
// equivalent to using the standard [os].* family of functions. This is mainly
|
||||
// used for the purposes of mock testing, but also can be used to otherwise use
|
||||
// [SecureJoinVFS] with VFS-like system.
|
||||
type VFS interface {
|
||||
// Lstat returns an [os.FileInfo] describing the named file. If the
|
||||
// file is a symbolic link, the returned [os.FileInfo] describes the
|
||||
// symbolic link. Lstat makes no attempt to follow the link.
|
||||
// The semantics are identical to [os.Lstat].
|
||||
Lstat(name string) (os.FileInfo, error)
|
||||
|
||||
// Readlink returns the destination of the named symbolic link.
|
||||
// The semantics are identical to [os.Readlink].
|
||||
Readlink(name string) (string, error)
|
||||
}
|
||||
|
||||
// osVFS is the "nil" VFS, in that it just passes everything through to the os
|
||||
// module.
|
||||
type osVFS struct{}
|
||||
|
||||
func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) }
|
||||
|
||||
func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) }
|
191
vendor/github.com/opencontainers/runc/LICENSE
generated
vendored
Normal file
191
vendor/github.com/opencontainers/runc/LICENSE
generated
vendored
Normal file
@ -0,0 +1,191 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2014 Docker, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
17
vendor/github.com/opencontainers/runc/NOTICE
generated
vendored
Normal file
17
vendor/github.com/opencontainers/runc/NOTICE
generated
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
runc
|
||||
|
||||
Copyright 2012-2015 Docker, Inc.
|
||||
|
||||
This product includes software developed at Docker, Inc. (http://www.docker.com).
|
||||
|
||||
The following is courtesy of our legal counsel:
|
||||
|
||||
|
||||
Use and transfer of Docker may be subject to certain restrictions by the
|
||||
United States and other governments.
|
||||
It is your responsibility to ensure that your use and/or transfer does not
|
||||
violate applicable laws.
|
||||
|
||||
For more information, please see http://www.bis.doc.gov
|
||||
|
||||
See also http://www.apache.org/dev/crypto.html and/or seek legal counsel.
|
258
vendor/github.com/opencontainers/runc/libcontainer/dmz/cloned_binary_linux.go
generated
vendored
Normal file
258
vendor/github.com/opencontainers/runc/libcontainer/dmz/cloned_binary_linux.go
generated
vendored
Normal file
@ -0,0 +1,258 @@
|
||||
package dmz
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
type SealFunc func(**os.File) error
|
||||
|
||||
var (
|
||||
_ SealFunc = sealMemfd
|
||||
_ SealFunc = sealFile
|
||||
)
|
||||
|
||||
func isExecutable(f *os.File) bool {
|
||||
if err := unix.Faccessat(int(f.Fd()), "", unix.X_OK, unix.AT_EACCESS|unix.AT_EMPTY_PATH); err == nil {
|
||||
return true
|
||||
} else if err == unix.EACCES {
|
||||
return false
|
||||
}
|
||||
path := "/proc/self/fd/" + strconv.Itoa(int(f.Fd()))
|
||||
if err := unix.Access(path, unix.X_OK); err == nil {
|
||||
return true
|
||||
} else if err == unix.EACCES {
|
||||
return false
|
||||
}
|
||||
// Cannot check -- assume it's executable (if not, exec will fail).
|
||||
logrus.Debugf("cannot do X_OK check on binary %s -- assuming it's executable", f.Name())
|
||||
return true
|
||||
}
|
||||
|
||||
const baseMemfdSeals = unix.F_SEAL_SEAL | unix.F_SEAL_SHRINK | unix.F_SEAL_GROW | unix.F_SEAL_WRITE
|
||||
|
||||
func sealMemfd(f **os.File) error {
|
||||
if err := (*f).Chmod(0o511); err != nil {
|
||||
return err
|
||||
}
|
||||
// Try to set the newer memfd sealing flags, but we ignore
|
||||
// errors because they are not needed and we want to continue
|
||||
// to work on older kernels.
|
||||
fd := (*f).Fd()
|
||||
// F_SEAL_FUTURE_WRITE -- Linux 5.1
|
||||
_, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, unix.F_SEAL_FUTURE_WRITE)
|
||||
// F_SEAL_EXEC -- Linux 6.3
|
||||
const F_SEAL_EXEC = 0x20 //nolint:revive // this matches the unix.* name
|
||||
_, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, F_SEAL_EXEC)
|
||||
// Apply all original memfd seals.
|
||||
_, err := unix.FcntlInt(fd, unix.F_ADD_SEALS, baseMemfdSeals)
|
||||
return os.NewSyscallError("fcntl(F_ADD_SEALS)", err)
|
||||
}
|
||||
|
||||
// Memfd creates a sealable executable memfd (supported since Linux 3.17).
|
||||
func Memfd(comment string) (*os.File, SealFunc, error) {
|
||||
file, err := system.ExecutableMemfd("runc_cloned:"+comment, unix.MFD_ALLOW_SEALING|unix.MFD_CLOEXEC)
|
||||
return file, sealMemfd, err
|
||||
}
|
||||
|
||||
func sealFile(f **os.File) error {
|
||||
// When sealing an O_TMPFILE-style descriptor we need to
|
||||
// re-open the path as O_PATH to clear the existing write
|
||||
// handle we have.
|
||||
opath, err := os.OpenFile(fmt.Sprintf("/proc/self/fd/%d", (*f).Fd()), unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reopen tmpfile: %w", err)
|
||||
}
|
||||
_ = (*f).Close()
|
||||
*f = opath
|
||||
return nil
|
||||
}
|
||||
|
||||
// otmpfile creates an open(O_TMPFILE) file in the given directory (supported
|
||||
// since Linux 3.11).
|
||||
func otmpfile(dir string) (*os.File, SealFunc, error) {
|
||||
file, err := os.OpenFile(dir, unix.O_TMPFILE|unix.O_RDWR|unix.O_EXCL|unix.O_CLOEXEC, 0o700)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("O_TMPFILE creation failed: %w", err)
|
||||
}
|
||||
// Make sure we actually got an unlinked O_TMPFILE descriptor.
|
||||
var stat unix.Stat_t
|
||||
if err := unix.Fstat(int(file.Fd()), &stat); err != nil {
|
||||
file.Close()
|
||||
return nil, nil, fmt.Errorf("cannot fstat O_TMPFILE fd: %w", err)
|
||||
} else if stat.Nlink != 0 {
|
||||
file.Close()
|
||||
return nil, nil, errors.New("O_TMPFILE has non-zero nlink")
|
||||
}
|
||||
return file, sealFile, err
|
||||
}
|
||||
|
||||
// mktemp creates a classic unlinked file in the given directory.
|
||||
func mktemp(dir string) (*os.File, SealFunc, error) {
|
||||
file, err := os.CreateTemp(dir, "runc.")
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
// Unlink the file and verify it was unlinked.
|
||||
if err := os.Remove(file.Name()); err != nil {
|
||||
return nil, nil, fmt.Errorf("unlinking classic tmpfile: %w", err)
|
||||
}
|
||||
if err := file.Chmod(0o511); err != nil {
|
||||
return nil, nil, fmt.Errorf("chmod classic tmpfile: %w", err)
|
||||
}
|
||||
var stat unix.Stat_t
|
||||
if err := unix.Fstat(int(file.Fd()), &stat); err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot fstat classic tmpfile: %w", err)
|
||||
} else if stat.Nlink != 0 {
|
||||
return nil, nil, fmt.Errorf("classic tmpfile %s has non-zero nlink after unlink", file.Name())
|
||||
}
|
||||
return file, sealFile, err
|
||||
}
|
||||
|
||||
func getSealableFile(comment, tmpDir string) (file *os.File, sealFn SealFunc, err error) {
|
||||
// First, try an executable memfd (supported since Linux 3.17).
|
||||
file, sealFn, err = Memfd(comment)
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
logrus.Debugf("memfd cloned binary failed, falling back to O_TMPFILE: %v", err)
|
||||
|
||||
// The tmpDir here (c.root) might be mounted noexec, so we need a couple of
|
||||
// fallbacks to try. It's possible that none of these are writable and
|
||||
// executable, in which case there's nothing we can practically do (other
|
||||
// than mounting our own executable tmpfs, which would have its own
|
||||
// issues).
|
||||
tmpDirs := []string{
|
||||
tmpDir,
|
||||
os.TempDir(),
|
||||
"/tmp",
|
||||
".",
|
||||
"/bin",
|
||||
"/",
|
||||
}
|
||||
|
||||
// Try to fallback to O_TMPFILE (supported since Linux 3.11).
|
||||
for _, dir := range tmpDirs {
|
||||
file, sealFn, err = otmpfile(dir)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if !isExecutable(file) {
|
||||
logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir)
|
||||
file.Close()
|
||||
continue
|
||||
}
|
||||
return
|
||||
}
|
||||
logrus.Debugf("O_TMPFILE cloned binary failed, falling back to mktemp(): %v", err)
|
||||
// Finally, try a classic unlinked temporary file.
|
||||
for _, dir := range tmpDirs {
|
||||
file, sealFn, err = mktemp(dir)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if !isExecutable(file) {
|
||||
logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir)
|
||||
file.Close()
|
||||
continue
|
||||
}
|
||||
return
|
||||
}
|
||||
return nil, nil, fmt.Errorf("could not create sealable file for cloned binary: %w", err)
|
||||
}
|
||||
|
||||
// CloneBinary creates a "sealed" clone of a given binary, which can be used to
|
||||
// thwart attempts by the container process to gain access to host binaries
|
||||
// through procfs magic-link shenanigans. For more details on why this is
|
||||
// necessary, see CVE-2019-5736.
|
||||
func CloneBinary(src io.Reader, size int64, name, tmpDir string) (*os.File, error) {
|
||||
logrus.Debugf("cloning %s binary (%d bytes)", name, size)
|
||||
file, sealFn, err := getSealableFile(name, tmpDir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
copied, err := system.Copy(file, src)
|
||||
if err != nil {
|
||||
file.Close()
|
||||
return nil, fmt.Errorf("copy binary: %w", err)
|
||||
} else if copied != size {
|
||||
file.Close()
|
||||
return nil, fmt.Errorf("copied binary size mismatch: %d != %d", copied, size)
|
||||
}
|
||||
if err := sealFn(&file); err != nil {
|
||||
file.Close()
|
||||
return nil, fmt.Errorf("could not seal fd: %w", err)
|
||||
}
|
||||
return file, nil
|
||||
}
|
||||
|
||||
// IsCloned returns whether the given file can be guaranteed to be a safe exe.
|
||||
func IsCloned(exe *os.File) bool {
|
||||
seals, err := unix.FcntlInt(exe.Fd(), unix.F_GET_SEALS, 0)
|
||||
if err != nil {
|
||||
// /proc/self/exe is probably not a memfd
|
||||
logrus.Debugf("F_GET_SEALS on %s failed: %v", exe.Name(), err)
|
||||
return false
|
||||
}
|
||||
// The memfd must have all of the base seals applied.
|
||||
logrus.Debugf("checking %s memfd seals: 0x%x", exe.Name(), seals)
|
||||
return seals&baseMemfdSeals == baseMemfdSeals
|
||||
}
|
||||
|
||||
// CloneSelfExe makes a clone of the current process's binary (through
|
||||
// /proc/self/exe). This binary can then be used for "runc init" in order to
|
||||
// make sure the container process can never resolve the original runc binary.
|
||||
// For more details on why this is necessary, see CVE-2019-5736.
|
||||
func CloneSelfExe(tmpDir string) (*os.File, error) {
|
||||
// Try to create a temporary overlayfs to produce a readonly version of
|
||||
// /proc/self/exe that cannot be "unwrapped" by the container. In contrast
|
||||
// to CloneBinary, this technique does not require any extra memory usage
|
||||
// and does not have the (fairly noticeable) performance impact of copying
|
||||
// a large binary file into a memfd.
|
||||
//
|
||||
// Based on some basic performance testing, the overlayfs approach has
|
||||
// effectively no performance overhead (it is on par with both
|
||||
// MS_BIND+MS_RDONLY and no binary cloning at all) while memfd copying adds
|
||||
// around ~60% overhead during container startup.
|
||||
overlayFile, err := sealedOverlayfs("/proc/self/exe", tmpDir)
|
||||
if err == nil {
|
||||
logrus.Debug("runc-dmz: using overlayfs for sealed /proc/self/exe") // used for tests
|
||||
return overlayFile, nil
|
||||
}
|
||||
logrus.WithError(err).Debugf("could not use overlayfs for /proc/self/exe sealing -- falling back to making a temporary copy")
|
||||
|
||||
selfExe, err := os.Open("/proc/self/exe")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening current binary: %w", err)
|
||||
}
|
||||
defer selfExe.Close()
|
||||
|
||||
stat, err := selfExe.Stat()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("checking /proc/self/exe size: %w", err)
|
||||
}
|
||||
size := stat.Size()
|
||||
|
||||
return CloneBinary(selfExe, size, "/proc/self/exe", tmpDir)
|
||||
}
|
||||
|
||||
// IsSelfExeCloned returns whether /proc/self/exe is a cloned binary that can
|
||||
// be guaranteed to be safe. This means that it must be a sealed memfd. Other
|
||||
// types of clones cannot be completely verified as safe.
|
||||
func IsSelfExeCloned() bool {
|
||||
selfExe, err := os.Open("/proc/self/exe")
|
||||
if err != nil {
|
||||
logrus.Debugf("open /proc/self/exe failed: %v", err)
|
||||
return false
|
||||
}
|
||||
defer selfExe.Close()
|
||||
return IsCloned(selfExe)
|
||||
}
|
122
vendor/github.com/opencontainers/runc/libcontainer/dmz/overlayfs_linux.go
generated
vendored
Normal file
122
vendor/github.com/opencontainers/runc/libcontainer/dmz/overlayfs_linux.go
generated
vendored
Normal file
@ -0,0 +1,122 @@
|
||||
package dmz
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
func fsopen(fsName string, flags int) (*os.File, error) {
|
||||
// Make sure we always set O_CLOEXEC.
|
||||
flags |= unix.FSOPEN_CLOEXEC
|
||||
fd, err := unix.Fsopen(fsName, flags)
|
||||
if err != nil {
|
||||
return nil, os.NewSyscallError("fsopen "+fsName, err)
|
||||
}
|
||||
return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil
|
||||
}
|
||||
|
||||
func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) {
|
||||
// Make sure we always set O_CLOEXEC.
|
||||
flags |= unix.FSMOUNT_CLOEXEC
|
||||
fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs)
|
||||
if err != nil {
|
||||
return nil, os.NewSyscallError("fsmount "+ctx.Name(), err)
|
||||
}
|
||||
runtime.KeepAlive(ctx) // make sure fd is kept alive while it's used
|
||||
return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil
|
||||
}
|
||||
|
||||
func escapeOverlayLowerDir(path string) string {
|
||||
// If the lowerdir path contains ":" we need to escape them, and if there
|
||||
// were any escape characters already (\) we need to escape those first.
|
||||
return strings.ReplaceAll(strings.ReplaceAll(path, `\`, `\\`), `:`, `\:`)
|
||||
}
|
||||
|
||||
// sealedOverlayfs will create an internal overlayfs mount using fsopen() that
|
||||
// uses the directory containing the binary as a lowerdir and a temporary tmpfs
|
||||
// as an upperdir. There is no way to "unwrap" this (unlike MS_BIND+MS_RDONLY)
|
||||
// and so we can create a safe zero-copy sealed version of /proc/self/exe.
|
||||
// This only works for privileged users and on kernels with overlayfs and
|
||||
// fsopen() enabled.
|
||||
//
|
||||
// TODO: Since Linux 5.11, overlayfs can be created inside user namespaces so
|
||||
// it is technically possible to create an overlayfs even for rootless
|
||||
// containers. Unfortunately, this would require some ugly manual CGo+fork
|
||||
// magic so we can do this later if we feel it's really needed.
|
||||
func sealedOverlayfs(binPath, tmpDir string) (_ *os.File, Err error) {
|
||||
// Try to do the superblock creation first to bail out early if we can't
|
||||
// use this method.
|
||||
overlayCtx, err := fsopen("overlay", unix.FSOPEN_CLOEXEC)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer overlayCtx.Close()
|
||||
|
||||
// binPath is going to be /proc/self/exe, so do a readlink to get the real
|
||||
// path. overlayfs needs the real underlying directory for this protection
|
||||
// mode to work properly.
|
||||
if realPath, err := os.Readlink(binPath); err == nil {
|
||||
binPath = realPath
|
||||
}
|
||||
binLowerDirPath, binName := filepath.Split(binPath)
|
||||
// Escape any ":"s or "\"s in the path.
|
||||
binLowerDirPath = escapeOverlayLowerDir(binLowerDirPath)
|
||||
|
||||
// Overlayfs requires two lowerdirs in order to run in "lower-only" mode,
|
||||
// where writes are completely blocked. Ideally we would create a dummy
|
||||
// tmpfs for this, but it turns out that overlayfs doesn't allow for
|
||||
// anonymous mountns paths.
|
||||
// NOTE: I'm working on a patch to fix this but it won't be backported.
|
||||
dummyLowerDirPath := escapeOverlayLowerDir(tmpDir)
|
||||
|
||||
// Configure the lowerdirs. The binary lowerdir needs to be on the top to
|
||||
// ensure that a file called "runc" (binName) in the dummy lowerdir doesn't
|
||||
// mask the binary.
|
||||
lowerDirStr := binLowerDirPath + ":" + dummyLowerDirPath
|
||||
if err := unix.FsconfigSetString(int(overlayCtx.Fd()), "lowerdir", lowerDirStr); err != nil {
|
||||
return nil, fmt.Errorf("fsconfig set overlayfs lowerdir=%s: %w", lowerDirStr, err)
|
||||
}
|
||||
|
||||
// We don't care about xino (Linux 4.17) but it will be auto-enabled on
|
||||
// some systems (if /run/runc and /usr/bin are on different filesystems)
|
||||
// and this produces spurious dmesg log entries. We can safely ignore
|
||||
// errors when disabling this because we don't actually care about the
|
||||
// setting and we're just opportunistically disabling it.
|
||||
_ = unix.FsconfigSetString(int(overlayCtx.Fd()), "xino", "off")
|
||||
|
||||
// Get an actual handle to the overlayfs.
|
||||
if err := unix.FsconfigCreate(int(overlayCtx.Fd())); err != nil {
|
||||
return nil, os.NewSyscallError("fsconfig create overlayfs", err)
|
||||
}
|
||||
overlayFd, err := fsmount(overlayCtx, unix.FSMOUNT_CLOEXEC, unix.MS_RDONLY|unix.MS_NODEV|unix.MS_NOSUID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer overlayFd.Close()
|
||||
|
||||
// Grab a handle to the binary through overlayfs.
|
||||
exeFile, err := utils.Openat(overlayFd, binName, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open %s from overlayfs (lowerdir=%s): %w", binName, lowerDirStr, err)
|
||||
}
|
||||
// NOTE: We would like to check that exeFile is the same as /proc/self/exe,
|
||||
// except this is a little difficult. Depending on what filesystems the
|
||||
// layers are on, overlayfs can remap the inode numbers (and it always
|
||||
// creates its own device numbers -- see ovl_map_dev_ino) so we can't do a
|
||||
// basic stat-based check. The only reasonable option would be to hash both
|
||||
// files and compare them, but this would require fully reading both files
|
||||
// which would produce a similar performance overhead to memfd cloning.
|
||||
//
|
||||
// Ultimately, there isn't a real attack to be worried about here. An
|
||||
// attacker would need to be able to modify files in /usr/sbin (or wherever
|
||||
// runc lives), at which point they could just replace the runc binary with
|
||||
// something malicious anyway.
|
||||
return exeFile, nil
|
||||
}
|
216
vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
generated
vendored
Normal file
216
vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
generated
vendored
Normal file
@ -0,0 +1,216 @@
|
||||
//go:build linux
|
||||
|
||||
package system
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type ParentDeathSignal int
|
||||
|
||||
func (p ParentDeathSignal) Restore() error {
|
||||
if p == 0 {
|
||||
return nil
|
||||
}
|
||||
current, err := GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if p == current {
|
||||
return nil
|
||||
}
|
||||
return p.Set()
|
||||
}
|
||||
|
||||
func (p ParentDeathSignal) Set() error {
|
||||
return SetParentDeathSignal(uintptr(p))
|
||||
}
|
||||
|
||||
func Exec(cmd string, args []string, env []string) error {
|
||||
for {
|
||||
err := unix.Exec(cmd, args, env)
|
||||
if err != unix.EINTR {
|
||||
return &os.PathError{Op: "exec", Path: cmd, Err: err}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func execveat(fd uintptr, pathname string, args []string, env []string, flags int) error {
|
||||
pathnamep, err := syscall.BytePtrFromString(pathname)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
argvp, err := syscall.SlicePtrFromStrings(args)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
envp, err := syscall.SlicePtrFromStrings(env)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, _, errno := syscall.Syscall6(
|
||||
unix.SYS_EXECVEAT,
|
||||
fd,
|
||||
uintptr(unsafe.Pointer(pathnamep)),
|
||||
uintptr(unsafe.Pointer(&argvp[0])),
|
||||
uintptr(unsafe.Pointer(&envp[0])),
|
||||
uintptr(flags),
|
||||
0,
|
||||
)
|
||||
return errno
|
||||
}
|
||||
|
||||
func Fexecve(fd uintptr, args []string, env []string) error {
|
||||
var err error
|
||||
for {
|
||||
err = execveat(fd, "", args, env, unix.AT_EMPTY_PATH)
|
||||
if err != unix.EINTR { // nolint:errorlint // unix errors are bare
|
||||
break
|
||||
}
|
||||
}
|
||||
if err == unix.ENOSYS { // nolint:errorlint // unix errors are bare
|
||||
// Fallback to classic /proc/self/fd/... exec.
|
||||
return Exec("/proc/self/fd/"+strconv.Itoa(int(fd)), args, env)
|
||||
}
|
||||
return os.NewSyscallError("execveat", err)
|
||||
}
|
||||
|
||||
func SetParentDeathSignal(sig uintptr) error {
|
||||
if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetParentDeathSignal() (ParentDeathSignal, error) {
|
||||
var sig int
|
||||
if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return ParentDeathSignal(sig), nil
|
||||
}
|
||||
|
||||
func SetKeepCaps() error {
|
||||
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func ClearKeepCaps() error {
|
||||
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func Setctty() error {
|
||||
if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetSubreaper sets the value i as the subreaper setting for the calling process
|
||||
func SetSubreaper(i int) error {
|
||||
return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
|
||||
}
|
||||
|
||||
// GetSubreaper returns the subreaper setting for the calling process
|
||||
func GetSubreaper() (int, error) {
|
||||
var i uintptr
|
||||
|
||||
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
return int(i), nil
|
||||
}
|
||||
|
||||
func ExecutableMemfd(comment string, flags int) (*os.File, error) {
|
||||
// Try to use MFD_EXEC first. On pre-6.3 kernels we get -EINVAL for this
|
||||
// flag. On post-6.3 kernels, with vm.memfd_noexec=1 this ensures we get an
|
||||
// executable memfd. For vm.memfd_noexec=2 this is a bit more complicated.
|
||||
// The original vm.memfd_noexec=2 implementation incorrectly silently
|
||||
// allowed MFD_EXEC[1] -- this should be fixed in 6.6. On 6.6 and newer
|
||||
// kernels, we will get -EACCES if we try to use MFD_EXEC with
|
||||
// vm.memfd_noexec=2 (for 6.3-6.5, -EINVAL was the intended return value).
|
||||
//
|
||||
// The upshot is we only need to retry without MFD_EXEC on -EINVAL because
|
||||
// it just so happens that passing MFD_EXEC bypasses vm.memfd_noexec=2 on
|
||||
// kernels where -EINVAL is actually a security denial.
|
||||
memfd, err := unix.MemfdCreate(comment, flags|unix.MFD_EXEC)
|
||||
if err == unix.EINVAL {
|
||||
memfd, err = unix.MemfdCreate(comment, flags)
|
||||
}
|
||||
if err != nil {
|
||||
if err == unix.EACCES {
|
||||
logrus.Info("memfd_create(MFD_EXEC) failed, possibly due to vm.memfd_noexec=2 -- falling back to less secure O_TMPFILE")
|
||||
}
|
||||
err := os.NewSyscallError("memfd_create", err)
|
||||
return nil, fmt.Errorf("failed to create executable memfd: %w", err)
|
||||
}
|
||||
return os.NewFile(uintptr(memfd), "/memfd:"+comment), nil
|
||||
}
|
||||
|
||||
// Copy is like io.Copy except it uses sendfile(2) if the source and sink are
|
||||
// both (*os.File) as an optimisation to make copies faster.
|
||||
func Copy(dst io.Writer, src io.Reader) (copied int64, err error) {
|
||||
dstFile, _ := dst.(*os.File)
|
||||
srcFile, _ := src.(*os.File)
|
||||
|
||||
if dstFile != nil && srcFile != nil {
|
||||
fi, err := srcFile.Stat()
|
||||
if err != nil {
|
||||
goto fallback
|
||||
}
|
||||
size := fi.Size()
|
||||
for size > 0 {
|
||||
n, err := unix.Sendfile(int(dstFile.Fd()), int(srcFile.Fd()), nil, int(size))
|
||||
if n > 0 {
|
||||
size -= int64(n)
|
||||
copied += int64(n)
|
||||
}
|
||||
if err == unix.EINTR {
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
if copied == 0 {
|
||||
// If we haven't copied anything so far, we can safely just
|
||||
// fallback to io.Copy. We could always do the fallback but
|
||||
// it's safer to error out in the case of a partial copy
|
||||
// followed by an error (which should never happen).
|
||||
goto fallback
|
||||
}
|
||||
return copied, fmt.Errorf("partial sendfile copy: %w", err)
|
||||
}
|
||||
}
|
||||
return copied, nil
|
||||
}
|
||||
|
||||
fallback:
|
||||
return io.Copy(dst, src)
|
||||
}
|
||||
|
||||
// SetLinuxPersonality sets the Linux execution personality. For more information see the personality syscall documentation.
|
||||
// checkout getLinuxPersonalityFromStr() from libcontainer/specconv/spec_linux.go for type conversion.
|
||||
func SetLinuxPersonality(personality int) error {
|
||||
_, _, errno := unix.Syscall(unix.SYS_PERSONALITY, uintptr(personality), 0, 0)
|
||||
if errno != 0 {
|
||||
return &os.SyscallError{Syscall: "set_personality", Err: errno}
|
||||
}
|
||||
return nil
|
||||
}
|
127
vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
generated
vendored
Normal file
127
vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
generated
vendored
Normal file
@ -0,0 +1,127 @@
|
||||
package system
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// State is the status of a process.
|
||||
type State rune
|
||||
|
||||
const ( // Only values for Linux 3.14 and later are listed here
|
||||
Dead State = 'X'
|
||||
DiskSleep State = 'D'
|
||||
Running State = 'R'
|
||||
Sleeping State = 'S'
|
||||
Stopped State = 'T'
|
||||
TracingStop State = 't'
|
||||
Zombie State = 'Z'
|
||||
Parked State = 'P'
|
||||
Idle State = 'I'
|
||||
)
|
||||
|
||||
// String forms of the state from proc(5)'s documentation for
|
||||
// /proc/[pid]/status' "State" field.
|
||||
func (s State) String() string {
|
||||
switch s {
|
||||
case Dead:
|
||||
return "dead"
|
||||
case DiskSleep:
|
||||
return "disk sleep"
|
||||
case Running:
|
||||
return "running"
|
||||
case Sleeping:
|
||||
return "sleeping"
|
||||
case Stopped:
|
||||
return "stopped"
|
||||
case TracingStop:
|
||||
return "tracing stop"
|
||||
case Zombie:
|
||||
return "zombie"
|
||||
case Parked:
|
||||
return "parked"
|
||||
case Idle:
|
||||
return "idle" // kernel thread
|
||||
default:
|
||||
return fmt.Sprintf("unknown (%c)", s)
|
||||
}
|
||||
}
|
||||
|
||||
// Stat_t represents the information from /proc/[pid]/stat, as
|
||||
// described in proc(5) with names based on the /proc/[pid]/status
|
||||
// fields.
|
||||
type Stat_t struct {
|
||||
// Name is the command run by the process.
|
||||
Name string
|
||||
|
||||
// State is the state of the process.
|
||||
State State
|
||||
|
||||
// StartTime is the number of clock ticks after system boot (since
|
||||
// Linux 2.6).
|
||||
StartTime uint64
|
||||
}
|
||||
|
||||
// Stat returns a Stat_t instance for the specified process.
|
||||
func Stat(pid int) (stat Stat_t, err error) {
|
||||
bytes, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
|
||||
if err != nil {
|
||||
return stat, err
|
||||
}
|
||||
return parseStat(string(bytes))
|
||||
}
|
||||
|
||||
func parseStat(data string) (stat Stat_t, err error) {
|
||||
// Example:
|
||||
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
// The fields are space-separated, see full description in proc(5).
|
||||
//
|
||||
// We are only interested in:
|
||||
// * field 2: process name. It is the only field enclosed into
|
||||
// parenthesis, as it can contain spaces (and parenthesis) inside.
|
||||
// * field 3: process state, a single character (%c)
|
||||
// * field 22: process start time, a long unsigned integer (%llu).
|
||||
|
||||
// 1. Look for the first '(' and the last ')' first, what's in between is Name.
|
||||
// We expect at least 20 fields and a space after the last one.
|
||||
|
||||
const minAfterName = 20*2 + 1 // the min field is '0 '.
|
||||
|
||||
first := strings.IndexByte(data, '(')
|
||||
if first < 0 || first+minAfterName >= len(data) {
|
||||
return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data)
|
||||
}
|
||||
|
||||
last := strings.LastIndexByte(data, ')')
|
||||
if last <= first || last+minAfterName >= len(data) {
|
||||
return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data)
|
||||
}
|
||||
|
||||
stat.Name = data[first+1 : last]
|
||||
|
||||
// 2. Remove fields 1 and 2 and a space after. State is right after.
|
||||
data = data[last+2:]
|
||||
stat.State = State(data[0])
|
||||
|
||||
// 3. StartTime is field 22, data is at field 3 now, so we need to skip 19 spaces.
|
||||
skipSpaces := 22 - 3
|
||||
for first = 0; skipSpaces > 0 && first < len(data); first++ {
|
||||
if data[first] == ' ' {
|
||||
skipSpaces--
|
||||
}
|
||||
}
|
||||
// Now first points to StartTime; look for space right after.
|
||||
i := strings.IndexByte(data[first:], ' ')
|
||||
if i < 0 {
|
||||
return stat, fmt.Errorf("invalid stat data (too short): %q", data)
|
||||
}
|
||||
stat.StartTime, err = strconv.ParseUint(data[first:first+i], 10, 64)
|
||||
if err != nil {
|
||||
return stat, fmt.Errorf("invalid stat data (bad start time): %w", err)
|
||||
}
|
||||
|
||||
return stat, nil
|
||||
}
|
15
vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux.go
generated
vendored
Normal file
15
vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux.go
generated
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
//go:build go1.23
|
||||
|
||||
package system
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// ClearRlimitNofileCache clears go runtime's nofile rlimit cache. The argument
|
||||
// is process RLIMIT_NOFILE values. Relies on go.dev/cl/588076.
|
||||
func ClearRlimitNofileCache(lim *syscall.Rlimit) {
|
||||
// Ignore the return values since we only need to clean the cache,
|
||||
// the limit is going to be set via unix.Prlimit elsewhere.
|
||||
_ = syscall.Setrlimit(syscall.RLIMIT_NOFILE, lim)
|
||||
}
|
27
vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux_go122.go
generated
vendored
Normal file
27
vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux_go122.go
generated
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
//go:build !go1.23
|
||||
|
||||
// TODO: remove this file once go 1.22 is no longer supported.
|
||||
|
||||
package system
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
_ "unsafe" // Needed for go:linkname to work.
|
||||
)
|
||||
|
||||
//go:linkname syscallOrigRlimitNofile syscall.origRlimitNofile
|
||||
var syscallOrigRlimitNofile atomic.Pointer[syscall.Rlimit]
|
||||
|
||||
// ClearRlimitNofileCache clears go runtime's nofile rlimit cache.
|
||||
// The argument is process RLIMIT_NOFILE values.
|
||||
func ClearRlimitNofileCache(_ *syscall.Rlimit) {
|
||||
// As reported in issue #4195, the new version of go runtime(since 1.19)
|
||||
// will cache rlimit-nofile. Before executing execve, the rlimit-nofile
|
||||
// of the process will be restored with the cache. In runc, this will
|
||||
// cause the rlimit-nofile setting by the parent process for the container
|
||||
// to become invalid. It can be solved by clearing this cache. But
|
||||
// unfortunately, go stdlib doesn't provide such function, so we need to
|
||||
// link to the private var `origRlimitNofile` in package syscall to hack.
|
||||
syscallOrigRlimitNofile.Store(nil)
|
||||
}
|
119
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
Normal file
119
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
Normal file
@ -0,0 +1,119 @@
|
||||
package utils
|
||||
|
||||
/*
|
||||
* Copyright 2016, 2017 SUSE LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// MaxNameLen is the maximum length of the name of a file descriptor being sent
|
||||
// using SendFile. The name of the file handle returned by RecvFile will never be
|
||||
// larger than this value.
|
||||
const MaxNameLen = 4096
|
||||
|
||||
// oobSpace is the size of the oob slice required to store a single FD. Note
|
||||
// that unix.UnixRights appears to make the assumption that fd is always int32,
|
||||
// so sizeof(fd) = 4.
|
||||
var oobSpace = unix.CmsgSpace(4)
|
||||
|
||||
// RecvFile waits for a file descriptor to be sent over the given AF_UNIX
|
||||
// socket. The file name of the remote file descriptor will be recreated
|
||||
// locally (it is sent as non-auxiliary data in the same payload).
|
||||
func RecvFile(socket *os.File) (_ *os.File, Err error) {
|
||||
name := make([]byte, MaxNameLen)
|
||||
oob := make([]byte, oobSpace)
|
||||
|
||||
sockfd := socket.Fd()
|
||||
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if n >= MaxNameLen || oobn != oobSpace {
|
||||
return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
||||
}
|
||||
// Truncate.
|
||||
name = name[:n]
|
||||
oob = oob[:oobn]
|
||||
|
||||
scms, err := unix.ParseSocketControlMessage(oob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// We cannot control how many SCM_RIGHTS we receive, and upon receiving
|
||||
// them all of the descriptors are installed in our fd table, so we need to
|
||||
// parse all of the SCM_RIGHTS we received in order to close all of the
|
||||
// descriptors on error.
|
||||
var fds []int
|
||||
defer func() {
|
||||
for i, fd := range fds {
|
||||
if i == 0 && Err == nil {
|
||||
// Only close the first one on error.
|
||||
continue
|
||||
}
|
||||
// Always close extra ones.
|
||||
_ = unix.Close(fd)
|
||||
}
|
||||
}()
|
||||
var lastErr error
|
||||
for _, scm := range scms {
|
||||
if scm.Header.Type == unix.SCM_RIGHTS {
|
||||
scmFds, err := unix.ParseUnixRights(&scm)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
} else {
|
||||
fds = append(fds, scmFds...)
|
||||
}
|
||||
}
|
||||
}
|
||||
if lastErr != nil {
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
// We do this after collecting the fds to make sure we close them all when
|
||||
// returning an error here.
|
||||
if len(scms) != 1 {
|
||||
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
||||
}
|
||||
if len(fds) != 1 {
|
||||
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
|
||||
}
|
||||
return os.NewFile(uintptr(fds[0]), string(name)), nil
|
||||
}
|
||||
|
||||
// SendFile sends a file over the given AF_UNIX socket. file.Name() is also
|
||||
// included so that if the other end uses RecvFile, the file will have the same
|
||||
// name information.
|
||||
func SendFile(socket *os.File, file *os.File) error {
|
||||
name := file.Name()
|
||||
if len(name) >= MaxNameLen {
|
||||
return fmt.Errorf("sendfd: filename too long: %s", name)
|
||||
}
|
||||
err := SendRawFd(socket, name, file.Fd())
|
||||
runtime.KeepAlive(file)
|
||||
return err
|
||||
}
|
||||
|
||||
// SendRawFd sends a specific file descriptor over the given AF_UNIX socket.
|
||||
func SendRawFd(socket *os.File, msg string, fd uintptr) error {
|
||||
oob := unix.UnixRights(int(fd))
|
||||
return unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0)
|
||||
}
|
115
vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
Normal file
115
vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
Normal file
@ -0,0 +1,115 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
exitSignalOffset = 128
|
||||
)
|
||||
|
||||
// ExitStatus returns the correct exit status for a process based on if it
|
||||
// was signaled or exited cleanly
|
||||
func ExitStatus(status unix.WaitStatus) int {
|
||||
if status.Signaled() {
|
||||
return exitSignalOffset + int(status.Signal())
|
||||
}
|
||||
return status.ExitStatus()
|
||||
}
|
||||
|
||||
// WriteJSON writes the provided struct v to w using standard json marshaling
|
||||
// without a trailing newline. This is used instead of json.Encoder because
|
||||
// there might be a problem in json decoder in some cases, see:
|
||||
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
|
||||
func WriteJSON(w io.Writer, v interface{}) error {
|
||||
data, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = w.Write(data)
|
||||
return err
|
||||
}
|
||||
|
||||
// CleanPath makes a path safe for use with filepath.Join. This is done by not
|
||||
// only cleaning the path, but also (if the path is relative) adding a leading
|
||||
// '/' and cleaning it (then removing the leading '/'). This ensures that a
|
||||
// path resulting from prepending another path will always resolve to lexically
|
||||
// be a subdirectory of the prefixed path. This is all done lexically, so paths
|
||||
// that include symlinks won't be safe as a result of using CleanPath.
|
||||
func CleanPath(path string) string {
|
||||
// Deal with empty strings nicely.
|
||||
if path == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Ensure that all paths are cleaned (especially problematic ones like
|
||||
// "/../../../../../" which can cause lots of issues).
|
||||
path = filepath.Clean(path)
|
||||
|
||||
// If the path isn't absolute, we need to do more processing to fix paths
|
||||
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
|
||||
// paths to relative ones.
|
||||
if !filepath.IsAbs(path) {
|
||||
path = filepath.Clean(string(os.PathSeparator) + path)
|
||||
// This can't fail, as (by definition) all paths are relative to root.
|
||||
path, _ = filepath.Rel(string(os.PathSeparator), path)
|
||||
}
|
||||
|
||||
// Clean the path again for good measure.
|
||||
return filepath.Clean(path)
|
||||
}
|
||||
|
||||
// stripRoot returns the passed path, stripping the root path if it was
|
||||
// (lexicially) inside it. Note that both passed paths will always be treated
|
||||
// as absolute, and the returned path will also always be absolute. In
|
||||
// addition, the paths are cleaned before stripping the root.
|
||||
func stripRoot(root, path string) string {
|
||||
// Make the paths clean and absolute.
|
||||
root, path = CleanPath("/"+root), CleanPath("/"+path)
|
||||
switch {
|
||||
case path == root:
|
||||
path = "/"
|
||||
case root == "/":
|
||||
// do nothing
|
||||
case strings.HasPrefix(path, root+"/"):
|
||||
path = strings.TrimPrefix(path, root+"/")
|
||||
}
|
||||
return CleanPath("/" + path)
|
||||
}
|
||||
|
||||
// SearchLabels searches through a list of key=value pairs for a given key,
|
||||
// returning its value, and the binary flag telling whether the key exist.
|
||||
func SearchLabels(labels []string, key string) (string, bool) {
|
||||
key += "="
|
||||
for _, s := range labels {
|
||||
if strings.HasPrefix(s, key) {
|
||||
return s[len(key):], true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Annotations returns the bundle path and user defined annotations from the
|
||||
// libcontainer state. We need to remove the bundle because that is a label
|
||||
// added by libcontainer.
|
||||
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
|
||||
userAnnotations = make(map[string]string)
|
||||
for _, l := range labels {
|
||||
name, value, ok := strings.Cut(l, "=")
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if name == "bundle" {
|
||||
bundle = value
|
||||
} else {
|
||||
userAnnotations[name] = value
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
360
vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
Normal file
360
vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
Normal file
@ -0,0 +1,360 @@
|
||||
//go:build !windows
|
||||
|
||||
package utils
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
_ "unsafe" // for go:linkname
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// EnsureProcHandle returns whether or not the given file handle is on procfs.
|
||||
func EnsureProcHandle(fh *os.File) error {
|
||||
var buf unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
|
||||
return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
|
||||
}
|
||||
if buf.Type != unix.PROC_SUPER_MAGIC {
|
||||
return fmt.Errorf("%s is not on procfs", fh.Name())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
haveCloseRangeCloexecBool bool
|
||||
haveCloseRangeCloexecOnce sync.Once
|
||||
)
|
||||
|
||||
func haveCloseRangeCloexec() bool {
|
||||
haveCloseRangeCloexecOnce.Do(func() {
|
||||
// Make sure we're not closing a random file descriptor.
|
||||
tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer unix.Close(tmpFd)
|
||||
|
||||
err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC)
|
||||
// Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC).
|
||||
// -ENOSYS and -EINVAL ultimately mean we don't have support, but any
|
||||
// other potential error would imply that even the most basic close
|
||||
// operation wouldn't work.
|
||||
haveCloseRangeCloexecBool = err == nil
|
||||
})
|
||||
return haveCloseRangeCloexecBool
|
||||
}
|
||||
|
||||
type fdFunc func(fd int)
|
||||
|
||||
// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
|
||||
// the current process.
|
||||
func fdRangeFrom(minFd int, fn fdFunc) error {
|
||||
procSelfFd, closer := ProcThreadSelf("fd")
|
||||
defer closer()
|
||||
|
||||
fdDir, err := os.Open(procSelfFd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fdDir.Close()
|
||||
|
||||
if err := EnsureProcHandle(fdDir); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fdList, err := fdDir.Readdirnames(-1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, fdStr := range fdList {
|
||||
fd, err := strconv.Atoi(fdStr)
|
||||
// Ignore non-numeric file names.
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
// Ignore descriptors lower than our specified minimum.
|
||||
if fd < minFd {
|
||||
continue
|
||||
}
|
||||
// Ignore the file descriptor we used for readdir, as it will be closed
|
||||
// when we return.
|
||||
if uintptr(fd) == fdDir.Fd() {
|
||||
continue
|
||||
}
|
||||
// Run the closure.
|
||||
fn(fd)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
|
||||
// equal to minFd in the current process.
|
||||
func CloseExecFrom(minFd int) error {
|
||||
// Use close_range(CLOSE_RANGE_CLOEXEC) if possible.
|
||||
if haveCloseRangeCloexec() {
|
||||
err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
|
||||
return os.NewSyscallError("close_range", err)
|
||||
}
|
||||
// Otherwise, fall back to the standard loop.
|
||||
return fdRangeFrom(minFd, unix.CloseOnExec)
|
||||
}
|
||||
|
||||
//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor
|
||||
|
||||
// In order to make sure we do not close the internal epoll descriptors the Go
|
||||
// runtime uses, we need to ensure that we skip descriptors that match
|
||||
// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing,
|
||||
// unfortunately there's no other way to be sure we're only keeping the file
|
||||
// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
|
||||
func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
|
||||
|
||||
// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
|
||||
// current process, except for those critical to Go's runtime (such as the
|
||||
// netpoll management descriptors).
|
||||
//
|
||||
// NOTE: That this function is incredibly dangerous to use in most Go code, as
|
||||
// closing file descriptors from underneath *os.File handles can lead to very
|
||||
// bad behaviour (the closed file descriptor can be re-used and then any
|
||||
// *os.File operations would apply to the wrong file). This function is only
|
||||
// intended to be called from the last stage of runc init.
|
||||
func UnsafeCloseFrom(minFd int) error {
|
||||
// We cannot use close_range(2) even if it is available, because we must
|
||||
// not close some file descriptors.
|
||||
return fdRangeFrom(minFd, func(fd int) {
|
||||
if runtime_IsPollDescriptor(uintptr(fd)) {
|
||||
// These are the Go runtimes internal netpoll file descriptors.
|
||||
// These file descriptors are operated on deep in the Go scheduler,
|
||||
// and closing those files from underneath Go can result in panics.
|
||||
// There is no issue with keeping them because they are not
|
||||
// executable and are not useful to an attacker anyway. Also we
|
||||
// don't have any choice.
|
||||
return
|
||||
}
|
||||
// There's nothing we can do about errors from close(2), and the
|
||||
// only likely error to be seen is EBADF which indicates the fd was
|
||||
// already closed (in which case, we got what we wanted).
|
||||
_ = unix.Close(fd)
|
||||
})
|
||||
}
|
||||
|
||||
// NewSockPair returns a new SOCK_STREAM unix socket pair.
|
||||
func NewSockPair(name string) (parent, child *os.File, err error) {
|
||||
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
|
||||
}
|
||||
|
||||
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
||||
// corresponding to the unsafePath resolved within the root. Before passing the
|
||||
// fd, this path is verified to have been inside the root -- so operating on it
|
||||
// through the passed fdpath should be safe. Do not access this path through
|
||||
// the original path strings, and do not attempt to use the pathname outside of
|
||||
// the passed closure (the file handle will be freed once the closure returns).
|
||||
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||
// Remove the root then forcefully resolve inside the root.
|
||||
unsafePath = stripRoot(root, unsafePath)
|
||||
path, err := securejoin.SecureJoin(root, unsafePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolving path inside rootfs failed: %w", err)
|
||||
}
|
||||
|
||||
procSelfFd, closer := ProcThreadSelf("fd/")
|
||||
defer closer()
|
||||
|
||||
// Open the target path.
|
||||
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open o_path procfd: %w", err)
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd())))
|
||||
// Double-check the path is the one we expected.
|
||||
if realpath, err := os.Readlink(procfd); err != nil {
|
||||
return fmt.Errorf("procfd verification failed: %w", err)
|
||||
} else if realpath != path {
|
||||
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
||||
}
|
||||
|
||||
return fn(procfd)
|
||||
}
|
||||
|
||||
type ProcThreadSelfCloser func()
|
||||
|
||||
var (
|
||||
haveProcThreadSelf bool
|
||||
haveProcThreadSelfOnce sync.Once
|
||||
)
|
||||
|
||||
// ProcThreadSelf returns a string that is equivalent to
|
||||
// /proc/thread-self/<subpath>, with a graceful fallback on older kernels where
|
||||
// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin,
|
||||
// meaning that the passed string needs to be trusted. The caller _must_ call
|
||||
// the returned procThreadSelfCloser function (which is runtime.UnlockOSThread)
|
||||
// *only once* after it has finished using the returned path string.
|
||||
func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
|
||||
haveProcThreadSelfOnce.Do(func() {
|
||||
if _, err := os.Stat("/proc/thread-self/"); err == nil {
|
||||
haveProcThreadSelf = true
|
||||
} else {
|
||||
logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err)
|
||||
}
|
||||
})
|
||||
|
||||
// We need to lock our thread until the caller is done with the path string
|
||||
// because any non-atomic operation on the path (such as opening a file,
|
||||
// then reading it) could be interrupted by the Go runtime where the
|
||||
// underlying thread is swapped out and the original thread is killed,
|
||||
// resulting in pull-your-hair-out-hard-to-debug issues in the caller. In
|
||||
// addition, the pre-3.17 fallback makes everything non-atomic because the
|
||||
// same thing could happen between unix.Gettid() and the path operations.
|
||||
//
|
||||
// In theory, we don't need to lock in the atomic user case when using
|
||||
// /proc/thread-self/, but it's better to be safe than sorry (and there are
|
||||
// only one or two truly atomic users of /proc/thread-self/).
|
||||
runtime.LockOSThread()
|
||||
|
||||
threadSelf := "/proc/thread-self/"
|
||||
if !haveProcThreadSelf {
|
||||
// Pre-3.17 kernels did not have /proc/thread-self, so do it manually.
|
||||
threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/"
|
||||
if _, err := os.Stat(threadSelf); err != nil {
|
||||
// Unfortunately, this code is called from rootfs_linux.go where we
|
||||
// are running inside the pid namespace of the container but /proc
|
||||
// is the host's procfs. Unfortunately there is no real way to get
|
||||
// the correct tid to use here (the kernel age means we cannot do
|
||||
// things like set up a private fsopen("proc") -- even scanning
|
||||
// NSpid in all of the tasks in /proc/self/task/*/status requires
|
||||
// Linux 4.1).
|
||||
//
|
||||
// So, we just have to assume that /proc/self is acceptable in this
|
||||
// one specific case.
|
||||
if os.Getpid() == 1 {
|
||||
logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err)
|
||||
} else {
|
||||
// This should never happen, but the fallback should work in most cases...
|
||||
logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err)
|
||||
}
|
||||
threadSelf = "/proc/self/"
|
||||
}
|
||||
}
|
||||
return threadSelf + subpath, runtime.UnlockOSThread
|
||||
}
|
||||
|
||||
// ProcThreadSelfFd is small wrapper around ProcThreadSelf to make it easier to
|
||||
// create a /proc/thread-self handle for given file descriptor.
|
||||
//
|
||||
// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but
|
||||
// without using fmt.Sprintf to avoid unneeded overhead.
|
||||
func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
|
||||
return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
|
||||
}
|
||||
|
||||
// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"),
|
||||
// but properly handling the case where path or root are "/".
|
||||
//
|
||||
// NOTE: The return value only make sense if the path doesn't contain "..".
|
||||
func IsLexicallyInRoot(root, path string) bool {
|
||||
if root != "/" {
|
||||
root += "/"
|
||||
}
|
||||
if path != "/" {
|
||||
path += "/"
|
||||
}
|
||||
return strings.HasPrefix(path, root)
|
||||
}
|
||||
|
||||
// MkdirAllInRootOpen attempts to make
|
||||
//
|
||||
// path, _ := securejoin.SecureJoin(root, unsafePath)
|
||||
// os.MkdirAll(path, mode)
|
||||
// os.Open(path)
|
||||
//
|
||||
// safer against attacks where components in the path are changed between
|
||||
// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
|
||||
// try to detect any symlink components in the path while we are doing the
|
||||
// MkdirAll.
|
||||
//
|
||||
// NOTE: If unsafePath is a subpath of root, we assume that you have already
|
||||
// called SecureJoin and so we use the provided path verbatim without resolving
|
||||
// any symlinks (this is done in a way that avoids symlink-exchange races).
|
||||
// This means that the path also must not contain ".." elements, otherwise an
|
||||
// error will occur.
|
||||
//
|
||||
// This uses securejoin.MkdirAllHandle under the hood, but it has special
|
||||
// handling if unsafePath has already been scoped within the rootfs (this is
|
||||
// needed for a lot of runc callers and fixing this would require reworking a
|
||||
// lot of path logic).
|
||||
func MkdirAllInRootOpen(root, unsafePath string, mode os.FileMode) (_ *os.File, Err error) {
|
||||
// If the path is already "within" the root, get the path relative to the
|
||||
// root and use that as the unsafe path. This is necessary because a lot of
|
||||
// MkdirAllInRootOpen callers have already done SecureJoin, and refactoring
|
||||
// all of them to stop using these SecureJoin'd paths would require a fair
|
||||
// amount of work.
|
||||
// TODO(cyphar): Do the refactor to libpathrs once it's ready.
|
||||
if IsLexicallyInRoot(root, unsafePath) {
|
||||
subPath, err := filepath.Rel(root, unsafePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
unsafePath = subPath
|
||||
}
|
||||
|
||||
// Check for any silly mode bits.
|
||||
if mode&^0o7777 != 0 {
|
||||
return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
|
||||
}
|
||||
// Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if
|
||||
// passed. While it would make sense to return an error in that case (since
|
||||
// the user has asked for a mode that won't be applied), for compatibility
|
||||
// reasons we have to ignore these bits.
|
||||
if ignoredBits := mode &^ 0o1777; ignoredBits != 0 {
|
||||
logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits)
|
||||
mode &= 0o1777
|
||||
}
|
||||
|
||||
rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open root handle: %w", err)
|
||||
}
|
||||
defer rootDir.Close()
|
||||
|
||||
return securejoin.MkdirAllHandle(rootDir, unsafePath, mode)
|
||||
}
|
||||
|
||||
// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
|
||||
// returned handle, for callers that don't need to use it.
|
||||
func MkdirAllInRoot(root, unsafePath string, mode os.FileMode) error {
|
||||
f, err := MkdirAllInRootOpen(root, unsafePath, mode)
|
||||
if err == nil {
|
||||
_ = f.Close()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Openat is a Go-friendly openat(2) wrapper.
|
||||
func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
|
||||
dirFd := unix.AT_FDCWD
|
||||
if dir != nil {
|
||||
dirFd = int(dir.Fd())
|
||||
}
|
||||
flags |= unix.O_CLOEXEC
|
||||
|
||||
fd, err := unix.Openat(dirFd, path, flags, mode)
|
||||
if err != nil {
|
||||
return nil, &os.PathError{Op: "openat", Path: path, Err: err}
|
||||
}
|
||||
return os.NewFile(uintptr(fd), dir.Name()+"/"+path), nil
|
||||
}
|
10
vendor/modules.txt
vendored
10
vendor/modules.txt
vendored
@ -14,6 +14,9 @@ github.com/NVIDIA/go-nvml/pkg/nvml/mock
|
||||
# github.com/cpuguy83/go-md2man/v2 v2.0.5
|
||||
## explicit; go 1.11
|
||||
github.com/cpuguy83/go-md2man/v2/md2man
|
||||
# github.com/cyphar/filepath-securejoin v0.4.1
|
||||
## explicit; go 1.18
|
||||
github.com/cyphar/filepath-securejoin
|
||||
# github.com/davecgh/go-spew v1.1.1
|
||||
## explicit
|
||||
github.com/davecgh/go-spew/spew
|
||||
@ -30,6 +33,11 @@ github.com/google/uuid
|
||||
# github.com/moby/sys/symlink v0.3.0
|
||||
## explicit; go 1.17
|
||||
github.com/moby/sys/symlink
|
||||
# github.com/opencontainers/runc v1.2.5
|
||||
## explicit; go 1.22
|
||||
github.com/opencontainers/runc/libcontainer/dmz
|
||||
github.com/opencontainers/runc/libcontainer/system
|
||||
github.com/opencontainers/runc/libcontainer/utils
|
||||
# github.com/opencontainers/runtime-spec v1.2.0
|
||||
## explicit
|
||||
github.com/opencontainers/runtime-spec/specs-go
|
||||
@ -38,8 +46,6 @@ github.com/opencontainers/runtime-spec/specs-go
|
||||
github.com/opencontainers/runtime-tools/generate
|
||||
github.com/opencontainers/runtime-tools/generate/seccomp
|
||||
github.com/opencontainers/runtime-tools/validate/capabilities
|
||||
# github.com/opencontainers/selinux v1.11.0
|
||||
## explicit; go 1.19
|
||||
# github.com/pelletier/go-toml v1.9.5
|
||||
## explicit; go 1.12
|
||||
github.com/pelletier/go-toml
|
||||
|
Loading…
Reference in New Issue
Block a user