mirror of
https://github.com/deepseek-ai/3FS
synced 2025-06-26 18:16:45 +00:00
370 lines
16 KiB
C++
370 lines
16 KiB
C++
#include <algorithm>
|
|
#include <cassert>
|
|
#include <fcntl.h>
|
|
#include <folly/Likely.h>
|
|
#include <folly/experimental/coro/Collect.h>
|
|
#include <folly/logging/xlog.h>
|
|
#include <linux/fs.h>
|
|
#include <memory>
|
|
#include <optional>
|
|
#include <set>
|
|
#include <string>
|
|
#include <sys/stat.h>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "common/kv/ITransaction.h"
|
|
#include "common/utils/Coroutine.h"
|
|
#include "common/utils/FaultInjection.h"
|
|
#include "common/utils/Path.h"
|
|
#include "common/utils/Result.h"
|
|
#include "common/utils/UtcTime.h"
|
|
#include "fbs/core/user/User.h"
|
|
#include "fbs/meta/Common.h"
|
|
#include "fbs/meta/Service.h"
|
|
#include "meta/components/GcManager.h"
|
|
#include "meta/store/DirEntry.h"
|
|
#include "meta/store/Inode.h"
|
|
#include "meta/store/MetaStore.h"
|
|
#include "meta/store/Operation.h"
|
|
#include "meta/store/PathResolve.h"
|
|
#include "meta/store/Utils.h"
|
|
#include "meta/store/ops/SetAttr.h"
|
|
|
|
namespace hf3fs::meta::server {
|
|
|
|
/** MetaStore::rename */
|
|
/**
|
|
* Note: rename operation in POSIX and HDFS has different semantic when destination exists.
|
|
* In POSIX, if destination is a file or empty directory, it will be replaced automatilally (special case:
|
|
* set RENAME_NOREPLACE flags in renameat2).
|
|
* In HDFS,
|
|
* - if destination is a file, rename operation will raise FileAlreadyExistsException;
|
|
* - if destination is a directory and source is file, source will be moved under destination (eg: mv file dir ->
|
|
* dir/file);
|
|
* - if both source and destination are directories, all children of source will be moved under destination recursively
|
|
* (we have decided to not provide this semantic because it's too complicated).
|
|
*
|
|
* This function implements POSIX semantic.
|
|
*/
|
|
class RenameOp : public Operation<RenameRsp> {
|
|
public:
|
|
RenameOp(MetaStore &meta, const RenameReq &req)
|
|
: Operation<RenameRsp>(meta),
|
|
req_(req) {}
|
|
|
|
OPERATION_TAGS(req_);
|
|
|
|
static bool underTrash(const std::vector<Inode> &ancestors) {
|
|
return ancestors.size() >= 2 && ancestors[ancestors.size() - 1].id == InodeId::root() &&
|
|
ancestors[ancestors.size() - 2].asDirectory().name == "trash";
|
|
}
|
|
|
|
bool needIdempotent(Uuid &clientId, Uuid &requestId) const override {
|
|
if (!req_.checkUuid()) return false;
|
|
if (!req_.moveToTrash && !config().idempotent_rename()) return false;
|
|
clientId = req_.client.uuid;
|
|
requestId = req_.uuid;
|
|
return true;
|
|
}
|
|
|
|
CoTryTask<Void> checkLoop(IReadWriteTransaction &txn,
|
|
const PathResolveOp::ResolveResult &srcResult,
|
|
const PathResolveOp::ResolveResult &dstResult,
|
|
std::optional<Path> &origPath) {
|
|
auto dstAncestors = std::vector<Inode>();
|
|
CO_RETURN_ON_ERROR(co_await Inode::loadAncestors(txn, dstAncestors, dstResult.getParentId()));
|
|
assert(!dstAncestors.empty());
|
|
|
|
for (auto &ancestor : dstAncestors) {
|
|
// src is not dst's ancestor
|
|
if (ancestor.id == srcResult.dirEntry->id) {
|
|
// try to move directory into it's descendent
|
|
co_return makeError(StatusCode::kInvalidArg, "try to move directory into it's descendent");
|
|
}
|
|
|
|
// move into a deleted directory
|
|
if (ancestor.nlink == 0) {
|
|
co_return makeError(MetaCode::kNotFound);
|
|
}
|
|
|
|
// check root
|
|
if (ancestor.id == ancestor.asDirectory().parent) {
|
|
if (ancestor.id == InodeId::root()) {
|
|
break;
|
|
} else if (ancestor.id == InodeId::gcRoot()) {
|
|
XLOGF(ERR, "RenameOp: {} move directory into a removed directory", req_);
|
|
co_return makeError(MetaCode::kNoPermission);
|
|
} else {
|
|
XLOGF(DFATAL, "Inode {} parent is itself", ancestor);
|
|
co_return makeError(MetaCode::kFoundBug);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (underTrash(dstAncestors)) {
|
|
XLOGF_IF(FATAL, !srcResult.dirEntry->isDirectory(), "{} not directory", *srcResult.dirEntry);
|
|
auto srcAncestors = std::vector<Inode>();
|
|
CO_RETURN_ON_ERROR(co_await Inode::loadAncestors(txn, srcAncestors, srcResult.getParentId()));
|
|
|
|
if (req_.moveToTrash || config().allow_directly_move_to_trash()) {
|
|
auto acl = srcResult.dirEntry->dirAcl;
|
|
if (!acl) {
|
|
XLOGF(DFATAL, "DirEntry {} is directory, but don't have acl", *srcResult.dirEntry);
|
|
co_return makeError(MetaCode::kFoundBug);
|
|
}
|
|
// try to move a directory into trash directory, should be owner and have rwx permission
|
|
CO_RETURN_ON_ERROR(acl->checkRecursiveRmPerm(req_.user, config().recursive_remove_check_owner()));
|
|
|
|
auto recursiveCheck = config().recursive_remove_perm_check();
|
|
if (recursiveCheck) {
|
|
auto res =
|
|
co_await DirEntryList::recursiveCheckRmPerm(txn, srcResult.dirEntry->id, req_.user, recursiveCheck, 128);
|
|
CO_RETURN_ON_ERROR(res);
|
|
}
|
|
} else if (req_.user.uid != flat::Uid(0)) {
|
|
// src should already in trash
|
|
if (!underTrash(srcAncestors)) {
|
|
co_return makeError(MetaCode::kNoPermission, "try to move into trash directory without moveToTrash");
|
|
}
|
|
}
|
|
|
|
origPath = Path(srcResult.dirEntry->name);
|
|
for (auto &ancestor : srcAncestors) {
|
|
origPath = ancestor.asDirectory().name / *origPath;
|
|
}
|
|
}
|
|
|
|
co_return Void{};
|
|
}
|
|
|
|
CoTryTask<Void> snapshotLoadInode(IReadWriteTransaction &txn, const DirEntry &entry, std::optional<Inode> &inode) {
|
|
if (!inode.has_value()) {
|
|
auto result = co_await entry.snapshotLoadInode(txn);
|
|
CO_RETURN_ON_ERROR(result);
|
|
inode = std::move(*result);
|
|
}
|
|
co_return Void{};
|
|
}
|
|
|
|
CoTryTask<Void> checkPermission(IReadWriteTransaction &txn,
|
|
PathResolveOp::ResolveResult &resolve,
|
|
std::optional<Inode> &inode,
|
|
bool dst) {
|
|
auto parent = co_await resolve.getParentInode(txn);
|
|
CO_RETURN_ON_ERROR(parent);
|
|
CO_RETURN_ON_ERROR(parent->acl.checkPermission(req_.user, AccessType::WRITE));
|
|
CO_RETURN_ON_ERROR(parent->asDirectory().checkLock(req_.client));
|
|
if (dst && !parent->nlink) {
|
|
// can't rename into a removed directory
|
|
co_return makeError(MetaCode::kNotFound);
|
|
}
|
|
if (!resolve.dirEntry.has_value()) {
|
|
co_return Void{};
|
|
}
|
|
|
|
auto &entry = *resolve.dirEntry;
|
|
CO_RETURN_ON_ERROR(co_await snapshotLoadInode(txn, entry, inode));
|
|
if (inode->acl.iflags & FS_IMMUTABLE_FL) {
|
|
auto msg = fmt::format("rename can't move {}, FS_IMMUTABLE_FL set on inode", entry);
|
|
XLOG(DBG, msg);
|
|
co_return makeError(MetaCode::kNoPermission, msg);
|
|
}
|
|
|
|
// The sticky bit (S_ISVTX) on a directory means that a file in that directory can be renamed or deleted
|
|
// only by the owner of the file, by the owner of the directory, and by a privileged process.
|
|
if ((parent->acl.perm & S_ISVTX) && req_.user.uid != parent->acl.uid && !req_.user.isRoot()) {
|
|
// not owner of directory and not owner of privileged process, should be owner of file
|
|
if (req_.user.uid != inode->acl.uid) {
|
|
auto msg = fmt::format("rename can't move {} {}, S_ISVTX set on parent {} {}",
|
|
entry,
|
|
inode->acl,
|
|
resolve.getParentId(),
|
|
parent->acl);
|
|
XLOG(DBG, msg);
|
|
co_return makeError(MetaCode::kNoPermission, msg);
|
|
}
|
|
}
|
|
|
|
co_return Void{};
|
|
}
|
|
|
|
CoTryTask<std::optional<std::pair<InodeId, uint16_t>>> removeDst(IReadWriteTransaction &txn,
|
|
PathResolveOp::ResolveResult &dst,
|
|
std::optional<Inode> &dstInode) {
|
|
if (!dst.dirEntry.has_value()) {
|
|
co_return std::nullopt;
|
|
}
|
|
|
|
assert(dst.dirEntry->name == req_.dest.path->filename().native());
|
|
if (dst.dirEntry->isFile()) {
|
|
// let GC task free file chunks.
|
|
CO_RETURN_ON_ERROR(co_await snapshotLoadInode(txn, *dst.dirEntry, dstInode));
|
|
CO_RETURN_ON_ERROR(
|
|
co_await gcManager().removeEntry(txn, *dst.dirEntry, *dstInode, GcInfo{req_.user.uid, dst.dirEntry->name}));
|
|
assert(dstInode->id == dst.dirEntry->id);
|
|
co_return std::pair<InodeId, uint16_t>{dstInode->id, dstInode->nlink};
|
|
} else if (dst.dirEntry->isDirectory()) {
|
|
// empty directory, can remove Inode directly
|
|
CO_RETURN_ON_ERROR(co_await Inode(dst.dirEntry->id).remove(txn));
|
|
co_return std::pair<InodeId, uint16_t>{dst.dirEntry->id, 0};
|
|
} else {
|
|
XLOGF_IF(DFATAL, !dst.dirEntry->isSymlink(), "{} not symlink, shouldn't happen", *dst.dirEntry);
|
|
// need load inode and check refcnt
|
|
auto inode = co_await dst.dirEntry->loadInode(txn);
|
|
CO_RETURN_ON_ERROR(inode);
|
|
if (UNLIKELY(inode->nlink == 0)) {
|
|
auto msg = fmt::format("entry {} exists, but inode {} nlink == 0", *dst.dirEntry, inode);
|
|
XLOG(DFATAL, msg);
|
|
co_return makeError(MetaCode::kFoundBug, msg);
|
|
}
|
|
// NOTE: The fuse client may have cached this symlink. If delete it immediately, kNotFound will be reported for
|
|
// subsequent visits. The temporary solution is not to delete the symlink inode. This problem needs to be resolved
|
|
// later.
|
|
SetAttr::update(inode->ctime, UtcClock::now(), config().time_granularity(), true);
|
|
auto refcnt = --inode->nlink;
|
|
CO_RETURN_ON_ERROR(co_await inode->store(txn));
|
|
// if (refcnt != 0) {
|
|
// CO_RETURN_ON_ERROR(co_await inode->store(txn));
|
|
// } else {
|
|
// CO_RETURN_ON_ERROR(co_await inode->remove(txn));
|
|
// }
|
|
co_return std::pair<InodeId, uint16_t>{dst.dirEntry->id, refcnt};
|
|
}
|
|
}
|
|
|
|
CoTryTask<RenameRsp> run(IReadWriteTransaction &txn) override {
|
|
XLOGF(DBG, "RenameOp: {}", req_);
|
|
|
|
CHECK_REQUEST(req_);
|
|
|
|
auto [srcResult, dstResult] =
|
|
co_await folly::coro::collectAll(resolve(txn, req_.user).path(req_.src, AtFlags(AT_SYMLINK_NOFOLLOW)),
|
|
resolve(txn, req_.user).path(req_.dest, AtFlags(AT_SYMLINK_NOFOLLOW)));
|
|
CO_RETURN_ON_ERROR(srcResult);
|
|
CO_RETURN_ON_ERROR(dstResult);
|
|
|
|
// check dst, transaction may already executed.
|
|
if (dstResult->dirEntry.has_value() && dstResult->dirEntry->uuid != Uuid::zero() &&
|
|
dstResult->dirEntry->uuid == req_.uuid) {
|
|
// this may happens when FDB returns commit_unknown_result, or we failed to send response to client
|
|
XLOGF(CRITICAL, "Rename already finished, dst {}, req {}, uuid {}", *dstResult->dirEntry, req_, req_.uuid);
|
|
auto inode = co_await dstResult->dirEntry->snapshotLoadInode(txn);
|
|
CO_RETURN_ON_ERROR(inode);
|
|
co_return RenameRsp(std::move(*inode));
|
|
}
|
|
|
|
// src should exists
|
|
if (!srcResult->dirEntry.has_value()) {
|
|
co_return MAKE_ERROR_F(MetaCode::kNotFound, "rename src {} not found", req_.src);
|
|
}
|
|
// check src InodeId
|
|
if (req_.inodeId && srcResult->dirEntry->id != req_.inodeId) {
|
|
co_return MAKE_ERROR_F(MetaCode::kNotFound, "rename src {}, inodeId != {}", *srcResult->dirEntry, *req_.inodeId);
|
|
}
|
|
// if src and dst points to same dir entry, do nothing
|
|
if (dstResult->dirEntry.has_value() && dstResult->dirEntry->parent == srcResult->dirEntry->parent &&
|
|
dstResult->dirEntry->name == srcResult->dirEntry->name) {
|
|
auto inode = co_await dstResult->dirEntry->snapshotLoadInode(txn);
|
|
CO_RETURN_ON_ERROR(inode);
|
|
co_return RenameRsp(std::move(*inode));
|
|
}
|
|
// move to trash shouldn't replace file already exists
|
|
if (dstResult->dirEntry.has_value() && dstResult->dirEntry->isFile() && req_.moveToTrash) {
|
|
co_return MAKE_ERROR_F(MetaCode::kExists, "rename dest {} exist", req_.dest);
|
|
}
|
|
// dst shouldn't be a non-empty directory
|
|
if (dstResult->dirEntry.has_value() && dstResult->dirEntry->isDirectory()) {
|
|
auto checkResult = co_await DirEntryList::checkEmpty(txn, dstResult->dirEntry->id);
|
|
CO_RETURN_ON_ERROR(checkResult);
|
|
bool empty = checkResult.value();
|
|
if (!empty) {
|
|
co_return MAKE_ERROR_F(MetaCode::kNotEmpty, "rename dest {} not empty", req_.dest);
|
|
}
|
|
}
|
|
// now, dst can be safely replaced (not exist, empty directory, file, symlink).
|
|
std::optional<Path> origPath;
|
|
if (srcResult->dirEntry->isDirectory()) {
|
|
if (dstResult->dirEntry.has_value() && !dstResult->dirEntry->isDirectory()) {
|
|
// man 2 rename: oldpath can specify a directory. In this case, newpath must either not exist, or it must
|
|
// specify an empty directory.
|
|
co_return makeError(MetaCode::kNotDirectory);
|
|
}
|
|
CO_RETURN_ON_ERROR(co_await checkLoop(txn, *srcResult, *dstResult, origPath));
|
|
}
|
|
|
|
// permission check
|
|
std::optional<Inode> srcInode, dstInode;
|
|
CO_RETURN_ON_ERROR(co_await checkPermission(txn, *srcResult, srcInode, false));
|
|
CO_RETURN_ON_ERROR(co_await checkPermission(txn, *dstResult, dstInode, true));
|
|
|
|
// NOTE: add src/dst's parent inode and dirEntry into read conflict set.
|
|
CO_RETURN_ON_ERROR(co_await Inode(srcResult->getParentId()).addIntoReadConflict(txn));
|
|
CO_RETURN_ON_ERROR(co_await srcResult->dirEntry->addIntoReadConflict(txn));
|
|
CO_RETURN_ON_ERROR(co_await Inode(dstResult->getParentId()).addIntoReadConflict(txn));
|
|
CO_RETURN_ON_ERROR(
|
|
co_await DirEntry(dstResult->getParentId(), req_.dest.path->filename().native()).addIntoReadConflict(txn));
|
|
|
|
auto &srcEntry = srcResult->dirEntry.value();
|
|
auto inodeResult = co_await srcEntry.loadInode(txn);
|
|
CO_RETURN_ON_ERROR(inodeResult);
|
|
auto &inode = inodeResult.value();
|
|
if (srcEntry.isDirectory()) {
|
|
// NOTE: add src's inode into read conflict set.
|
|
// load inode and update it's parent, read modify write, should use load.
|
|
inode.asDirectory().parent = dstResult->getParentId();
|
|
inode.asDirectory().name = req_.dest.path->filename().native();
|
|
auto updateInodeResult = co_await inode.store(txn);
|
|
CO_RETURN_ON_ERROR(updateInodeResult);
|
|
}
|
|
|
|
// remove src entry and dst entry
|
|
CO_RETURN_ON_ERROR(co_await srcEntry.remove(txn));
|
|
auto removeDstResult = co_await removeDst(txn, *dstResult, dstInode);
|
|
CO_RETURN_ON_ERROR(removeDstResult);
|
|
auto &oldDst = *removeDstResult;
|
|
|
|
// create dst entry
|
|
DirEntry newDstEntry(dstResult->getParentId(), req_.dest.path->filename().native());
|
|
newDstEntry.data() = srcEntry.data();
|
|
newDstEntry.uuid = req_.uuid;
|
|
CO_RETURN_ON_ERROR(co_await newDstEntry.store(txn));
|
|
|
|
auto &event = addEvent(Event::Type::Rename)
|
|
.addField("srcParent", srcEntry.parent)
|
|
.addField("srcName", srcEntry.name)
|
|
.addField("dstParent", newDstEntry.parent)
|
|
.addField("dstName", newDstEntry.name)
|
|
.addField("inode", newDstEntry.id)
|
|
.addField("user", req_.user.uid)
|
|
.addField("host", req_.client.hostname);
|
|
addTrace(MetaEventTrace{.eventType = Event::Type::Rename,
|
|
.inodeId = newDstEntry.id,
|
|
.parentId = srcEntry.parent,
|
|
.entryName = srcEntry.name,
|
|
.dstParentId = newDstEntry.parent,
|
|
.dstEntryName = newDstEntry.name,
|
|
.userId = req_.user.uid,
|
|
.client = req_.client,
|
|
.origPath = origPath.value_or(Path())});
|
|
|
|
if (oldDst.has_value()) {
|
|
auto [oldDstInode, oldDstNlink] = *oldDst;
|
|
event.addField("oldDstInode", oldDstInode).addField("oldDstNlink", oldDstNlink);
|
|
}
|
|
|
|
if (origPath.has_value()) {
|
|
event.addField("origPath", origPath->string());
|
|
}
|
|
|
|
co_return RenameRsp(std::move(inode));
|
|
}
|
|
|
|
private:
|
|
const RenameReq &req_;
|
|
};
|
|
|
|
MetaStore::OpPtr<RenameRsp> MetaStore::rename(const RenameReq &req) { return std::make_unique<RenameOp>(*this, req); }
|
|
|
|
} // namespace hf3fs::meta::server
|